[
  {
    "path": ".fern/metadata.json",
    "content": "{\n  \"cliVersion\": \"4.63.2\",\n  \"generatorName\": \"fernapi/fern-python-sdk\",\n  \"generatorVersion\": \"5.3.3\",\n  \"generatorConfig\": {\n    \"inline_request_params\": false,\n    \"extras\": {\n      \"oci\": [\n        \"oci\"\n      ]\n    },\n    \"extra_dependencies\": {\n      \"fastavro\": \"^1.9.4\",\n      \"requests\": \"^2.0.0\",\n      \"types-requests\": \"^2.0.0\",\n      \"tokenizers\": \">=0.15,<1\",\n      \"oci\": {\n        \"version\": \"^2.165.0\",\n        \"optional\": true\n      }\n    },\n    \"improved_imports\": true,\n    \"pydantic_config\": {\n      \"frozen\": false,\n      \"union_naming\": \"v1\",\n      \"require_optional_fields\": false,\n      \"extra_fields\": \"allow\",\n      \"use_str_enums\": true,\n      \"skip_validation\": true\n    },\n    \"timeout_in_seconds\": 300,\n    \"client\": {\n      \"class_name\": \"BaseCohere\",\n      \"filename\": \"base_client.py\",\n      \"exported_class_name\": \"Client\",\n      \"exported_filename\": \"client.py\"\n    },\n    \"additional_init_exports\": [\n      {\n        \"from\": \"client\",\n        \"imports\": [\n          \"Client\",\n          \"AsyncClient\"\n        ]\n      },\n      {\n        \"from\": \"bedrock_client\",\n        \"imports\": [\n          \"BedrockClient\",\n          \"BedrockClientV2\"\n        ]\n      },\n      {\n        \"from\": \"sagemaker_client\",\n        \"imports\": [\n          \"SagemakerClient\",\n          \"SagemakerClientV2\"\n        ]\n      },\n      {\n        \"from\": \"aws_client\",\n        \"imports\": [\n          \"AwsClient\"\n        ]\n      },\n      {\n        \"from\": \"oci_client\",\n        \"imports\": [\n          \"OciClient\",\n          \"OciClientV2\"\n        ]\n      },\n      {\n        \"from\": \"client_v2\",\n        \"imports\": [\n          \"AsyncClientV2\",\n          \"ClientV2\"\n        ]\n      },\n      {\n        \"from\": \"aliases\",\n        \"imports\": [\n     
     \"StreamedChatResponseV2\",\n          \"MessageStartStreamedChatResponseV2\",\n          \"MessageEndStreamedChatResponseV2\",\n          \"ContentStartStreamedChatResponseV2\",\n          \"ContentDeltaStreamedChatResponseV2\",\n          \"ContentEndStreamedChatResponseV2\",\n          \"ToolCallStartStreamedChatResponseV2\",\n          \"ToolCallDeltaStreamedChatResponseV2\",\n          \"ToolCallEndStreamedChatResponseV2\",\n          \"ChatResponse\"\n        ]\n      }\n    ]\n  },\n  \"originGitCommit\": \"8dfb5e03f14a05967c4cdeeb44429eb4c1dca198\",\n  \"sdkVersion\": \"6.1.0\"\n}"
  },
  {
    "path": ".fernignore",
    "content": "4.0.0-5.0.0-migration-guide.md\nbanner.png\nREADME.md\nsrc/cohere/client.py\ntests\n.github/workflows/ci.yml\n.github/ISSUE_TEMPLATE\nLICENSE\n.github/workflows/tests.yml\nsrc/cohere/utils.py\nsrc/cohere/overrides.py\nsrc/cohere/config.py\nsrc/cohere/manually_maintained\nsrc/cohere/manually_maintained/__init__.py\nsrc/cohere/bedrock_client.py\nsrc/cohere/aws_client.py\nsrc/cohere/sagemaker_client.py\nsrc/cohere/oci_client.py\nsrc/cohere/client_v2.py\nmypy.ini\nsrc/cohere/aliases.py"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.md",
    "content": "---\nname: Bug report related to an SDK error\nabout: Create a report to help us improve\ntitle: ''\nlabels: ''\n\n---\n\n**SDK Version (required)**\nProvide the version you are using. To get the version, run the following python snippet\n```python\nimport cohere\n\nprint(cohere.__version__) # 5.6.1\n```\n\n**Describe the bug**\nA clear and concise description of what the bug is.\n\n**Screenshots**\nIf applicable, add screenshots to help explain your problem.\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/improvement_request.md",
    "content": "---\nname: Improvement request, or addition features\nabout: Create a request to help us improve\ntitle: \"\"\nlabels: \"\"\n---\n\n**Describe the improvement**\n\nA clear and concise description of what the new improvement is.\n\n**Code snippet of expected outcome**\n\nIf applicable, add a code snippet of how you'd like to see the feature implemented\n"
  },
  {
    "path": ".github/workflows/ci.yml",
    "content": "name: ci\n\non: [push]\njobs:\n  compile:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Checkout repo\n        uses: actions/checkout@v3\n      - name: Set up python\n        uses: actions/setup-python@v4\n        with:\n          python-version: \"3.10\"\n      - name: Bootstrap poetry\n        uses: snok/install-poetry@v1\n        with:\n          version: 1.5.1\n          virtualenvs-in-project: false\n      - name: Install dependencies\n        run: poetry install\n      - name: Compile\n        run: poetry run mypy .\n  test:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Checkout repo\n        uses: actions/checkout@v3\n      - name: Set up python\n        uses: actions/setup-python@v4\n        with:\n          python-version: \"3.10\"\n      - name: Bootstrap poetry\n        uses: snok/install-poetry@v1\n        with:\n          version: 1.5.1\n          virtualenvs-in-project: false\n      - name: Install dependencies\n        run: poetry install\n      - name: Install aws deps\n        run: poetry run pip install boto3 sagemaker botocore\n      - name: Test\n        run: poetry run pytest -rP -n auto .\n        env:\n          CO_API_KEY: ${{ secrets.COHERE_API_KEY }}\n      - name: Install aiohttp extra\n        run: poetry install --extras aiohttp\n\n      - name: Test (aiohttp)\n        run: poetry run pytest -rP -n auto -m aiohttp . || [ $? 
-eq 5 ]\n        env:\n          CO_API_KEY: ${{ secrets.COHERE_API_KEY }}\n\n  publish:\n    needs: [compile, test]\n    if: github.event_name == 'push' && contains(github.ref, 'refs/tags/')\n    runs-on: ubuntu-latest\n    steps:\n      - name: Checkout repo\n        uses: actions/checkout@v3\n      - name: Set up python\n        uses: actions/setup-python@v4\n        with:\n          python-version: \"3.10\"\n      - name: Bootstrap poetry\n        run: |\n          curl -sSL https://install.python-poetry.org | python - -y --version 1.5.1\n      - name: Install dependencies\n        run: poetry install\n      - name: Publish to pypi\n        run: |\n          poetry config repositories.remote https://upload.pypi.org/legacy/\n          poetry --no-interaction -v publish --build --repository remote --username \"$PYPI_USERNAME\" --password \"$PYPI_PASSWORD\"\n        env:\n          PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}\n          PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}\n"
  },
  {
    "path": ".gitignore",
    "content": ".mypy_cache/\n.ruff_cache/\n__pycache__/\ndist/\npoetry.toml\n"
  },
  {
    "path": "4.0.0-5.0.0-migration-guide.md",
    "content": "## `cohere==4` to `cohere==5` migration guide\n\nAs we migrate from the handwritten, manually-maintained sdk to our auto-generated sdk, there are some breaking changes that must be accommodated during migration. These should mostly improve the developer experience but thank you for bearing with us as we make these changes.\n\n### Installation\n\nTo install the latest version of the cohere sdk `pip3 install --upgrade cohere`.\n\n### Migrating usages\n\n#### Migrating function calls\n\n[This diff view](https://github.com/cohere-ai/cohere-python/compare/old-usage...new-usage) enumerates all usages of the old sdk and how they map to the new sdk. Some fields are no longer supported in the new sdk.\n\n#### Migrating streaming usage\n\nThe `streaming: boolean` are no longer supported in the new sdk. Instead, you can replace the `chat` function with `chat_stream` and `generate` function with `generate_stream`. These will automatically inject the `streaming` parameter into the request. 
The following is an example usage for `chat_stream`:\n\n```python\nstream = co.chat_stream(\n    message=\"Tell me a short story\"\n)\n\nfor event in stream:\n    if event.event_type == \"text-generation\":\n        print(event.text, end='')\n```\n\n### Migrating deprecated `num_workers` Client constructor parameter\n\nThe Client constructor accepts an `httpx_client` which can be configured to limit the maximum number of connections.\n\n```python\nlimits = httpx.Limits(max_connections=10)\ncohere.Client(httpx_client=httpx.Client(limits=limits))\n```\n\n### Removed functionality (subject to change)\n\nThe following lists name the functions that are not in the new SDK and what their ongoing support status is.\n\n#### No longer supported\n\n* check_api_key\n* loglikelihood\n* batch_generate\n* codebook\n* batch_tokenize\n* batch_detokenize\n* detect_language\n* generate_feedback\n* generate_preference_feedback\n* create_cluster_job\n* get_cluster_job\n* list_cluster_jobs\n* wait_for_cluster_job\n* create_custom_model\n* wait_for_custom_model\n* get_custom_model\n* get_custom_model_by_name\n* get_custom_model_metrics\n* list_custom_models\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2021 Cohere\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE."
  },
  {
    "path": "README.md",
    "content": "# Cohere Python SDK\n\n![](banner.png)\n\n[![version badge](https://img.shields.io/pypi/v/cohere)](https://pypi.org/project/cohere/)\n![license badge](https://img.shields.io/github/license/cohere-ai/cohere-python)\n[![fern shield](https://img.shields.io/badge/%F0%9F%8C%BF-SDK%20generated%20by%20Fern-brightgreen)](https://github.com/fern-api/fern)\n\nThe Cohere Python SDK allows access to Cohere models across many different platforms: the cohere platform, AWS (Bedrock, Sagemaker), Azure, GCP and Oracle OCI. For a full list of support and snippets, please take a look at the [SDK support docs page](https://docs.cohere.com/docs/cohere-works-everywhere).\n\n## Documentation\n\nCohere documentation and API reference is available [here](https://docs.cohere.com/).\n\n## Installation\n\n```\npip install cohere\n```\n\n## Usage\n\n```Python\nimport cohere\n\nco = cohere.ClientV2()\n\nresponse = co.chat(\n    model=\"command-r-plus-08-2024\",\n    messages=[{\"role\": \"user\", \"content\": \"hello world!\"}],\n)\n\nprint(response)\n```\n\n> [!TIP]\n> You can set a system environment variable `CO_API_KEY` to avoid writing your api key within your code, e.g. add `export CO_API_KEY=theapikeyforyouraccount`\n> in your ~/.zshrc or ~/.bashrc, open a new terminal, then code calling `cohere.Client()` will read this key.\n\n\n## Streaming\n\nThe SDK supports streaming endpoints. To take advantage of this feature for chat,\nuse `chat_stream`.\n\n```Python\nimport cohere\n\nco = cohere.ClientV2()\n\nresponse = co.chat_stream(\n    model=\"command-r-plus-08-2024\",\n    messages=[{\"role\": \"user\", \"content\": \"hello world!\"}],\n)\n\nfor event in response:\n    if event.type == \"content-delta\":\n        print(event.delta.message.content.text, end=\"\")\n```\n\n## Oracle Cloud Infrastructure (OCI)\n\nThe SDK supports Oracle Cloud Infrastructure (OCI) Generative AI service. 
First, install the OCI SDK:\n\n```\npip install 'cohere[oci]'\n```\n\nThen use the `OciClient` or `OciClientV2`:\n\n```Python\nimport cohere\n\n# Using OCI config file authentication (default: ~/.oci/config)\nco = cohere.OciClient(\n    oci_region=\"us-chicago-1\",\n    oci_compartment_id=\"ocid1.compartment.oc1...\",\n)\n\nresponse = co.embed(\n    model=\"embed-english-v3.0\",\n    texts=[\"Hello world\"],\n    input_type=\"search_document\",\n)\n\nprint(response.embeddings)\n```\n\n### OCI Authentication Methods\n\n**1. Config File (Default)**\n```Python\nco = cohere.OciClient(\n    oci_region=\"us-chicago-1\",\n    oci_compartment_id=\"ocid1.compartment.oc1...\",\n    # Uses ~/.oci/config with DEFAULT profile\n)\n```\n\n**2. Custom Profile**\n```Python\nco = cohere.OciClient(\n    oci_profile=\"MY_PROFILE\",\n    oci_region=\"us-chicago-1\",\n    oci_compartment_id=\"ocid1.compartment.oc1...\",\n)\n```\n\n**3. Session-based Authentication (Security Token)**\n```Python\n# Works with OCI CLI session tokens\nco = cohere.OciClient(\n    oci_profile=\"MY_SESSION_PROFILE\",  # Profile with security_token_file\n    oci_region=\"us-chicago-1\",\n    oci_compartment_id=\"ocid1.compartment.oc1...\",\n)\n```\n\n**4. Direct Credentials**\n```Python\nco = cohere.OciClient(\n    oci_user_id=\"ocid1.user.oc1...\",\n    oci_fingerprint=\"xx:xx:xx:...\",\n    oci_tenancy_id=\"ocid1.tenancy.oc1...\",\n    oci_private_key_path=\"~/.oci/key.pem\",\n    oci_region=\"us-chicago-1\",\n    oci_compartment_id=\"ocid1.compartment.oc1...\",\n)\n```\n\n**5. 
Instance Principal (for OCI Compute instances)**\n```Python\nco = cohere.OciClient(\n    auth_type=\"instance_principal\",\n    oci_region=\"us-chicago-1\",\n    oci_compartment_id=\"ocid1.compartment.oc1...\",\n)\n```\n\n### Supported OCI APIs\n\nThe OCI client supports the following Cohere APIs:\n- **Embed**: Full support for all embedding models\n- **Chat**: Full support with both V1 (`OciClient`) and V2 (`OciClientV2`) APIs\n  - Streaming available via `chat_stream()`\n  - Supports Command-R and Command-A model families\n\n### OCI Model Availability and Limitations\n\n**Available on OCI On-Demand Inference:**\n- ✅ **Embed models**: available on OCI Generative AI\n- ✅ **Chat models**: available via `OciClient` (V1) and `OciClientV2` (V2)\n\n**Not Available on OCI On-Demand Inference:**\n- ❌ **Generate API**: OCI TEXT_GENERATION models are base models that require fine-tuning before deployment\n- ❌ **Rerank API**: OCI TEXT_RERANK models are base models that require fine-tuning before deployment\n- ❌ **Multiple Embedding Types**: OCI on-demand models only support single embedding type per request (cannot request both `float` and `int8` simultaneously)\n\n**Note**: To use Generate or Rerank models on OCI, you need to:\n1. Fine-tune the base model using OCI's fine-tuning service\n2. Deploy the fine-tuned model to a dedicated endpoint\n3. Update your code to use the deployed model endpoint\n\nFor the latest model availability, see the [OCI Generative AI documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm).\n\n## Contributing\n\nWhile we value open-source contributions to this SDK, the code is generated programmatically. Additions made directly would have to be moved over to our generation code, otherwise they would be overwritten upon the next generated release. Feel free to open a PR as a proof of concept, but know that we will not be able to merge it as-is. 
We suggest opening an issue first to discuss with us!\n\nOn the other hand, contributions to the README are always very welcome!\n"
  },
  {
    "path": "mypy.ini",
    "content": "[mypy]\nexclude = src/cohere/manually_maintained/cohere_aws\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[project]\nname = \"cohere\"\ndynamic = [\"version\"]\n\n[tool.poetry]\nname = \"cohere\"\nversion = \"6.1.0\"\ndescription = \"\"\nreadme = \"README.md\"\nauthors = []\nkeywords = []\nlicense = \"MIT\"\nclassifiers = [\n    \"Intended Audience :: Developers\",\n    \"Programming Language :: Python\",\n    \"Programming Language :: Python :: 3\",\n    \"Programming Language :: Python :: 3.10\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n    \"Programming Language :: Python :: 3.13\",\n    \"Programming Language :: Python :: 3.14\",\n    \"Programming Language :: Python :: 3.15\",\n    \"Operating System :: OS Independent\",\n    \"Operating System :: POSIX\",\n    \"Operating System :: MacOS\",\n    \"Operating System :: POSIX :: Linux\",\n    \"Operating System :: Microsoft :: Windows\",\n    \"Topic :: Software Development :: Libraries :: Python Modules\",\n    \"Typing :: Typed\",\n    \"License :: OSI Approved :: MIT License\"\n]\npackages = [\n    { include = \"cohere\", from = \"src\"}\n]\n\n[tool.poetry.urls]\nRepository = 'https://github.com/cohere-ai/cohere-python'\n\n[tool.poetry.dependencies]\npython = \"^3.10\"\naiohttp = { version = \">=3.10.0,<4\", optional = true}\nfastavro = \"^1.9.4\"\nhttpx = \">=0.21.2\"\nhttpx-aiohttp = { version = \"0.1.8\", optional = true}\noci = { version = \"^2.165.0\", optional = true}\npydantic = \">= 1.9.2\"\npydantic-core = \">=2.18.2,<2.44.0\"\nrequests = \"^2.0.0\"\ntokenizers = \">=0.15,<1\"\ntypes-requests = \"^2.0.0\"\ntyping_extensions = \">= 4.0.0\"\n\n[tool.poetry.group.dev.dependencies]\nmypy = \"==1.13.0\"\npytest = \"^8.2.0\"\npytest-asyncio = \"^1.0.0\"\npytest-xdist = \"^3.6.1\"\npython-dateutil = \"^2.9.0\"\ntypes-python-dateutil = \"^2.9.0.20240316\"\nruff = \"==0.11.5\"\n\n[tool.pytest.ini_options]\ntestpaths = [ \"tests\" ]\nasyncio_mode = \"auto\"\nmarkers = [\n    \"aiohttp: tests that require httpx_aiohttp to be 
installed\",\n]\n\n[tool.mypy]\nplugins = [\"pydantic.mypy\"]\n\n[tool.ruff]\nline-length = 120\n\n[tool.ruff.lint]\nselect = [\n    \"E\",  # pycodestyle errors\n    \"F\",  # pyflakes\n    \"I\",  # isort\n]\nignore = [\n    \"E402\",  # Module level import not at top of file\n    \"E501\",  # Line too long\n    \"E711\",  # Comparison to `None` should be `cond is not None`\n    \"E712\",  # Avoid equality comparisons to `True`; use `if ...:` checks\n    \"E721\",  # Use `is` and `is not` for type comparisons, or `isinstance()` for insinstance checks\n    \"E722\",  # Do not use bare `except`\n    \"E731\",  # Do not assign a `lambda` expression, use a `def`\n    \"F821\",  # Undefined name\n    \"F841\"   # Local variable ... is assigned to but never used\n]\n\n[tool.ruff.lint.isort]\nsection-order = [\"future\", \"standard-library\", \"third-party\", \"first-party\"]\n\n[build-system]\nrequires = [\"poetry-core\"]\nbuild-backend = \"poetry.core.masonry.api\"\n\n[tool.poetry.extras]\noci=[\"oci\"]\naiohttp=[\"aiohttp\", \"httpx-aiohttp\"]\n"
  },
  {
    "path": "reference.md",
    "content": "# Reference\n<details><summary><code>client.<a href=\"src/cohere/client.py\">chat_stream</a>(...) -> typing.Iterator[bytes]</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nGenerates a streamed text response to a user message.\n\nTo learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api).\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.chat_stream(\n    model=\"command-a-03-2025\",\n    message=\"hello!\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**message:** `str` \n\nText input for the model to respond to.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**stream:** `typing.Literal` \n\nDefaults to `false`.\n\nWhen `true`, the response will be a JSON stream of events. The final event will contain the complete response, and will have an `event_type` of `\"stream-end\"`.\n\nStreaming is beneficial for user interfaces that render the contents of the response piece by piece, as it gets generated.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**accepts:** `typing.Optional[typing.Literal]` — Pass text/event-stream to receive the streamed response as server-sent events. 
The default is `\\n` delimited events.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**model:** `typing.Optional[str]` \n\nThe name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.\n\nCompatible Deployments: Cohere Platform, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**preamble:** `typing.Optional[str]` \n\nWhen specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.\n\nThe `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**chat_history:** `typing.Optional[typing.List[Message]]` \n\nA list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.\n\nEach item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.\n\nThe chat_history parameter should not be used for `SYSTEM` messages in most cases. 
Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**conversation_id:** `typing.Optional[str]` \n\nAn alternative to `chat_history`.\n\nProviding a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string.\n\nCompatible Deployments: Cohere Platform\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**prompt_truncation:** `typing.Optional[ChatStreamRequestPromptTruncation]` \n\nDefaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.\n\nDictates how the prompt will be constructed.\n\nWith `prompt_truncation` set to \"AUTO\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.\n\nWith `prompt_truncation` set to \"AUTO_PRESERVE_ORDER\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.\n\nWith `prompt_truncation` set to \"OFF\", no elements will be dropped. 
If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.\n\nCompatible Deployments:\n - AUTO: Cohere Platform Only\n - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**connectors:** `typing.Optional[typing.List[ChatConnector]]` \n\nAccepts `{\"id\": \"web-search\"}`, and/or the `\"id\"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) one.\n\nWhen specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).\n\nCompatible Deployments: Cohere Platform\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**search_queries_only:** `typing.Optional[bool]` \n\nDefaults to `false`.\n\nWhen `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**documents:** `typing.Optional[typing.List[ChatDocument]]` \n\nA list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.\n\nExample:\n```\n[\n  { \"title\": \"Tall penguins\", \"text\": \"Emperor penguins are the tallest.\" },\n  { \"title\": \"Penguin habitats\", \"text\": \"Emperor penguins only live in Antarctica.\" },\n]\n```\n\nKeys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.\n\nSome suggested keys are \"text\", \"author\", and \"date\". 
For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.\n\nAn `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.\n\nAn `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The \"_excludes\" field will not be passed to the model.\n\nSee ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**citation_quality:** `typing.Optional[ChatStreamRequestCitationQuality]` \n\nDefaults to `\"enabled\"`.\nCitations are enabled by default for models that support it, but can be turned off by setting `\"type\": \"disabled\"`.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**temperature:** `typing.Optional[float]` \n\nDefaults to `0.3`.\n\nA non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\nRandomness can be further maximized by increasing the  value of the `p` parameter.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**max_tokens:** `typing.Optional[int]` \n\nThe maximum number of tokens the model will generate as part of the response. 
Note: Setting a low value may result in incomplete generations.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**max_input_tokens:** `typing.Optional[int]` \n\nThe maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer.\n\nInput will be truncated according to the `prompt_truncation` parameter.\n\nCompatible Deployments: Cohere Platform\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**k:** `typing.Optional[int]` \n\nEnsures only the top `k` most likely tokens are considered for generation at each step.\nDefaults to `0`, min value of `0`, max value of `500`.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**p:** `typing.Optional[float]` \n\nEnsures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\nDefaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**seed:** `typing.Optional[int]` \n\nIf specified, the backend will make a best effort to sample tokens\ndeterministically, such that repeated requests with the same\nseed and parameters should return the same result. However,\ndeterminism cannot be totally guaranteed.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**stop_sequences:** `typing.Optional[typing.List[str]]` \n\nA list of up to 5 strings that the model will use to stop generation. 
If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**frequency_penalty:** `typing.Optional[float]` \n\nDefaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\nUsed to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**presence_penalty:** `typing.Optional[float]` \n\nDefaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\nUsed to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**raw_prompting:** `typing.Optional[bool]` \n\nWhen enabled, the user's prompt will be sent to the model without\nany pre-processing.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**tools:** `typing.Optional[typing.List[Tool]]` \n\nA list of available tools (functions) that the model may suggest invoking before producing a text response.\n\nWhen `tools` is passed (without `tool_results`), the `text` field in the response will be `\"\"` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. 
If no calls need to be made, the `tool_calls` array will be empty.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**tool_results:** `typing.Optional[typing.List[ToolResult]]` \n\nA list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.\nEach tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.\n\n**Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{\"status\": 200}`), make sure to wrap it in a list.\n```\ntool_results = [\n  {\n    \"call\": {\n      \"name\": <tool name>,\n      \"parameters\": {\n        <param name>: <param value>\n      }\n    },\n    \"outputs\": [{\n      <key>: <value>\n    }]\n  },\n  ...\n]\n```\n**Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**force_single_step:** `typing.Optional[bool]` — Forces the chat to be single step. Defaults to `false`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**response_format:** `typing.Optional[ResponseFormat]` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**safety_mode:** `typing.Optional[ChatStreamRequestSafetyMode]` \n\nUsed to select the [safety instruction](https://docs.cohere.com/docs/safety-modes) inserted into the prompt. 
Defaults to `CONTEXTUAL`.\nWhen `NONE` is specified, the safety instruction will be omitted.\n\nSafety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.\n\n**Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n**Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.<a href=\"src/cohere/client.py\">chat</a>(...) -> NonStreamedChatResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nGenerates a text response to a user message.\nTo learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api).\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.chat(\n    model=\"command-a-03-2025\",\n    message=\"Tell me about LLMs\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**message:** `str` \n\nText input for the model to respond to.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**stream:** `typing.Literal` \n\nDefaults to `false`.\n\nWhen `true`, the response will be a JSON stream of events. 
The final event will contain the complete response, and will have an `event_type` of `\"stream-end\"`.\n\nStreaming is beneficial for user interfaces that render the contents of the response piece by piece, as it gets generated.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**accepts:** `typing.Optional[typing.Literal]` — Pass text/event-stream to receive the streamed response as server-sent events. The default is `\\n` delimited events.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**model:** `typing.Optional[str]` \n\nThe name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.\n\nCompatible Deployments: Cohere Platform, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**preamble:** `typing.Optional[str]` \n\nWhen specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.\n\nThe `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**chat_history:** `typing.Optional[typing.List[Message]]` \n\nA list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.\n\nEach item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. 
The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.\n\nThe chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**conversation_id:** `typing.Optional[str]` \n\nAn alternative to `chat_history`.\n\nProviding a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string.\n\nCompatible Deployments: Cohere Platform\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**prompt_truncation:** `typing.Optional[ChatRequestPromptTruncation]` \n\nDefaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.\n\nDictates how the prompt will be constructed.\n\nWith `prompt_truncation` set to \"AUTO\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.\n\nWith `prompt_truncation` set to \"AUTO_PRESERVE_ORDER\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.\n\nWith `prompt_truncation` set to \"OFF\", no elements will be dropped. 
If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.\n\nCompatible Deployments:\n - AUTO: Cohere Platform Only\n - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**connectors:** `typing.Optional[typing.List[ChatConnector]]` \n\nAccepts `{\"id\": \"web-search\"}`, and/or the `\"id\"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) one.\n\nWhen specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).\n\nCompatible Deployments: Cohere Platform\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**search_queries_only:** `typing.Optional[bool]` \n\nDefaults to `false`.\n\nWhen `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**documents:** `typing.Optional[typing.List[ChatDocument]]` \n\nA list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.\n\nExample:\n```\n[\n  { \"title\": \"Tall penguins\", \"text\": \"Emperor penguins are the tallest.\" },\n  { \"title\": \"Penguin habitats\", \"text\": \"Emperor penguins only live in Antarctica.\" },\n]\n```\n\nKeys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.\n\nSome suggested keys are \"text\", \"author\", and \"date\". 
For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.\n\nAn `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.\n\nAn `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The \"_excludes\" field will not be passed to the model.\n\nSee ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**citation_quality:** `typing.Optional[ChatRequestCitationQuality]` \n\nDefaults to `\"enabled\"`.\nCitations are enabled by default for models that support it, but can be turned off by setting `\"type\": \"disabled\"`.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**temperature:** `typing.Optional[float]` \n\nDefaults to `0.3`.\n\nA non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\nRandomness can be further maximized by increasing the  value of the `p` parameter.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**max_tokens:** `typing.Optional[int]` \n\nThe maximum number of tokens the model will generate as part of the response. 
Note: Setting a low value may result in incomplete generations.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**max_input_tokens:** `typing.Optional[int]` \n\nThe maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer.\n\nInput will be truncated according to the `prompt_truncation` parameter.\n\nCompatible Deployments: Cohere Platform\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**k:** `typing.Optional[int]` \n\nEnsures only the top `k` most likely tokens are considered for generation at each step.\nDefaults to `0`, min value of `0`, max value of `500`.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**p:** `typing.Optional[float]` \n\nEnsures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\nDefaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**seed:** `typing.Optional[int]` \n\nIf specified, the backend will make a best effort to sample tokens\ndeterministically, such that repeated requests with the same\nseed and parameters should return the same result. However,\ndeterminism cannot be totally guaranteed.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**stop_sequences:** `typing.Optional[typing.List[str]]` \n\nA list of up to 5 strings that the model will use to stop generation. 
If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**frequency_penalty:** `typing.Optional[float]` \n\nDefaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\nUsed to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**presence_penalty:** `typing.Optional[float]` \n\nDefaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\nUsed to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**raw_prompting:** `typing.Optional[bool]` \n\nWhen enabled, the user's prompt will be sent to the model without\nany pre-processing.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**tools:** `typing.Optional[typing.List[Tool]]` \n\nA list of available tools (functions) that the model may suggest invoking before producing a text response.\n\nWhen `tools` is passed (without `tool_results`), the `text` field in the response will be `\"\"` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. 
If no calls need to be made, the `tool_calls` array will be empty.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**tool_results:** `typing.Optional[typing.List[ToolResult]]` \n\nA list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.\nEach tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.\n\n**Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{\"status\": 200}`), make sure to wrap it in a list.\n```\ntool_results = [\n  {\n    \"call\": {\n      \"name\": <tool name>,\n      \"parameters\": {\n        <param name>: <param value>\n      }\n    },\n    \"outputs\": [{\n      <key>: <value>\n    }]\n  },\n  ...\n]\n```\n**Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**force_single_step:** `typing.Optional[bool]` — Forces the chat to be single step. Defaults to `false`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**response_format:** `typing.Optional[ResponseFormat]` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**safety_mode:** `typing.Optional[ChatRequestSafetyMode]` \n\nUsed to select the [safety instruction](https://docs.cohere.com/docs/safety-modes) inserted into the prompt. 
Defaults to `CONTEXTUAL`.\nWhen `NONE` is specified, the safety instruction will be omitted.\n\nSafety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.\n\n**Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n**Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.<a href=\"src/cohere/client.py\">generate_stream</a>(...) -> typing.Iterator[bytes]</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n<Warning>\nThis API is marked as \"Legacy\" and is no longer maintained. 
Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat with Streaming API.\n</Warning>\nGenerates realistic text conditioned on a given input.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.generate_stream(\n    prompt=\"Please explain to me how LLMs work\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**prompt:** `str` \n\nThe input text that serves as the starting point for generating the response.\nNote: The prompt will be pre-processed and modified before reaching the model.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**stream:** `typing.Literal` \n\nWhen `true`, the response will be a JSON stream of events. Streaming is beneficial for user interfaces that render the contents of the response piece by piece, as it gets generated.\n\nThe final event will contain the complete response, and will contain an `is_finished` field set to `true`. The event will also contain a `finish_reason`, which can be one of the following:\n- `COMPLETE` - the model sent back a finished reply\n- `MAX_TOKENS` - the reply was cut off because the model reached the maximum number of tokens for its context length\n- `ERROR` - something went wrong when generating the reply\n- `ERROR_TOXIC` - the model generated a reply that was deemed toxic\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**model:** `typing.Optional[str]` \n\nThe identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).\nSmaller, \"light\" models are faster, while larger models will perform better. 
[Custom models](https://docs.cohere.com/docs/training-custom-models) can also be supplied with their full ID.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**num_generations:** `typing.Optional[int]` — The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**max_tokens:** `typing.Optional[int]` \n\nThe maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\nThis parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.\n\nCan only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**truncate:** `typing.Optional[GenerateStreamRequestTruncate]` \n\nOne of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\nPassing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\nIf `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**temperature:** `typing.Optional[float]` \n\nA non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.\nDefaults to `0.75`, min value of `0.0`, max value of `5.0`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**seed:** `typing.Optional[int]` \n\nIf specified, the backend will make a best effort to sample tokens\ndeterministically, such that repeated requests with the same\nseed and parameters should return the same result. 
However,\ndeterminism cannot be totally guaranteed.\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**preset:** `typing.Optional[str]` \n\nIdentifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate).\nWhen a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**end_sequences:** `typing.Optional[typing.List[str]]` — The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**stop_sequences:** `typing.Optional[typing.List[str]]` — The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**k:** `typing.Optional[int]` \n\nEnsures only the top `k` most likely tokens are considered for generation at each step.\nDefaults to `0`, min value of `0`, max value of `500`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**p:** `typing.Optional[float]` \n\nEnsures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\nDefaults to `0.75`. min value of `0.01`, max value of `0.99`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**frequency_penalty:** `typing.Optional[float]` \n\nUsed to reduce repetitiveness of generated tokens. 
The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\nUsing `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**presence_penalty:** `typing.Optional[float]` \n\nDefaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\nCan be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\nUsing `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**return_likelihoods:** `typing.Optional[GenerateStreamRequestReturnLikelihoods]` \n\nOne of `GENERATION|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.\n\nIf `GENERATION` is selected, the token likelihoods will only be provided for generated text.\n\nWARNING: `ALL` is deprecated, and will be removed in a future release.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**raw_prompting:** `typing.Optional[bool]` — When enabled, the user's prompt will be sent to the model without any pre-processing.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.<a href=\"src/cohere/client.py\">generate</a>(...) -> Generation</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n<Warning>\nThis API is marked as \"Legacy\" and is no longer maintained. 
Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.\n</Warning>\nGenerates realistic text conditioned on a given input.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.generate(\n    prompt=\"Please explain to me how LLMs work\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**prompt:** `str` \n\nThe input text that serves as the starting point for generating the response.\nNote: The prompt will be pre-processed and modified before reaching the model.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**stream:** `typing.Literal` \n\nWhen `true`, the response will be a JSON stream of events. Streaming is beneficial for user interfaces that render the contents of the response piece by piece, as it gets generated.\n\nThe final event will contain the complete response, and will contain an `is_finished` field set to `true`. The event will also contain a `finish_reason`, which can be one of the following:\n- `COMPLETE` - the model sent back a finished reply\n- `MAX_TOKENS` - the reply was cut off because the model reached the maximum number of tokens for its context length\n- `ERROR` - something went wrong when generating the reply\n- `ERROR_TOXIC` - the model generated a reply that was deemed toxic\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**model:** `typing.Optional[str]` \n\nThe identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).\nSmaller, \"light\" models are faster, while larger models will perform better. 
[Custom models](https://docs.cohere.com/docs/training-custom-models) can also be supplied with their full ID.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**num_generations:** `typing.Optional[int]` — The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**max_tokens:** `typing.Optional[int]` \n\nThe maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\nThis parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.\n\nCan only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**truncate:** `typing.Optional[GenerateRequestTruncate]` \n\nOne of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\nPassing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\nIf `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**temperature:** `typing.Optional[float]` \n\nA non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.\nDefaults to `0.75`, min value of `0.0`, max value of `5.0`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**seed:** `typing.Optional[int]` \n\nIf specified, the backend will make a best effort to sample tokens\ndeterministically, such that repeated requests with the same\nseed and parameters should return the same result. 
However,\ndeterminism cannot be totally guaranteed.\nCompatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**preset:** `typing.Optional[str]` \n\nIdentifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate).\nWhen a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**end_sequences:** `typing.Optional[typing.List[str]]` — The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**stop_sequences:** `typing.Optional[typing.List[str]]` — The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**k:** `typing.Optional[int]` \n\nEnsures only the top `k` most likely tokens are considered for generation at each step.\nDefaults to `0`, min value of `0`, max value of `500`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**p:** `typing.Optional[float]` \n\nEnsures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\nDefaults to `0.75`. min value of `0.01`, max value of `0.99`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**frequency_penalty:** `typing.Optional[float]` \n\nUsed to reduce repetitiveness of generated tokens. 
The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\nUsing `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**presence_penalty:** `typing.Optional[float]` \n\nDefaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\nCan be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\nUsing `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**return_likelihoods:** `typing.Optional[GenerateRequestReturnLikelihoods]` \n\nOne of `GENERATION|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.\n\nIf `GENERATION` is selected, the token likelihoods will only be provided for generated text.\n\nWARNING: `ALL` is deprecated, and will be removed in a future release.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**raw_prompting:** `typing.Optional[bool]` — When enabled, the user's prompt will be sent to the model without any pre-processing.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.<a href=\"src/cohere/client.py\">embed</a>(...) -> EmbedResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nThis endpoint returns text and image embeddings. An embedding is a list of floating point numbers that captures semantic information about the content that it represents.\n\nEmbeddings can be used to create classifiers as well as empower semantic search. 
To learn more about embeddings, see the embedding page.\n\nIf you want to learn more how to use the embedding model, have a look at the [Semantic Search Guide](https://docs.cohere.com/docs/semantic-search).\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.embed(\n    texts=[\n        \"hello\",\n        \"goodbye\"\n    ],\n    model=\"embed-v4.0\",\n    input_type=\"classification\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**texts:** `typing.Optional[typing.List[str]]` — An array of strings for the model to embed. Maximum number of texts per call is `96`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**images:** `typing.Optional[typing.List[str]]` \n\nAn array of image data URIs for the model to embed. Maximum number of images per call is `1`.\n\nThe image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.\n\nImages are only supported with Embed v3.0 and newer models.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**model:** `typing.Optional[str]` — ID of one of the available [Embedding models](https://docs.cohere.com/docs/cohere-embed).\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**input_type:** `typing.Optional[EmbedInputType]` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**embedding_types:** `typing.Optional[typing.List[EmbeddingType]]` \n\nSpecifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.\n\n* `\"float\"`: Use this when you want to get back the default float embeddings. 
Supported with all Embed models.\n* `\"int8\"`: Use this when you want to get back signed int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n* `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n* `\"binary\"`: Use this when you want to get back signed binary embeddings. Supported with Embed v3.0 and newer Embed models.\n* `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Supported with Embed v3.0 and newer Embed models.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**truncate:** `typing.Optional[EmbedRequestTruncate]` \n\nOne of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\nPassing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\nIf `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.<a href=\"src/cohere/client.py\">rerank</a>(...) 
-> RerankResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nThis endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.rerank(\n    documents=[\n        {\n            \"text\": \"Carson City is the capital city of the American state of Nevada.\"\n        },\n        {\n            \"text\": \"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.\"\n        },\n        {\n            \"text\": \"Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.\"\n        },\n        {\n            \"text\": \"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.\"\n        },\n        {\n            \"text\": \"Capital punishment has existed in the United States since before the United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states.\"\n        }\n    ],\n    query=\"What is the capital of the United States?\",\n    top_n=3,\n    model=\"rerank-v4.0-pro\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**query:** `str` — The search query\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**documents:** `typing.List[RerankRequestDocumentsItem]` \n\nA list of document objects or strings to rerank.\nIf a document is provided the text field is required and all other fields will be preserved in the response.\n\nThe total max chunks (length of documents * max_chunks_per_doc) must be less than 10000.\n\nWe recommend a maximum of 1,000 documents for optimal endpoint performance.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**model:** `typing.Optional[str]` — The identifier of the model to use, eg `rerank-v3.5`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**top_n:** `typing.Optional[int]` — The number of most relevant documents or indices to return, defaults to the length of the documents\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**rank_fields:** `typing.Optional[typing.List[str]]` — If a JSON object is provided, you can specify which keys you would like to have considered for reranking. The model will rerank based on order of the fields passed in (i.e. rank_fields=['title','author','text'] will rerank using the values in title, author, text  sequentially. If the length of title, author, and text exceeds the context length of the model, the chunking will not re-consider earlier fields). 
If not provided, the model will use the default text field for ranking.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**return_documents:** `typing.Optional[bool]` \n\n- If false, returns results without the doc text - the api will return a list of {index, relevance score} where index is inferred from the list passed into the request.\n- If true, returns results with the doc text passed in - the api will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**max_chunks_per_doc:** `typing.Optional[int]` — The maximum number of chunks to produce internally from a document\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.<a href=\"src/cohere/client.py\">classify</a>(...) -> ClassifyResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nThis endpoint makes a prediction about which label fits the specified text inputs best. 
To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference.\nNote: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client, ClassifyExample\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.classify(\n    examples=[\n        ClassifyExample(\n            text=\"Dermatologists don\\'t like her!\",\n            label=\"Spam\",\n        ),\n        ClassifyExample(\n            text=\"\\'Hello, open to this?\\'\",\n            label=\"Spam\",\n        ),\n        ClassifyExample(\n            text=\"I need help please wire me $1000 right now\",\n            label=\"Spam\",\n        ),\n        ClassifyExample(\n            text=\"Nice to know you ;)\",\n            label=\"Spam\",\n        ),\n        ClassifyExample(\n            text=\"Please help me?\",\n            label=\"Spam\",\n        ),\n        ClassifyExample(\n            text=\"Your parcel will be delivered today\",\n            label=\"Not spam\",\n        ),\n        ClassifyExample(\n            text=\"Review changes to our Terms and Conditions\",\n            label=\"Not spam\",\n        ),\n        ClassifyExample(\n            text=\"Weekly sync notes\",\n            label=\"Not spam\",\n        ),\n        ClassifyExample(\n            text=\"\\'Re: Follow up from today\\'s meeting\\'\",\n            label=\"Not spam\",\n        ),\n        ClassifyExample(\n            text=\"Pre-read for tomorrow\",\n            label=\"Not spam\",\n        )\n    ],\n    inputs=[\n        \"Confirm your email address\",\n        \"hey i need u to send some $\"\n    ],\n    
model=\"YOUR-FINE-TUNED-MODEL-ID\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**inputs:** `typing.List[str]` \n\nA list of up to 96 texts to be classified. Each one must be a non-empty string.\nThere is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the \"max tokens\" column [here](https://docs.cohere.com/docs/models).\nNote: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**examples:** `typing.Optional[typing.List[ClassifyExample]]` \n\nAn array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: \"...\",label: \"...\"}`.\nNote: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**model:** `typing.Optional[str]` — ID of a [Fine-tuned](https://docs.cohere.com/v2/docs/classify-starting-the-training) Classify model\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**preset:** `typing.Optional[str]` — The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.com/playground). 
If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**truncate:** `typing.Optional[ClassifyRequestTruncate]` \n\nOne of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\nPassing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\nIf `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.<a href=\"src/cohere/client.py\">summarize</a>(...) -> SummarizeResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n<Warning>\nThis API is marked as \"Legacy\" and is no longer maintained. Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.\n</Warning>\nGenerates a summary in English for a given text.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.summarize(\n    text=\"Ice cream is a sweetened frozen food typically eaten as a snack or dessert. It may be made from milk or cream and is flavoured with a sweetener, either sugar or an alternative, and a spice, such as cocoa or vanilla, or with fruit such as strawberries or peaches. It can also be made by whisking a flavored cream base and liquid nitrogen together. Food coloring is sometimes added, in addition to stabilizers. 
The mixture is cooled below the freezing point of water and stirred to incorporate air spaces and to prevent detectable ice crystals from forming. The result is a smooth, semi-solid foam that is solid at very low temperatures (below 2 °C or 35 °F). It becomes more malleable as its temperature increases.\\n\\nThe meaning of the name \\\"ice cream\\\" varies from one country to another. In some countries, such as the United States, \\\"ice cream\\\" applies only to a specific variety, and most governments regulate the commercial use of the various terms according to the relative quantities of the main ingredients, notably the amount of cream. Products that do not meet the criteria to be called ice cream are sometimes labelled \\\"frozen dairy dessert\\\" instead. In other countries, such as Italy and Argentina, one word is used for all variants. Analogues made from dairy alternatives, such as goat\\'s or sheep\\'s milk, or milk substitutes (e.g., soy, cashew, coconut, almond milk or tofu), are available for those who are lactose intolerant, allergic to dairy protein or vegan.\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**text:** `str` — The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**length:** `typing.Optional[SummarizeRequestLength]` — One of `short`, `medium`, `long`, or `auto` defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**format:** `typing.Optional[SummarizeRequestFormat]` — One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. 
If `auto` is selected, the best option will be picked based on the input text.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**model:** `typing.Optional[str]` — The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, \"light\" models are faster, while larger models will perform better.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**extractiveness:** `typing.Optional[SummarizeRequestExtractiveness]` — One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**temperature:** `typing.Optional[float]` — Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**additional_command:** `typing.Optional[str]` — A free-form instruction for modifying how the summaries get generated. Should complete the sentence \"Generate a summary _\". Eg. \"focusing on the next steps\" or \"written by Yoda\"\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.<a href=\"src/cohere/client.py\">tokenize</a>(...) -> TokenizeResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nThis endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). 
To learn more about tokenization and byte pair encoding, see the tokens page.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.tokenize(\n    text=\"tokenize me! :D\",\n    model=\"command\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**text:** `str` — The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**model:** `str` — The input will be tokenized by the tokenizer that is used by this model.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.<a href=\"src/cohere/client.py\">detokenize</a>(...) -> DetokenizeResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nThis endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.detokenize(\n    tokens=[\n        10002,\n        2261,\n        2012,\n        8,\n        2792,\n        43\n    ],\n    model=\"command\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**tokens:** `typing.List[int]` — The list of tokens to be detokenized.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**model:** `str` — An optional parameter to provide the model name. 
This will ensure that the detokenization is done by the tokenizer used by that model.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.<a href=\"src/cohere/client.py\">check_api_key</a>() -> CheckApiKeyResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nChecks that the api key in the Authorization header is valid and active\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.check_api_key()\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n## V2\n<details><summary><code>client.v2.<a href=\"src/cohere/v2/client.py\">chat_stream</a>(...) -> typing.Iterator[bytes]</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nGenerates a text response to a user message. 
To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).\n\nFollow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client, ChatMessageV2_User\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.v2.chat_stream(\n    model=\"command-a-03-2025\",\n    messages=[\n        ChatMessageV2_User(\n            content=\"Tell me about LLMs\",\n        )\n    ],\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**stream:** `typing.Literal` \n\nDefaults to `false`.\n\nWhen `true`, the response will be a SSE stream of events.\n\nStreaming is beneficial for user interfaces that render the contents of the response piece by piece, as it gets generated.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**model:** `str` — The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**messages:** `ChatMessages` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**tools:** `typing.Optional[typing.List[ToolV2]]` \n\nA list of tools (functions) available to the model. The model response may contain 'tool_calls' to the specified tools.\n\nLearn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**strict_tools:** `typing.Optional[bool]` \n\nWhen set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. 
Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).\n\n**Note**: The first few requests with a new set of tools will take longer to process.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**documents:** `typing.Optional[typing.List[V2ChatStreamRequestDocumentsItem]]` — A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**citation_options:** `typing.Optional[CitationOptions]` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**response_format:** `typing.Optional[ResponseFormatV2]` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**safety_mode:** `typing.Optional[V2ChatStreamRequestSafetyMode]` \n\nUsed to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.\nWhen `OFF` is specified, the safety instruction will be omitted.\n\nSafety modes are not yet configurable in combination with `tools` and `documents` parameters.\n\n**Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n**Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**max_tokens:** `typing.Optional[int]` \n\nThe maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).\n\n**Note**: Setting a low value may result in incomplete generations. 
In such cases, the `finish_reason` field in the response will be set to `\"MAX_TOKENS\"`.\n\n**Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**stop_sequences:** `typing.Optional[typing.List[str]]` — A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**temperature:** `typing.Optional[float]` \n\nDefaults to `0.3`.\n\nA non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\nRandomness can be further maximized by increasing the  value of the `p` parameter.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**seed:** `typing.Optional[int]` \n\nIf specified, the backend will make a best effort to sample tokens\ndeterministically, such that repeated requests with the same\nseed and parameters should return the same result. However,\ndeterminism cannot be totally guaranteed.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**frequency_penalty:** `typing.Optional[float]` \n\nDefaults to `0.0`, min value of `0.0`, max value of `1.0`.\nUsed to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**presence_penalty:** `typing.Optional[float]` \n\nDefaults to `0.0`, min value of `0.0`, max value of `1.0`.\nUsed to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**k:** `typing.Optional[int]` \n\nEnsures that only the top `k` most likely tokens are considered for generation at each step. When `k` is set to `0`, k-sampling is disabled.\nDefaults to `0`, min value of `0`, max value of `500`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**p:** `typing.Optional[float]` \n\nEnsures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\nDefaults to `0.75`. min value of `0.01`, max value of `0.99`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**logprobs:** `typing.Optional[bool]` — Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**tool_choice:** `typing.Optional[V2ChatStreamRequestToolChoice]` \n\nUsed to control whether or not the model will be forced to use a tool when answering. When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.\nWhen `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.\nIf tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.\n\n**Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**thinking:** `typing.Optional[Thinking]` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**priority:** `typing.Optional[int]` — Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). 
When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.v2.<a href=\"src/cohere/v2/client.py\">chat</a>(...) -> V2ChatResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nGenerates a text response to a user message. To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).\n\nFollow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client, ChatMessageV2_User\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.v2.chat(\n    model=\"command-a-03-2025\",\n    messages=[\n        ChatMessageV2_User(\n            content=\"Tell me about LLMs\",\n        )\n    ],\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**stream:** `typing.Literal` \n\nDefaults to `false`.\n\nWhen `true`, the response will be a SSE stream of events.\n\nStreaming is beneficial for user interfaces that render the contents of the response piece by piece, as it gets generated.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**model:** `str` — The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**messages:** `ChatMessages` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**tools:** `typing.Optional[typing.List[ToolV2]]` \n\nA list of tools (functions) available to the model. 
The model response may contain 'tool_calls' to the specified tools.\n\nLearn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**strict_tools:** `typing.Optional[bool]` \n\nWhen set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).\n\n**Note**: The first few requests with a new set of tools will take longer to process.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**documents:** `typing.Optional[typing.List[V2ChatRequestDocumentsItem]]` — A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**citation_options:** `typing.Optional[CitationOptions]` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**response_format:** `typing.Optional[ResponseFormatV2]` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**safety_mode:** `typing.Optional[V2ChatRequestSafetyMode]` \n\nUsed to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.\nWhen `OFF` is specified, the safety instruction will be omitted.\n\nSafety modes are not yet configurable in combination with `tools` and `documents` parameters.\n\n**Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n**Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**max_tokens:** `typing.Optional[int]` \n\nThe maximum number of output tokens the model will generate in the response. 
If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).\n\n**Note**: Setting a low value may result in incomplete generations. In such cases, the `finish_reason` field in the response will be set to `\"MAX_TOKENS\"`.\n\n**Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**stop_sequences:** `typing.Optional[typing.List[str]]` — A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**temperature:** `typing.Optional[float]` \n\nDefaults to `0.3`.\n\nA non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\nRandomness can be further maximized by increasing the  value of the `p` parameter.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**seed:** `typing.Optional[int]` \n\nIf specified, the backend will make a best effort to sample tokens\ndeterministically, such that repeated requests with the same\nseed and parameters should return the same result. However,\ndeterminism cannot be totally guaranteed.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**frequency_penalty:** `typing.Optional[float]` \n\nDefaults to `0.0`, min value of `0.0`, max value of `1.0`.\nUsed to reduce repetitiveness of generated tokens. 
The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**presence_penalty:** `typing.Optional[float]` \n\nDefaults to `0.0`, min value of `0.0`, max value of `1.0`.\nUsed to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**k:** `typing.Optional[int]` \n\nEnsures that only the top `k` most likely tokens are considered for generation at each step. When `k` is set to `0`, k-sampling is disabled.\nDefaults to `0`, min value of `0`, max value of `500`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**p:** `typing.Optional[float]` \n\nEnsures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\nDefaults to `0.75`. min value of `0.01`, max value of `0.99`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**logprobs:** `typing.Optional[bool]` — Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**tool_choice:** `typing.Optional[V2ChatRequestToolChoice]` \n\nUsed to control whether or not the model will be forced to use a tool when answering. 
When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.\nWhen `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.\nIf tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.\n\n**Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**thinking:** `typing.Optional[Thinking]` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**priority:** `typing.Optional[int]` — Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.v2.<a href=\"src/cohere/v2/client.py\">embed</a>(...) -> EmbedByTypeResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nThis endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.\n\nEmbeddings can be used to create text classifiers as well as empower semantic search. 
To learn more about embeddings, see the embedding page.\n\nIf you want to learn more about how to use the embedding model, have a look at the [Semantic Search Guide](https://docs.cohere.com/docs/semantic-search).\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.v2.embed(\n    texts=[\n        \"hello\",\n        \"goodbye\"\n    ],\n    model=\"embed-v4.0\",\n    input_type=\"classification\",\n    embedding_types=[\n        \"float\"\n    ],\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**model:** `str` — ID of one of the available [Embedding models](https://docs.cohere.com/docs/cohere-embed).\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**input_type:** `EmbedInputType` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**texts:** `typing.Optional[typing.List[str]]` — An array of strings for the model to embed. Maximum number of texts per call is `96`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**images:** `typing.Optional[typing.List[str]]` \n\nAn array of image data URIs for the model to embed. Maximum number of images per call is `1`.\n\nThe image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and have a maximum size of 5MB.\n\nImage embeddings are supported with Embed v3.0 and newer models.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**inputs:** `typing.Optional[typing.List[EmbedInput]]` — An array of inputs for the model to embed. Maximum number of inputs per call is `96`. An input can contain a mix of text and image components.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**max_tokens:** `typing.Optional[int]` — The maximum number of tokens to embed per input. 
If the input text is longer than this, it will be truncated according to the `truncate` parameter.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**output_dimension:** `typing.Optional[int]` \n\nThe number of dimensions of the output embedding. This is only available for `embed-v4` and newer models.\nPossible values are `256`, `512`, `1024`, and `1536`. The default is `1536`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**embedding_types:** `typing.Optional[typing.List[EmbeddingType]]` \n\nSpecifies the types of embeddings you want to get back. Can be one or more of the following types.\n\n* `\"float\"`: Use this when you want to get back the default float embeddings. Supported with all Embed models.\n* `\"int8\"`: Use this when you want to get back signed int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n* `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n* `\"binary\"`: Use this when you want to get back signed binary embeddings. Supported with Embed v3.0 and newer Embed models.\n* `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Supported with Embed v3.0 and newer Embed models.\n* `\"base64\"`: Use this when you want to get back base64 embeddings. Supported with Embed v3.0 and newer Embed models.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**truncate:** `typing.Optional[V2EmbedRequestTruncate]` \n\nOne of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\nPassing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\nIf `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**priority:** `typing.Optional[int]` — Controls how early the request is handled. 
Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.v2.<a href=\"src/cohere/v2/client.py\">rerank</a>(...) -> V2RerankResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nThis endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.v2.rerank(\n    documents=[\n        \"Carson City is the capital city of the American state of Nevada.\",\n        \"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.\",\n        \"Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.\",\n        \"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.\",\n        \"Capital punishment has existed in the United States since before the United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states.\"\n    ],\n    query=\"What is the capital of the United States?\",\n    top_n=3,\n    model=\"rerank-v4.0-pro\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**model:** `str` — The identifier of the model to use, eg `rerank-v3.5`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**query:** `str` — The search query\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**documents:** `typing.List[str]` \n\nA list of texts that will be compared to the `query`.\nFor optimal performance we recommend against sending more than 1,000 documents in a single request.\n\n**Note**: long documents will automatically be truncated to the value of `max_tokens_per_doc`.\n\n**Note**: structured data should be formatted as YAML strings for best performance.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**top_n:** `typing.Optional[int]` — Limits the number of returned rerank results to the specified value. If not passed, all the rerank results will be returned.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**max_tokens_per_doc:** `typing.Optional[int]` — Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**priority:** `typing.Optional[int]` — Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n## Batches\n<details><summary><code>client.batches.<a href=\"src/cohere/batches/client.py\">list</a>(...) 
-> ListBatchesResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nList the batches for the current user\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.batches.list(\n    page_size=1,\n    page_token=\"page_token\",\n    order_by=\"order_by\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**page_size:** `typing.Optional[int]` \n\nThe maximum number of batches to return. The service may return fewer than\nthis value.\nIf unspecified, at most 50 batches will be returned.\nThe maximum value is 1000; values above 1000 will be coerced to 1000.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**page_token:** `typing.Optional[str]` \n\nA page token, received from a previous `ListBatches` call.\nProvide this to retrieve the subsequent page.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**order_by:** `typing.Optional[str]` \n\nBatches can be ordered by creation time or last updated time.\nUse `created_at` for creation time or `updated_at` for last updated time.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.batches.<a href=\"src/cohere/batches/client.py\">create</a>(...) 
-> CreateBatchResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nCreates and executes a batch from an uploaded dataset of requests\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\nfrom cohere.batches import Batch\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.batches.create(\n    request=Batch(\n        name=\"name\",\n        input_dataset_id=\"input_dataset_id\",\n        model=\"model\",\n    ),\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**request:** `Batch` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.batches.<a href=\"src/cohere/batches/client.py\">retrieve</a>(...) -> GetBatchResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nRetrieves a batch\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.batches.retrieve(\n    id=\"id\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` — The batch ID.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.batches.<a href=\"src/cohere/batches/client.py\">cancel</a>(...) 
-> CancelBatchResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nCancels an in-progress batch\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.batches.cancel(\n    id=\"id\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` — The batch ID.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n## EmbedJobs\n<details><summary><code>client.embed_jobs.<a href=\"src/cohere/embed_jobs/client.py\">list</a>() -> ListEmbedJobResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nThe list embed job endpoint allows users to view all embed jobs history for that specific user.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.embed_jobs.list()\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.embed_jobs.<a href=\"src/cohere/embed_jobs/client.py\">create</a>(...) -> CreateEmbedJobResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nThis API launches an async Embed job for a [Dataset](https://docs.cohere.com/docs/datasets) of type `embed-input`. 
The result of a completed embed job is new Dataset of type `embed-output`, which contains the original text entries and the corresponding embeddings.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.embed_jobs.create(\n    model=\"model\",\n    dataset_id=\"dataset_id\",\n    input_type=\"search_document\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**model:** `str` \n\nID of the embedding model.\n\nAvailable models and corresponding embedding dimensions:\n\n- `embed-english-v3.0` : 1024\n- `embed-multilingual-v3.0` : 1024\n- `embed-english-light-v3.0` : 384\n- `embed-multilingual-light-v3.0` : 384\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**dataset_id:** `str` — ID of a [Dataset](https://docs.cohere.com/docs/datasets). The Dataset must be of type `embed-input` and must have a validation status `Validated`\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**input_type:** `EmbedInputType` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**name:** `typing.Optional[str]` — The name of the embed job.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**embedding_types:** `typing.Optional[typing.List[EmbeddingType]]` \n\nSpecifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.\n\n* `\"float\"`: Use this when you want to get back the default float embeddings. Valid for all models.\n* `\"int8\"`: Use this when you want to get back signed int8 embeddings. Valid for v3 and newer model versions.\n* `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Valid for v3 and newer model versions.\n* `\"binary\"`: Use this when you want to get back signed binary embeddings. 
Valid for v3 and newer model versions.\n* `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Valid for v3 and newer model versions.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**truncate:** `typing.Optional[CreateEmbedJobRequestTruncate]` \n\nOne of `START|END` to specify how the API will handle inputs longer than the maximum token length.\n\nPassing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.embed_jobs.<a href=\"src/cohere/embed_jobs/client.py\">get</a>(...) -> EmbedJob</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nThis API retrieves the details about an embed job started by the same user.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.embed_jobs.get(\n    id=\"id\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` — The ID of the embed job to retrieve.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.embed_jobs.<a href=\"src/cohere/embed_jobs/client.py\">cancel</a>(...)</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nThis API allows users to cancel an active embed job. 
Once invoked, the embedding process will be terminated, and users will be charged for the embeddings processed up to the cancellation point. It's important to note that partial results will not be available to users after cancellation.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.embed_jobs.cancel(\n    id=\"id\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` — The ID of the embed job to cancel.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n## Datasets\n<details><summary><code>client.datasets.<a href=\"src/cohere/datasets/client.py\">list</a>(...) -> DatasetsListResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nList datasets that have been created.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\nimport datetime\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.datasets.list(\n    dataset_type=\"datasetType\",\n    before=datetime.datetime.fromisoformat(\"2024-01-15T09:30:00+00:00\"),\n    after=datetime.datetime.fromisoformat(\"2024-01-15T09:30:00+00:00\"),\n    limit=1.1,\n    offset=1.1,\n    validation_status=\"unknown\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**dataset_type:** `typing.Optional[str]` — optional filter by dataset type\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**before:** `typing.Optional[datetime.datetime]` — optional filter before a date\n    
\n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**after:** `typing.Optional[datetime.datetime]` — optional filter after a date\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**limit:** `typing.Optional[float]` — optional limit to number of results\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**offset:** `typing.Optional[float]` — optional offset to start of results\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**validation_status:** `typing.Optional[DatasetValidationStatus]` — optional filter by validation status\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.datasets.<a href=\"src/cohere/datasets/client.py\">create</a>(...) -> DatasetsCreateResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nCreate a dataset by uploading a file. See ['Dataset Creation'](https://docs.cohere.com/docs/datasets#dataset-creation) for more information.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.datasets.create(\n    name=\"name\",\n    type=\"embed-input\",\n    keep_original_file=True,\n    skip_malformed_input=True,\n    text_separator=\"text_separator\",\n    csv_delimiter=\"csv_delimiter\",\n    data=\"example_data\",\n    eval_data=\"example_eval_data\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**name:** `str` — The name of the uploaded dataset.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**type:** `DatasetType` — The dataset type, which is used to validate the data. 
The only valid type is `embed-input` used in conjunction with the Embed Jobs API.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**data:** `core.File` — The file to upload\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**keep_original_file:** `typing.Optional[bool]` — Indicates if the original file should be stored.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**skip_malformed_input:** `typing.Optional[bool]` — Indicates whether rows with malformed input should be dropped (instead of failing the validation check). Dropped rows will be returned in the warnings field.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**keep_fields:** `typing.Optional[typing.Union[str, typing.Sequence[str]]]` — List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `keep_fields` are missing from the uploaded file, Dataset validation will fail.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**optional_fields:** `typing.Optional[typing.Union[str, typing.Sequence[str]]]` — List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, Datasets of type `embed-input` will drop all fields other than the required `text` field. 
If any of the fields in `optional_fields` are missing from the uploaded file, Dataset validation will pass.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**text_separator:** `typing.Optional[str]` — Raw .txt uploads will be split into entries using the text_separator value.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**csv_delimiter:** `typing.Optional[str]` — The delimiter used for .csv uploads.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**eval_data:** `typing.Optional[core.File]` — An optional evaluation file to upload\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.datasets.<a href=\"src/cohere/datasets/client.py\">get_usage</a>() -> DatasetsGetUsageResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nView the dataset storage usage for your Organization. Each Organization can have up to 10GB of storage across all their users.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.datasets.get_usage()\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.datasets.<a href=\"src/cohere/datasets/client.py\">get</a>(...) -> DatasetsGetResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nRetrieve a dataset by ID. 
See ['Datasets'](https://docs.cohere.com/docs/datasets) for more information.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.datasets.get(\n    id=\"id\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.datasets.<a href=\"src/cohere/datasets/client.py\">delete</a>(...) -> typing.Dict[str, typing.Any]</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nDelete a dataset by ID. Datasets are automatically deleted after 30 days, but they can also be deleted manually.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.datasets.delete(\n    id=\"id\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n## Connectors\n<details><summary><code>client.connectors.<a href=\"src/cohere/connectors/client.py\">list</a>(...) -> ListConnectorsResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nReturns a list of connectors ordered by descending creation date (newer first). 
See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.connectors.list(\n    limit=1.1,\n    offset=1.1,\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**limit:** `typing.Optional[float]` — Maximum number of connectors to return [0, 100].\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**offset:** `typing.Optional[float]` — Number of connectors to skip before returning results [0, inf].\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.connectors.<a href=\"src/cohere/connectors/client.py\">create</a>(...) -> CreateConnectorResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nCreates a new connector. The connector is tested during registration and will cancel registration when the test is unsuccessful. 
See ['Creating and Deploying a Connector'](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) for more information.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.connectors.create(\n    name=\"name\",\n    url=\"url\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**name:** `str` — A human-readable name for the connector.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**url:** `str` — The URL of the connector that will be used to search for documents.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**description:** `typing.Optional[str]` — A description of the connector.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**excludes:** `typing.Optional[typing.List[str]]` — A list of fields to exclude from the prompt (fields remain in the document).\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**oauth:** `typing.Optional[CreateConnectorOAuth]` — The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**active:** `typing.Optional[bool]` — Whether the connector is active or not.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**continue_on_failure:** `typing.Optional[bool]` — Whether a chat request should continue or not if the request to this connector fails.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**service_auth:** `typing.Optional[CreateConnectorServiceAuth]` — The service to service authentication configuration for the connector. Cannot be specified if oauth is specified.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.connectors.<a href=\"src/cohere/connectors/client.py\">get</a>(...) 
-> GetConnectorResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nRetrieve a connector by ID. See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.connectors.get(\n    id=\"id\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` — The ID of the connector to retrieve.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.connectors.<a href=\"src/cohere/connectors/client.py\">delete</a>(...) -> DeleteConnectorResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nDelete a connector by ID. See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.connectors.delete(\n    id=\"id\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` — The ID of the connector to delete.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.connectors.<a href=\"src/cohere/connectors/client.py\">update</a>(...) 
-> UpdateConnectorResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nUpdate a connector by ID. Omitted fields will not be updated. See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.connectors.update(\n    id=\"id\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` — The ID of the connector to update.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**name:** `typing.Optional[str]` — A human-readable name for the connector.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**url:** `typing.Optional[str]` — The URL of the connector that will be used to search for documents.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**excludes:** `typing.Optional[typing.List[str]]` — A list of fields to exclude from the prompt (fields remain in the document).\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**oauth:** `typing.Optional[CreateConnectorOAuth]` — The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**active:** `typing.Optional[bool]` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**continue_on_failure:** `typing.Optional[bool]` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**service_auth:** `typing.Optional[CreateConnectorServiceAuth]` — The service to service authentication configuration for the connector. 
Cannot be specified if oauth is specified.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.connectors.<a href=\"src/cohere/connectors/client.py\">o_auth_authorize</a>(...) -> OAuthAuthorizeResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nAuthorize the connector with the given ID for the connector oauth app.  See ['Connector Authentication'](https://docs.cohere.com/docs/connector-authentication) for more information.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.connectors.o_auth_authorize(\n    id=\"id\",\n    after_token_redirect=\"after_token_redirect\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` — The ID of the connector to authorize.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**after_token_redirect:** `typing.Optional[str]` — The URL to redirect to after the connector has been authorized.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n## Models\n<details><summary><code>client.models.<a href=\"src/cohere/models/client.py\">get</a>(...) 
-> GetModelResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nReturns the details of a model, provided its name.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.models.get(\n    model=\"command-a-03-2025\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**model:** `str` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.models.<a href=\"src/cohere/models/client.py\">list</a>(...) -> ListModelsResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nReturns a list of models available for use.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.models.list(\n    page_size=1.1,\n    page_token=\"page_token\",\n    endpoint=\"chat\",\n    default_only=True,\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**page_size:** `typing.Optional[float]` \n\nMaximum number of models to include in a page\nDefaults to `20`, min value of `1`, max value of `1000`.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**page_token:** `typing.Optional[str]` — Page token provided in the `next_page_token` field of a previous response.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**endpoint:** `typing.Optional[CompatibleEndpoint]` — When provided, filters the list of models to only those that are compatible with the specified endpoint.\n    
\n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**default_only:** `typing.Optional[bool]` — When provided, filters the list of models to only the default model to the endpoint. This parameter is only valid when `endpoint` is provided.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n## /finetuning\n<details><summary><code>client.finetuning.<a href=\"src/cohere/finetuning/client.py\">list_finetuned_models</a>(...) -> ListFinetunedModelsResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nReturns a list of fine-tuned models that the user has access to.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.finetuning.list_finetuned_models(\n    page_size=1,\n    page_token=\"page_token\",\n    order_by=\"order_by\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**page_size:** `typing.Optional[int]` \n\nMaximum number of results to be returned by the server. If 0, defaults to\n50.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**page_token:** `typing.Optional[str]` — Request a specific page of the list results.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**order_by:** `typing.Optional[str]` \n\nComma separated list of fields. For example: \"created_at,name\". The default\nsorting order is ascending. To specify descending order for a field, append\n\" desc\" to the field name. 
For example: \"created_at desc,name\".\n\nSupported sorting fields:\n  - created_at (default)\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.finetuning.<a href=\"src/cohere/finetuning/client.py\">create_finetuned_model</a>(...) -> CreateFinetunedModelResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nCreates a new fine-tuned model. The model will be trained on the dataset specified in the request body. The training process may take some time, and the model will be available once the training is complete.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\nfrom cohere.finetuning.finetuning import FinetunedModel, Settings, BaseModel\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.finetuning.create_finetuned_model(\n    request=FinetunedModel(\n        name=\"name\",\n        settings=Settings(\n            base_model=BaseModel(\n                base_type=\"BASE_TYPE_UNSPECIFIED\",\n            ),\n            dataset_id=\"dataset_id\",\n        ),\n    ),\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**request:** `FinetunedModel` \n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.finetuning.<a href=\"src/cohere/finetuning/client.py\">get_finetuned_model</a>(...) 
-> GetFinetunedModelResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nRetrieve a fine-tuned model by its ID.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.finetuning.get_finetuned_model(\n    id=\"id\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` — The fine-tuned model ID.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.finetuning.<a href=\"src/cohere/finetuning/client.py\">delete_finetuned_model</a>(...) -> DeleteFinetunedModelResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nDeletes a fine-tuned model. The model will be removed from the system and will no longer be available for use.\nThis operation is irreversible.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.finetuning.delete_finetuned_model(\n    id=\"id\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` — The fine-tuned model ID.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.finetuning.<a href=\"src/cohere/finetuning/client.py\">update_finetuned_model</a>(...) 
-> UpdateFinetunedModelResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nUpdates the fine-tuned model with the given ID. The model will be updated with the new settings and name provided in the request body.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\nfrom cohere.finetuning.finetuning import Settings, BaseModel\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.finetuning.update_finetuned_model(\n    id=\"id\",\n    name=\"name\",\n    settings=Settings(\n        base_model=BaseModel(\n            base_type=\"BASE_TYPE_UNSPECIFIED\",\n        ),\n        dataset_id=\"dataset_id\",\n    ),\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**id:** `str` — FinetunedModel ID.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**name:** `str` — FinetunedModel name (e.g. `foobar`).\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**settings:** `Settings` — FinetunedModel settings such as dataset, hyperparameters...\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.finetuning.<a href=\"src/cohere/finetuning/client.py\">list_events</a>(...) 
-> ListEventsResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nReturns a list of events that occurred during the life-cycle of the fine-tuned model.\nThe events are ordered by creation time, with the most recent event first.\nThe list can be paginated using `page_size` and `page_token` parameters.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.finetuning.list_events(\n    finetuned_model_id=\"finetuned_model_id\",\n    page_size=1,\n    page_token=\"page_token\",\n    order_by=\"order_by\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**finetuned_model_id:** `str` — The parent fine-tuned model ID.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**page_size:** `typing.Optional[int]` \n\nMaximum number of results to be returned by the server. If 0, defaults to\n50.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**page_token:** `typing.Optional[str]` — Request a specific page of the list results.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**order_by:** `typing.Optional[str]` \n\nComma separated list of fields. For example: \"created_at,name\". The default\nsorting order is ascending. To specify descending order for a field, append\n\" desc\" to the field name. For example: \"created_at desc,name\".\n\nSupported sorting fields:\n  - created_at (default)\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n<details><summary><code>client.finetuning.<a href=\"src/cohere/finetuning/client.py\">list_training_step_metrics</a>(...) 
-> ListTrainingStepMetricsResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nReturns a list of metrics measured during the training of a fine-tuned model.\nThe metrics are ordered by step number, with the most recent step first.\nThe list can be paginated using `page_size` and `page_token` parameters.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.finetuning.list_training_step_metrics(\n    finetuned_model_id=\"finetuned_model_id\",\n    page_size=1,\n    page_token=\"page_token\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**finetuned_model_id:** `str` — The parent fine-tuned model ID.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**page_size:** `typing.Optional[int]` \n\nMaximum number of results to be returned by the server. If 0, defaults to\n50.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**page_token:** `typing.Optional[str]` — Request a specific page of the list results.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n## Audio Transcriptions\n<details><summary><code>client.audio.transcriptions.<a href=\"src/cohere/audio/transcriptions/client.py\">create</a>(...) 
-> AudioTranscriptionsCreateResponse</code></summary>\n<dl>\n<dd>\n\n#### 📝 Description\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\nTranscribe an audio file.\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### 🔌 Usage\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n```python\nfrom cohere import Client\nfrom cohere.environment import ClientEnvironment\n\nclient = Client(\n    token=\"<token>\",\n    environment=ClientEnvironment.PRODUCTION,\n)\n\nclient.audio.transcriptions.create(\n    file=\"example_file\",\n    model=\"model\",\n    language=\"language\",\n)\n\n```\n</dd>\n</dl>\n</dd>\n</dl>\n\n#### ⚙️ Parameters\n\n<dl>\n<dd>\n\n<dl>\n<dd>\n\n**model:** `str` — ID of the model to use.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**language:** `str` — The language of the input audio, supplied in [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) format.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**file:** `core.File` — The audio file object to transcribe. Supported file extensions are flac, mp3, mpeg, mpga, ogg, and wav.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**temperature:** `typing.Optional[float]` — The sampling temperature, between 0 and 1. Higher values like 0.8 make the output more random, while lower values like 0.2 make it more focused and deterministic.\n    \n</dd>\n</dl>\n\n<dl>\n<dd>\n\n**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.\n    \n</dd>\n</dl>\n</dd>\n</dl>\n\n\n</dd>\n</dl>\n</details>\n\n"
  },
  {
    "path": "requirements.txt",
    "content": "fastavro==1.9.4\nhttpx>=0.21.2\npydantic>= 1.9.2\npydantic-core>=2.18.2,<2.44.0\nrequests==2.0.0\ntokenizers>=0.15,<1\ntypes-requests==2.0.0\ntyping_extensions>= 4.0.0\n"
  },
  {
    "path": "src/cohere/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .types import (\n        ApiMeta,\n        ApiMetaApiVersion,\n        ApiMetaBilledUnits,\n        ApiMetaTokens,\n        AssistantChatMessageV2,\n        AssistantMessage,\n        AssistantMessageResponse,\n        AssistantMessageResponseContentItem,\n        AssistantMessageV2Content,\n        AssistantMessageV2ContentOneItem,\n        AuthTokenType,\n        ChatCitation,\n        ChatCitationGenerationEvent,\n        ChatCitationType,\n        ChatConnector,\n        ChatContentDeltaEvent,\n        ChatContentDeltaEventDelta,\n        ChatContentDeltaEventDeltaMessage,\n        ChatContentDeltaEventDeltaMessageContent,\n        ChatContentEndEvent,\n        ChatContentStartEvent,\n        ChatContentStartEventDelta,\n        ChatContentStartEventDeltaMessage,\n        ChatContentStartEventDeltaMessageContent,\n        ChatContentStartEventDeltaMessageContentType,\n        ChatDataMetrics,\n        ChatDebugEvent,\n        ChatDocument,\n        ChatDocumentSource,\n        ChatFinishReason,\n        ChatMessage,\n        ChatMessageEndEvent,\n        ChatMessageEndEventDelta,\n        ChatMessageStartEvent,\n        ChatMessageStartEventDelta,\n        ChatMessageStartEventDeltaMessage,\n        ChatMessageV2,\n        ChatMessages,\n        ChatRequestCitationQuality,\n        ChatRequestPromptTruncation,\n        ChatRequestSafetyMode,\n        ChatSearchQueriesGenerationEvent,\n        ChatSearchQuery,\n        ChatSearchResult,\n        ChatSearchResultConnector,\n        ChatSearchResultsEvent,\n        ChatStreamEndEvent,\n        ChatStreamEndEventFinishReason,\n        ChatStreamEvent,\n        ChatStreamEventType,\n        ChatStreamRequestCitationQuality,\n        ChatStreamRequestPromptTruncation,\n        ChatStreamRequestSafetyMode,\n        
ChatStreamStartEvent,\n        ChatTextContent,\n        ChatTextGenerationEvent,\n        ChatTextResponseFormat,\n        ChatTextResponseFormatV2,\n        ChatThinkingContent,\n        ChatToolCallDeltaEvent,\n        ChatToolCallDeltaEventDelta,\n        ChatToolCallDeltaEventDeltaMessage,\n        ChatToolCallDeltaEventDeltaMessageToolCalls,\n        ChatToolCallDeltaEventDeltaMessageToolCallsFunction,\n        ChatToolCallEndEvent,\n        ChatToolCallStartEvent,\n        ChatToolCallStartEventDelta,\n        ChatToolCallStartEventDeltaMessage,\n        ChatToolCallsChunkEvent,\n        ChatToolCallsGenerationEvent,\n        ChatToolMessage,\n        ChatToolPlanDeltaEvent,\n        ChatToolPlanDeltaEventDelta,\n        ChatToolPlanDeltaEventDeltaMessage,\n        ChatToolSource,\n        ChatbotMessage,\n        CheckApiKeyResponse,\n        Citation,\n        CitationEndEvent,\n        CitationGenerationStreamedChatResponse,\n        CitationOptions,\n        CitationOptionsMode,\n        CitationStartEvent,\n        CitationStartEventDelta,\n        CitationStartEventDeltaMessage,\n        CitationType,\n        ClassifyDataMetrics,\n        ClassifyExample,\n        ClassifyRequestTruncate,\n        ClassifyResponse,\n        ClassifyResponseClassificationsItem,\n        ClassifyResponseClassificationsItemClassificationType,\n        ClassifyResponseClassificationsItemLabelsValue,\n        CompatibleEndpoint,\n        Connector,\n        ConnectorAuthStatus,\n        ConnectorOAuth,\n        Content,\n        CreateConnectorOAuth,\n        CreateConnectorResponse,\n        CreateConnectorServiceAuth,\n        CreateEmbedJobResponse,\n        Dataset,\n        DatasetPart,\n        DatasetType,\n        DatasetValidationStatus,\n        DebugStreamedChatResponse,\n        DeleteConnectorResponse,\n        DetokenizeResponse,\n        Document,\n        DocumentContent,\n        DocumentSource,\n        DocumentToolContent,\n        EmbedByTypeResponse,\n 
       EmbedByTypeResponseEmbeddings,\n        EmbedByTypeResponseResponseType,\n        EmbedContent,\n        EmbedFloatsResponse,\n        EmbedImage,\n        EmbedImageUrl,\n        EmbedInput,\n        EmbedInputType,\n        EmbedJob,\n        EmbedJobStatus,\n        EmbedJobTruncate,\n        EmbedRequestTruncate,\n        EmbedResponse,\n        EmbedText,\n        EmbeddingType,\n        EmbeddingsByTypeEmbedResponse,\n        EmbeddingsFloatsEmbedResponse,\n        FinetuneDatasetMetrics,\n        FinishReason,\n        GenerateRequestReturnLikelihoods,\n        GenerateRequestTruncate,\n        GenerateStreamEnd,\n        GenerateStreamEndResponse,\n        GenerateStreamError,\n        GenerateStreamEvent,\n        GenerateStreamRequestReturnLikelihoods,\n        GenerateStreamRequestTruncate,\n        GenerateStreamText,\n        GenerateStreamedResponse,\n        Generation,\n        GetConnectorResponse,\n        GetModelResponse,\n        GetModelResponseSamplingDefaults,\n        Image,\n        ImageContent,\n        ImageUrl,\n        ImageUrlContent,\n        ImageUrlDetail,\n        ImageUrlEmbedContent,\n        JsonObjectResponseFormat,\n        JsonObjectResponseFormatV2,\n        JsonResponseFormat,\n        JsonResponseFormatV2,\n        LabelMetric,\n        ListConnectorsResponse,\n        ListEmbedJobResponse,\n        ListModelsResponse,\n        LogprobItem,\n        Message,\n        Metrics,\n        NonStreamedChatResponse,\n        OAuthAuthorizeResponse,\n        ParseInfo,\n        RerankDocument,\n        RerankRequestDocumentsItem,\n        RerankResponse,\n        RerankResponseResultsItem,\n        RerankResponseResultsItemDocument,\n        RerankerDataMetrics,\n        ResponseFormat,\n        ResponseFormatV2,\n        SearchQueriesGenerationStreamedChatResponse,\n        SearchResultsStreamedChatResponse,\n        SingleGeneration,\n        SingleGenerationInStream,\n        SingleGenerationTokenLikelihoodsItem,\n     
   Source,\n        StreamEndGenerateStreamedResponse,\n        StreamEndStreamedChatResponse,\n        StreamErrorGenerateStreamedResponse,\n        StreamStartStreamedChatResponse,\n        StreamedChatResponse,\n        SummarizeRequestExtractiveness,\n        SummarizeRequestFormat,\n        SummarizeRequestLength,\n        SummarizeResponse,\n        SystemChatMessageV2,\n        SystemMessage,\n        SystemMessageV2,\n        SystemMessageV2Content,\n        SystemMessageV2ContentOneItem,\n        TextAssistantMessageResponseContentItem,\n        TextAssistantMessageV2ContentOneItem,\n        TextContent,\n        TextEmbedContent,\n        TextGenerationGenerateStreamedResponse,\n        TextGenerationStreamedChatResponse,\n        TextResponseFormat,\n        TextResponseFormatV2,\n        TextSystemMessageV2ContentOneItem,\n        TextToolContent,\n        Thinking,\n        ThinkingAssistantMessageResponseContentItem,\n        ThinkingAssistantMessageV2ContentOneItem,\n        ThinkingType,\n        TokenizeResponse,\n        Tool,\n        ToolCall,\n        ToolCallDelta,\n        ToolCallV2,\n        ToolCallV2Function,\n        ToolCallsChunkStreamedChatResponse,\n        ToolCallsGenerationStreamedChatResponse,\n        ToolChatMessageV2,\n        ToolContent,\n        ToolMessage,\n        ToolMessageV2,\n        ToolMessageV2Content,\n        ToolParameterDefinitionsValue,\n        ToolResult,\n        ToolSource,\n        ToolV2,\n        ToolV2Function,\n        UpdateConnectorResponse,\n        Usage,\n        UsageBilledUnits,\n        UsageTokens,\n        UserChatMessageV2,\n        UserMessage,\n        UserMessageV2,\n        UserMessageV2Content,\n    )\n    from .errors import (\n        BadRequestError,\n        ClientClosedRequestError,\n        ForbiddenError,\n        GatewayTimeoutError,\n        InternalServerError,\n        InvalidTokenError,\n        NotFoundError,\n        NotImplementedError,\n        
ServiceUnavailableError,\n        TooManyRequestsError,\n        UnauthorizedError,\n        UnprocessableEntityError,\n    )\n    from . import audio, batches, connectors, datasets, embed_jobs, finetuning, models, v2\n    from ._default_clients import DefaultAioHttpClient, DefaultAsyncHttpxClient\n    from .aliases import (\n        ChatResponse,\n        ContentDeltaStreamedChatResponseV2,\n        ContentEndStreamedChatResponseV2,\n        ContentStartStreamedChatResponseV2,\n        MessageEndStreamedChatResponseV2,\n        MessageStartStreamedChatResponseV2,\n        StreamedChatResponseV2,\n        ToolCallDeltaStreamedChatResponseV2,\n        ToolCallEndStreamedChatResponseV2,\n        ToolCallStartStreamedChatResponseV2,\n    )\n    from .aws_client import AwsClient\n    from .batches import (\n        Batch,\n        BatchStatus,\n        CancelBatchResponse,\n        CreateBatchResponse,\n        GetBatchResponse,\n        ListBatchesResponse,\n    )\n    from .bedrock_client import BedrockClient, BedrockClientV2\n    from .client import AsyncClient, Client\n    from .client_v2 import AsyncClientV2, ClientV2\n    from .datasets import DatasetsCreateResponse, DatasetsGetResponse, DatasetsGetUsageResponse, DatasetsListResponse\n    from .embed_jobs import CreateEmbedJobRequestTruncate\n    from .environment import ClientEnvironment\n    from .oci_client import OciClient, OciClientV2\n    from .sagemaker_client import SagemakerClient, SagemakerClientV2\n    from .v2 import (\n        CitationEndV2ChatStreamResponse,\n        CitationStartV2ChatStreamResponse,\n        ContentDeltaV2ChatStreamResponse,\n        ContentEndV2ChatStreamResponse,\n        ContentStartV2ChatStreamResponse,\n        DebugV2ChatStreamResponse,\n        MessageEndV2ChatStreamResponse,\n        MessageStartV2ChatStreamResponse,\n        ToolCallDeltaV2ChatStreamResponse,\n        ToolCallEndV2ChatStreamResponse,\n        ToolCallStartV2ChatStreamResponse,\n        
ToolPlanDeltaV2ChatStreamResponse,\n        V2ChatRequestDocumentsItem,\n        V2ChatRequestSafetyMode,\n        V2ChatRequestToolChoice,\n        V2ChatResponse,\n        V2ChatStreamRequestDocumentsItem,\n        V2ChatStreamRequestSafetyMode,\n        V2ChatStreamRequestToolChoice,\n        V2ChatStreamResponse,\n        V2EmbedRequestTruncate,\n        V2RerankResponse,\n        V2RerankResponseResultsItem,\n    )\n    from .version import __version__\n_dynamic_imports: typing.Dict[str, str] = {\n    \"ApiMeta\": \".types\",\n    \"ApiMetaApiVersion\": \".types\",\n    \"ApiMetaBilledUnits\": \".types\",\n    \"ApiMetaTokens\": \".types\",\n    \"AssistantChatMessageV2\": \".types\",\n    \"AssistantMessage\": \".types\",\n    \"AssistantMessageResponse\": \".types\",\n    \"AssistantMessageResponseContentItem\": \".types\",\n    \"AssistantMessageV2Content\": \".types\",\n    \"AssistantMessageV2ContentOneItem\": \".types\",\n    \"AsyncClient\": \".client\",\n    \"AsyncClientV2\": \".client_v2\",\n    \"AuthTokenType\": \".types\",\n    \"AwsClient\": \".aws_client\",\n    \"BadRequestError\": \".errors\",\n    \"Batch\": \".batches\",\n    \"BatchStatus\": \".batches\",\n    \"BedrockClient\": \".bedrock_client\",\n    \"BedrockClientV2\": \".bedrock_client\",\n    \"CancelBatchResponse\": \".batches\",\n    \"ChatCitation\": \".types\",\n    \"ChatCitationGenerationEvent\": \".types\",\n    \"ChatCitationType\": \".types\",\n    \"ChatConnector\": \".types\",\n    \"ChatContentDeltaEvent\": \".types\",\n    \"ChatContentDeltaEventDelta\": \".types\",\n    \"ChatContentDeltaEventDeltaMessage\": \".types\",\n    \"ChatContentDeltaEventDeltaMessageContent\": \".types\",\n    \"ChatContentEndEvent\": \".types\",\n    \"ChatContentStartEvent\": \".types\",\n    \"ChatContentStartEventDelta\": \".types\",\n    \"ChatContentStartEventDeltaMessage\": \".types\",\n    \"ChatContentStartEventDeltaMessageContent\": \".types\",\n    
\"ChatContentStartEventDeltaMessageContentType\": \".types\",\n    \"ChatDataMetrics\": \".types\",\n    \"ChatDebugEvent\": \".types\",\n    \"ChatDocument\": \".types\",\n    \"ChatDocumentSource\": \".types\",\n    \"ChatFinishReason\": \".types\",\n    \"ChatMessage\": \".types\",\n    \"ChatMessageEndEvent\": \".types\",\n    \"ChatMessageEndEventDelta\": \".types\",\n    \"ChatMessageStartEvent\": \".types\",\n    \"ChatMessageStartEventDelta\": \".types\",\n    \"ChatMessageStartEventDeltaMessage\": \".types\",\n    \"ChatMessageV2\": \".types\",\n    \"ChatMessages\": \".types\",\n    \"ChatRequestCitationQuality\": \".types\",\n    \"ChatRequestPromptTruncation\": \".types\",\n    \"ChatRequestSafetyMode\": \".types\",\n    \"ChatResponse\": \".aliases\",\n    \"ChatSearchQueriesGenerationEvent\": \".types\",\n    \"ChatSearchQuery\": \".types\",\n    \"ChatSearchResult\": \".types\",\n    \"ChatSearchResultConnector\": \".types\",\n    \"ChatSearchResultsEvent\": \".types\",\n    \"ChatStreamEndEvent\": \".types\",\n    \"ChatStreamEndEventFinishReason\": \".types\",\n    \"ChatStreamEvent\": \".types\",\n    \"ChatStreamEventType\": \".types\",\n    \"ChatStreamRequestCitationQuality\": \".types\",\n    \"ChatStreamRequestPromptTruncation\": \".types\",\n    \"ChatStreamRequestSafetyMode\": \".types\",\n    \"ChatStreamStartEvent\": \".types\",\n    \"ChatTextContent\": \".types\",\n    \"ChatTextGenerationEvent\": \".types\",\n    \"ChatTextResponseFormat\": \".types\",\n    \"ChatTextResponseFormatV2\": \".types\",\n    \"ChatThinkingContent\": \".types\",\n    \"ChatToolCallDeltaEvent\": \".types\",\n    \"ChatToolCallDeltaEventDelta\": \".types\",\n    \"ChatToolCallDeltaEventDeltaMessage\": \".types\",\n    \"ChatToolCallDeltaEventDeltaMessageToolCalls\": \".types\",\n    \"ChatToolCallDeltaEventDeltaMessageToolCallsFunction\": \".types\",\n    \"ChatToolCallEndEvent\": \".types\",\n    \"ChatToolCallStartEvent\": \".types\",\n    
\"ChatToolCallStartEventDelta\": \".types\",\n    \"ChatToolCallStartEventDeltaMessage\": \".types\",\n    \"ChatToolCallsChunkEvent\": \".types\",\n    \"ChatToolCallsGenerationEvent\": \".types\",\n    \"ChatToolMessage\": \".types\",\n    \"ChatToolPlanDeltaEvent\": \".types\",\n    \"ChatToolPlanDeltaEventDelta\": \".types\",\n    \"ChatToolPlanDeltaEventDeltaMessage\": \".types\",\n    \"ChatToolSource\": \".types\",\n    \"ChatbotMessage\": \".types\",\n    \"CheckApiKeyResponse\": \".types\",\n    \"Citation\": \".types\",\n    \"CitationEndEvent\": \".types\",\n    \"CitationEndV2ChatStreamResponse\": \".v2\",\n    \"CitationGenerationStreamedChatResponse\": \".types\",\n    \"CitationOptions\": \".types\",\n    \"CitationOptionsMode\": \".types\",\n    \"CitationStartEvent\": \".types\",\n    \"CitationStartEventDelta\": \".types\",\n    \"CitationStartEventDeltaMessage\": \".types\",\n    \"CitationStartV2ChatStreamResponse\": \".v2\",\n    \"CitationType\": \".types\",\n    \"ClassifyDataMetrics\": \".types\",\n    \"ClassifyExample\": \".types\",\n    \"ClassifyRequestTruncate\": \".types\",\n    \"ClassifyResponse\": \".types\",\n    \"ClassifyResponseClassificationsItem\": \".types\",\n    \"ClassifyResponseClassificationsItemClassificationType\": \".types\",\n    \"ClassifyResponseClassificationsItemLabelsValue\": \".types\",\n    \"Client\": \".client\",\n    \"ClientClosedRequestError\": \".errors\",\n    \"ClientEnvironment\": \".environment\",\n    \"ClientV2\": \".client_v2\",\n    \"CompatibleEndpoint\": \".types\",\n    \"Connector\": \".types\",\n    \"ConnectorAuthStatus\": \".types\",\n    \"ConnectorOAuth\": \".types\",\n    \"Content\": \".types\",\n    \"ContentDeltaStreamedChatResponseV2\": \".aliases\",\n    \"ContentDeltaV2ChatStreamResponse\": \".v2\",\n    \"ContentEndStreamedChatResponseV2\": \".aliases\",\n    \"ContentEndV2ChatStreamResponse\": \".v2\",\n    \"ContentStartStreamedChatResponseV2\": \".aliases\",\n    
\"ContentStartV2ChatStreamResponse\": \".v2\",\n    \"CreateBatchResponse\": \".batches\",\n    \"CreateConnectorOAuth\": \".types\",\n    \"CreateConnectorResponse\": \".types\",\n    \"CreateConnectorServiceAuth\": \".types\",\n    \"CreateEmbedJobRequestTruncate\": \".embed_jobs\",\n    \"CreateEmbedJobResponse\": \".types\",\n    \"Dataset\": \".types\",\n    \"DatasetPart\": \".types\",\n    \"DatasetType\": \".types\",\n    \"DatasetValidationStatus\": \".types\",\n    \"DatasetsCreateResponse\": \".datasets\",\n    \"DatasetsGetResponse\": \".datasets\",\n    \"DatasetsGetUsageResponse\": \".datasets\",\n    \"DatasetsListResponse\": \".datasets\",\n    \"DebugStreamedChatResponse\": \".types\",\n    \"DebugV2ChatStreamResponse\": \".v2\",\n    \"DefaultAioHttpClient\": \"._default_clients\",\n    \"DefaultAsyncHttpxClient\": \"._default_clients\",\n    \"DeleteConnectorResponse\": \".types\",\n    \"DetokenizeResponse\": \".types\",\n    \"Document\": \".types\",\n    \"DocumentContent\": \".types\",\n    \"DocumentSource\": \".types\",\n    \"DocumentToolContent\": \".types\",\n    \"EmbedByTypeResponse\": \".types\",\n    \"EmbedByTypeResponseEmbeddings\": \".types\",\n    \"EmbedByTypeResponseResponseType\": \".types\",\n    \"EmbedContent\": \".types\",\n    \"EmbedFloatsResponse\": \".types\",\n    \"EmbedImage\": \".types\",\n    \"EmbedImageUrl\": \".types\",\n    \"EmbedInput\": \".types\",\n    \"EmbedInputType\": \".types\",\n    \"EmbedJob\": \".types\",\n    \"EmbedJobStatus\": \".types\",\n    \"EmbedJobTruncate\": \".types\",\n    \"EmbedRequestTruncate\": \".types\",\n    \"EmbedResponse\": \".types\",\n    \"EmbedText\": \".types\",\n    \"EmbeddingType\": \".types\",\n    \"EmbeddingsByTypeEmbedResponse\": \".types\",\n    \"EmbeddingsFloatsEmbedResponse\": \".types\",\n    \"FinetuneDatasetMetrics\": \".types\",\n    \"FinishReason\": \".types\",\n    \"ForbiddenError\": \".errors\",\n    \"GatewayTimeoutError\": \".errors\",\n    
\"GenerateRequestReturnLikelihoods\": \".types\",\n    \"GenerateRequestTruncate\": \".types\",\n    \"GenerateStreamEnd\": \".types\",\n    \"GenerateStreamEndResponse\": \".types\",\n    \"GenerateStreamError\": \".types\",\n    \"GenerateStreamEvent\": \".types\",\n    \"GenerateStreamRequestReturnLikelihoods\": \".types\",\n    \"GenerateStreamRequestTruncate\": \".types\",\n    \"GenerateStreamText\": \".types\",\n    \"GenerateStreamedResponse\": \".types\",\n    \"Generation\": \".types\",\n    \"GetBatchResponse\": \".batches\",\n    \"GetConnectorResponse\": \".types\",\n    \"GetModelResponse\": \".types\",\n    \"GetModelResponseSamplingDefaults\": \".types\",\n    \"Image\": \".types\",\n    \"ImageContent\": \".types\",\n    \"ImageUrl\": \".types\",\n    \"ImageUrlContent\": \".types\",\n    \"ImageUrlDetail\": \".types\",\n    \"ImageUrlEmbedContent\": \".types\",\n    \"InternalServerError\": \".errors\",\n    \"InvalidTokenError\": \".errors\",\n    \"JsonObjectResponseFormat\": \".types\",\n    \"JsonObjectResponseFormatV2\": \".types\",\n    \"JsonResponseFormat\": \".types\",\n    \"JsonResponseFormatV2\": \".types\",\n    \"LabelMetric\": \".types\",\n    \"ListBatchesResponse\": \".batches\",\n    \"ListConnectorsResponse\": \".types\",\n    \"ListEmbedJobResponse\": \".types\",\n    \"ListModelsResponse\": \".types\",\n    \"LogprobItem\": \".types\",\n    \"Message\": \".types\",\n    \"MessageEndStreamedChatResponseV2\": \".aliases\",\n    \"MessageEndV2ChatStreamResponse\": \".v2\",\n    \"MessageStartStreamedChatResponseV2\": \".aliases\",\n    \"MessageStartV2ChatStreamResponse\": \".v2\",\n    \"Metrics\": \".types\",\n    \"NonStreamedChatResponse\": \".types\",\n    \"NotFoundError\": \".errors\",\n    \"NotImplementedError\": \".errors\",\n    \"OAuthAuthorizeResponse\": \".types\",\n    \"OciClient\": \".oci_client\",\n    \"OciClientV2\": \".oci_client\",\n    \"ParseInfo\": \".types\",\n    \"RerankDocument\": \".types\",\n    
\"RerankRequestDocumentsItem\": \".types\",\n    \"RerankResponse\": \".types\",\n    \"RerankResponseResultsItem\": \".types\",\n    \"RerankResponseResultsItemDocument\": \".types\",\n    \"RerankerDataMetrics\": \".types\",\n    \"ResponseFormat\": \".types\",\n    \"ResponseFormatV2\": \".types\",\n    \"SagemakerClient\": \".sagemaker_client\",\n    \"SagemakerClientV2\": \".sagemaker_client\",\n    \"SearchQueriesGenerationStreamedChatResponse\": \".types\",\n    \"SearchResultsStreamedChatResponse\": \".types\",\n    \"ServiceUnavailableError\": \".errors\",\n    \"SingleGeneration\": \".types\",\n    \"SingleGenerationInStream\": \".types\",\n    \"SingleGenerationTokenLikelihoodsItem\": \".types\",\n    \"Source\": \".types\",\n    \"StreamEndGenerateStreamedResponse\": \".types\",\n    \"StreamEndStreamedChatResponse\": \".types\",\n    \"StreamErrorGenerateStreamedResponse\": \".types\",\n    \"StreamStartStreamedChatResponse\": \".types\",\n    \"StreamedChatResponse\": \".types\",\n    \"StreamedChatResponseV2\": \".aliases\",\n    \"SummarizeRequestExtractiveness\": \".types\",\n    \"SummarizeRequestFormat\": \".types\",\n    \"SummarizeRequestLength\": \".types\",\n    \"SummarizeResponse\": \".types\",\n    \"SystemChatMessageV2\": \".types\",\n    \"SystemMessage\": \".types\",\n    \"SystemMessageV2\": \".types\",\n    \"SystemMessageV2Content\": \".types\",\n    \"SystemMessageV2ContentOneItem\": \".types\",\n    \"TextAssistantMessageResponseContentItem\": \".types\",\n    \"TextAssistantMessageV2ContentOneItem\": \".types\",\n    \"TextContent\": \".types\",\n    \"TextEmbedContent\": \".types\",\n    \"TextGenerationGenerateStreamedResponse\": \".types\",\n    \"TextGenerationStreamedChatResponse\": \".types\",\n    \"TextResponseFormat\": \".types\",\n    \"TextResponseFormatV2\": \".types\",\n    \"TextSystemMessageV2ContentOneItem\": \".types\",\n    \"TextToolContent\": \".types\",\n    \"Thinking\": \".types\",\n    
\"ThinkingAssistantMessageResponseContentItem\": \".types\",\n    \"ThinkingAssistantMessageV2ContentOneItem\": \".types\",\n    \"ThinkingType\": \".types\",\n    \"TokenizeResponse\": \".types\",\n    \"TooManyRequestsError\": \".errors\",\n    \"Tool\": \".types\",\n    \"ToolCall\": \".types\",\n    \"ToolCallDelta\": \".types\",\n    \"ToolCallDeltaStreamedChatResponseV2\": \".aliases\",\n    \"ToolCallDeltaV2ChatStreamResponse\": \".v2\",\n    \"ToolCallEndStreamedChatResponseV2\": \".aliases\",\n    \"ToolCallEndV2ChatStreamResponse\": \".v2\",\n    \"ToolCallStartStreamedChatResponseV2\": \".aliases\",\n    \"ToolCallStartV2ChatStreamResponse\": \".v2\",\n    \"ToolCallV2\": \".types\",\n    \"ToolCallV2Function\": \".types\",\n    \"ToolCallsChunkStreamedChatResponse\": \".types\",\n    \"ToolCallsGenerationStreamedChatResponse\": \".types\",\n    \"ToolChatMessageV2\": \".types\",\n    \"ToolContent\": \".types\",\n    \"ToolMessage\": \".types\",\n    \"ToolMessageV2\": \".types\",\n    \"ToolMessageV2Content\": \".types\",\n    \"ToolParameterDefinitionsValue\": \".types\",\n    \"ToolPlanDeltaV2ChatStreamResponse\": \".v2\",\n    \"ToolResult\": \".types\",\n    \"ToolSource\": \".types\",\n    \"ToolV2\": \".types\",\n    \"ToolV2Function\": \".types\",\n    \"UnauthorizedError\": \".errors\",\n    \"UnprocessableEntityError\": \".errors\",\n    \"UpdateConnectorResponse\": \".types\",\n    \"Usage\": \".types\",\n    \"UsageBilledUnits\": \".types\",\n    \"UsageTokens\": \".types\",\n    \"UserChatMessageV2\": \".types\",\n    \"UserMessage\": \".types\",\n    \"UserMessageV2\": \".types\",\n    \"UserMessageV2Content\": \".types\",\n    \"V2ChatRequestDocumentsItem\": \".v2\",\n    \"V2ChatRequestSafetyMode\": \".v2\",\n    \"V2ChatRequestToolChoice\": \".v2\",\n    \"V2ChatResponse\": \".v2\",\n    \"V2ChatStreamRequestDocumentsItem\": \".v2\",\n    \"V2ChatStreamRequestSafetyMode\": \".v2\",\n    \"V2ChatStreamRequestToolChoice\": \".v2\",\n    
\"V2ChatStreamResponse\": \".v2\",\n    \"V2EmbedRequestTruncate\": \".v2\",\n    \"V2RerankResponse\": \".v2\",\n    \"V2RerankResponseResultsItem\": \".v2\",\n    \"__version__\": \".version\",\n    \"audio\": \".audio\",\n    \"batches\": \".batches\",\n    \"connectors\": \".connectors\",\n    \"datasets\": \".datasets\",\n    \"embed_jobs\": \".embed_jobs\",\n    \"finetuning\": \".finetuning\",\n    \"models\": \".models\",\n    \"v2\": \".v2\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\n    \"ApiMeta\",\n    \"ApiMetaApiVersion\",\n    \"ApiMetaBilledUnits\",\n    \"ApiMetaTokens\",\n    \"AssistantChatMessageV2\",\n    \"AssistantMessage\",\n    \"AssistantMessageResponse\",\n    \"AssistantMessageResponseContentItem\",\n    \"AssistantMessageV2Content\",\n    \"AssistantMessageV2ContentOneItem\",\n    \"AsyncClient\",\n    \"AsyncClientV2\",\n    \"AuthTokenType\",\n    \"AwsClient\",\n    \"BadRequestError\",\n    \"Batch\",\n    \"BatchStatus\",\n    \"BedrockClient\",\n    \"BedrockClientV2\",\n    \"CancelBatchResponse\",\n    \"ChatCitation\",\n    \"ChatCitationGenerationEvent\",\n    \"ChatCitationType\",\n    \"ChatConnector\",\n    \"ChatContentDeltaEvent\",\n    
\"ChatContentDeltaEventDelta\",\n    \"ChatContentDeltaEventDeltaMessage\",\n    \"ChatContentDeltaEventDeltaMessageContent\",\n    \"ChatContentEndEvent\",\n    \"ChatContentStartEvent\",\n    \"ChatContentStartEventDelta\",\n    \"ChatContentStartEventDeltaMessage\",\n    \"ChatContentStartEventDeltaMessageContent\",\n    \"ChatContentStartEventDeltaMessageContentType\",\n    \"ChatDataMetrics\",\n    \"ChatDebugEvent\",\n    \"ChatDocument\",\n    \"ChatDocumentSource\",\n    \"ChatFinishReason\",\n    \"ChatMessage\",\n    \"ChatMessageEndEvent\",\n    \"ChatMessageEndEventDelta\",\n    \"ChatMessageStartEvent\",\n    \"ChatMessageStartEventDelta\",\n    \"ChatMessageStartEventDeltaMessage\",\n    \"ChatMessageV2\",\n    \"ChatMessages\",\n    \"ChatRequestCitationQuality\",\n    \"ChatRequestPromptTruncation\",\n    \"ChatRequestSafetyMode\",\n    \"ChatResponse\",\n    \"ChatSearchQueriesGenerationEvent\",\n    \"ChatSearchQuery\",\n    \"ChatSearchResult\",\n    \"ChatSearchResultConnector\",\n    \"ChatSearchResultsEvent\",\n    \"ChatStreamEndEvent\",\n    \"ChatStreamEndEventFinishReason\",\n    \"ChatStreamEvent\",\n    \"ChatStreamEventType\",\n    \"ChatStreamRequestCitationQuality\",\n    \"ChatStreamRequestPromptTruncation\",\n    \"ChatStreamRequestSafetyMode\",\n    \"ChatStreamStartEvent\",\n    \"ChatTextContent\",\n    \"ChatTextGenerationEvent\",\n    \"ChatTextResponseFormat\",\n    \"ChatTextResponseFormatV2\",\n    \"ChatThinkingContent\",\n    \"ChatToolCallDeltaEvent\",\n    \"ChatToolCallDeltaEventDelta\",\n    \"ChatToolCallDeltaEventDeltaMessage\",\n    \"ChatToolCallDeltaEventDeltaMessageToolCalls\",\n    \"ChatToolCallDeltaEventDeltaMessageToolCallsFunction\",\n    \"ChatToolCallEndEvent\",\n    \"ChatToolCallStartEvent\",\n    \"ChatToolCallStartEventDelta\",\n    \"ChatToolCallStartEventDeltaMessage\",\n    \"ChatToolCallsChunkEvent\",\n    \"ChatToolCallsGenerationEvent\",\n    \"ChatToolMessage\",\n    
\"ChatToolPlanDeltaEvent\",\n    \"ChatToolPlanDeltaEventDelta\",\n    \"ChatToolPlanDeltaEventDeltaMessage\",\n    \"ChatToolSource\",\n    \"ChatbotMessage\",\n    \"CheckApiKeyResponse\",\n    \"Citation\",\n    \"CitationEndEvent\",\n    \"CitationEndV2ChatStreamResponse\",\n    \"CitationGenerationStreamedChatResponse\",\n    \"CitationOptions\",\n    \"CitationOptionsMode\",\n    \"CitationStartEvent\",\n    \"CitationStartEventDelta\",\n    \"CitationStartEventDeltaMessage\",\n    \"CitationStartV2ChatStreamResponse\",\n    \"CitationType\",\n    \"ClassifyDataMetrics\",\n    \"ClassifyExample\",\n    \"ClassifyRequestTruncate\",\n    \"ClassifyResponse\",\n    \"ClassifyResponseClassificationsItem\",\n    \"ClassifyResponseClassificationsItemClassificationType\",\n    \"ClassifyResponseClassificationsItemLabelsValue\",\n    \"Client\",\n    \"ClientClosedRequestError\",\n    \"ClientEnvironment\",\n    \"ClientV2\",\n    \"CompatibleEndpoint\",\n    \"Connector\",\n    \"ConnectorAuthStatus\",\n    \"ConnectorOAuth\",\n    \"Content\",\n    \"ContentDeltaStreamedChatResponseV2\",\n    \"ContentDeltaV2ChatStreamResponse\",\n    \"ContentEndStreamedChatResponseV2\",\n    \"ContentEndV2ChatStreamResponse\",\n    \"ContentStartStreamedChatResponseV2\",\n    \"ContentStartV2ChatStreamResponse\",\n    \"CreateBatchResponse\",\n    \"CreateConnectorOAuth\",\n    \"CreateConnectorResponse\",\n    \"CreateConnectorServiceAuth\",\n    \"CreateEmbedJobRequestTruncate\",\n    \"CreateEmbedJobResponse\",\n    \"Dataset\",\n    \"DatasetPart\",\n    \"DatasetType\",\n    \"DatasetValidationStatus\",\n    \"DatasetsCreateResponse\",\n    \"DatasetsGetResponse\",\n    \"DatasetsGetUsageResponse\",\n    \"DatasetsListResponse\",\n    \"DebugStreamedChatResponse\",\n    \"DebugV2ChatStreamResponse\",\n    \"DefaultAioHttpClient\",\n    \"DefaultAsyncHttpxClient\",\n    \"DeleteConnectorResponse\",\n    \"DetokenizeResponse\",\n    \"Document\",\n    \"DocumentContent\",\n    
\"DocumentSource\",\n    \"DocumentToolContent\",\n    \"EmbedByTypeResponse\",\n    \"EmbedByTypeResponseEmbeddings\",\n    \"EmbedByTypeResponseResponseType\",\n    \"EmbedContent\",\n    \"EmbedFloatsResponse\",\n    \"EmbedImage\",\n    \"EmbedImageUrl\",\n    \"EmbedInput\",\n    \"EmbedInputType\",\n    \"EmbedJob\",\n    \"EmbedJobStatus\",\n    \"EmbedJobTruncate\",\n    \"EmbedRequestTruncate\",\n    \"EmbedResponse\",\n    \"EmbedText\",\n    \"EmbeddingType\",\n    \"EmbeddingsByTypeEmbedResponse\",\n    \"EmbeddingsFloatsEmbedResponse\",\n    \"FinetuneDatasetMetrics\",\n    \"FinishReason\",\n    \"ForbiddenError\",\n    \"GatewayTimeoutError\",\n    \"GenerateRequestReturnLikelihoods\",\n    \"GenerateRequestTruncate\",\n    \"GenerateStreamEnd\",\n    \"GenerateStreamEndResponse\",\n    \"GenerateStreamError\",\n    \"GenerateStreamEvent\",\n    \"GenerateStreamRequestReturnLikelihoods\",\n    \"GenerateStreamRequestTruncate\",\n    \"GenerateStreamText\",\n    \"GenerateStreamedResponse\",\n    \"Generation\",\n    \"GetBatchResponse\",\n    \"GetConnectorResponse\",\n    \"GetModelResponse\",\n    \"GetModelResponseSamplingDefaults\",\n    \"Image\",\n    \"ImageContent\",\n    \"ImageUrl\",\n    \"ImageUrlContent\",\n    \"ImageUrlDetail\",\n    \"ImageUrlEmbedContent\",\n    \"InternalServerError\",\n    \"InvalidTokenError\",\n    \"JsonObjectResponseFormat\",\n    \"JsonObjectResponseFormatV2\",\n    \"JsonResponseFormat\",\n    \"JsonResponseFormatV2\",\n    \"LabelMetric\",\n    \"ListBatchesResponse\",\n    \"ListConnectorsResponse\",\n    \"ListEmbedJobResponse\",\n    \"ListModelsResponse\",\n    \"LogprobItem\",\n    \"Message\",\n    \"MessageEndStreamedChatResponseV2\",\n    \"MessageEndV2ChatStreamResponse\",\n    \"MessageStartStreamedChatResponseV2\",\n    \"MessageStartV2ChatStreamResponse\",\n    \"Metrics\",\n    \"NonStreamedChatResponse\",\n    \"NotFoundError\",\n    \"NotImplementedError\",\n    \"OAuthAuthorizeResponse\",\n   
 \"OciClient\",\n    \"OciClientV2\",\n    \"ParseInfo\",\n    \"RerankDocument\",\n    \"RerankRequestDocumentsItem\",\n    \"RerankResponse\",\n    \"RerankResponseResultsItem\",\n    \"RerankResponseResultsItemDocument\",\n    \"RerankerDataMetrics\",\n    \"ResponseFormat\",\n    \"ResponseFormatV2\",\n    \"SagemakerClient\",\n    \"SagemakerClientV2\",\n    \"SearchQueriesGenerationStreamedChatResponse\",\n    \"SearchResultsStreamedChatResponse\",\n    \"ServiceUnavailableError\",\n    \"SingleGeneration\",\n    \"SingleGenerationInStream\",\n    \"SingleGenerationTokenLikelihoodsItem\",\n    \"Source\",\n    \"StreamEndGenerateStreamedResponse\",\n    \"StreamEndStreamedChatResponse\",\n    \"StreamErrorGenerateStreamedResponse\",\n    \"StreamStartStreamedChatResponse\",\n    \"StreamedChatResponse\",\n    \"StreamedChatResponseV2\",\n    \"SummarizeRequestExtractiveness\",\n    \"SummarizeRequestFormat\",\n    \"SummarizeRequestLength\",\n    \"SummarizeResponse\",\n    \"SystemChatMessageV2\",\n    \"SystemMessage\",\n    \"SystemMessageV2\",\n    \"SystemMessageV2Content\",\n    \"SystemMessageV2ContentOneItem\",\n    \"TextAssistantMessageResponseContentItem\",\n    \"TextAssistantMessageV2ContentOneItem\",\n    \"TextContent\",\n    \"TextEmbedContent\",\n    \"TextGenerationGenerateStreamedResponse\",\n    \"TextGenerationStreamedChatResponse\",\n    \"TextResponseFormat\",\n    \"TextResponseFormatV2\",\n    \"TextSystemMessageV2ContentOneItem\",\n    \"TextToolContent\",\n    \"Thinking\",\n    \"ThinkingAssistantMessageResponseContentItem\",\n    \"ThinkingAssistantMessageV2ContentOneItem\",\n    \"ThinkingType\",\n    \"TokenizeResponse\",\n    \"TooManyRequestsError\",\n    \"Tool\",\n    \"ToolCall\",\n    \"ToolCallDelta\",\n    \"ToolCallDeltaStreamedChatResponseV2\",\n    \"ToolCallDeltaV2ChatStreamResponse\",\n    \"ToolCallEndStreamedChatResponseV2\",\n    \"ToolCallEndV2ChatStreamResponse\",\n    \"ToolCallStartStreamedChatResponseV2\",\n 
   \"ToolCallStartV2ChatStreamResponse\",\n    \"ToolCallV2\",\n    \"ToolCallV2Function\",\n    \"ToolCallsChunkStreamedChatResponse\",\n    \"ToolCallsGenerationStreamedChatResponse\",\n    \"ToolChatMessageV2\",\n    \"ToolContent\",\n    \"ToolMessage\",\n    \"ToolMessageV2\",\n    \"ToolMessageV2Content\",\n    \"ToolParameterDefinitionsValue\",\n    \"ToolPlanDeltaV2ChatStreamResponse\",\n    \"ToolResult\",\n    \"ToolSource\",\n    \"ToolV2\",\n    \"ToolV2Function\",\n    \"UnauthorizedError\",\n    \"UnprocessableEntityError\",\n    \"UpdateConnectorResponse\",\n    \"Usage\",\n    \"UsageBilledUnits\",\n    \"UsageTokens\",\n    \"UserChatMessageV2\",\n    \"UserMessage\",\n    \"UserMessageV2\",\n    \"UserMessageV2Content\",\n    \"V2ChatRequestDocumentsItem\",\n    \"V2ChatRequestSafetyMode\",\n    \"V2ChatRequestToolChoice\",\n    \"V2ChatResponse\",\n    \"V2ChatStreamRequestDocumentsItem\",\n    \"V2ChatStreamRequestSafetyMode\",\n    \"V2ChatStreamRequestToolChoice\",\n    \"V2ChatStreamResponse\",\n    \"V2EmbedRequestTruncate\",\n    \"V2RerankResponse\",\n    \"V2RerankResponseResultsItem\",\n    \"__version__\",\n    \"audio\",\n    \"batches\",\n    \"connectors\",\n    \"datasets\",\n    \"embed_jobs\",\n    \"finetuning\",\n    \"models\",\n    \"v2\",\n]\n"
  },
  {
    "path": "src/cohere/_default_clients.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport httpx\n\nSDK_DEFAULT_TIMEOUT = 60\n\ntry:\n    import httpx_aiohttp  # type: ignore[import-not-found]\nexcept ImportError:\n\n    class DefaultAioHttpClient(httpx.AsyncClient):  # type: ignore\n        def __init__(self, **kwargs: typing.Any) -> None:\n            raise RuntimeError(\"To use the aiohttp client, install the aiohttp extra: pip install cohere[aiohttp]\")\n\nelse:\n\n    class DefaultAioHttpClient(httpx_aiohttp.HttpxAiohttpClient):  # type: ignore\n        def __init__(self, **kwargs: typing.Any) -> None:\n            kwargs.setdefault(\"timeout\", SDK_DEFAULT_TIMEOUT)\n            kwargs.setdefault(\"follow_redirects\", True)\n            super().__init__(**kwargs)\n\n\nclass DefaultAsyncHttpxClient(httpx.AsyncClient):\n    def __init__(self, **kwargs: typing.Any) -> None:\n        kwargs.setdefault(\"timeout\", SDK_DEFAULT_TIMEOUT)\n        kwargs.setdefault(\"follow_redirects\", True)\n        super().__init__(**kwargs)\n"
  },
  {
    "path": "src/cohere/aliases.py",
    "content": "# Import overrides early to ensure they're applied before types are used\n# This is necessary for backwards compatibility patches like ToolCallV2.id being optional\nfrom . import overrides  # noqa: F401\n\nfrom .v2 import (\n    ContentDeltaV2ChatStreamResponse,\n    ContentEndV2ChatStreamResponse,\n    ContentStartV2ChatStreamResponse,\n    MessageEndV2ChatStreamResponse,\n    MessageStartV2ChatStreamResponse,\n    ToolCallDeltaV2ChatStreamResponse,\n    ToolCallEndV2ChatStreamResponse,\n    ToolCallStartV2ChatStreamResponse,\n    V2ChatStreamResponse,\n    V2ChatResponse\n)\n\n# alias classes\nStreamedChatResponseV2 = V2ChatStreamResponse\nMessageStartStreamedChatResponseV2 = MessageStartV2ChatStreamResponse\nMessageEndStreamedChatResponseV2 = MessageEndV2ChatStreamResponse\nContentStartStreamedChatResponseV2 = ContentStartV2ChatStreamResponse\nContentDeltaStreamedChatResponseV2 = ContentDeltaV2ChatStreamResponse\nContentEndStreamedChatResponseV2 = ContentEndV2ChatStreamResponse\nToolCallStartStreamedChatResponseV2 = ToolCallStartV2ChatStreamResponse\nToolCallDeltaStreamedChatResponseV2 = ToolCallDeltaV2ChatStreamResponse\nToolCallEndStreamedChatResponseV2 = ToolCallEndV2ChatStreamResponse\nChatResponse = V2ChatResponse\n"
  },
  {
    "path": "src/cohere/audio/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from . import transcriptions\n    from .transcriptions import AudioTranscriptionsCreateResponse\n_dynamic_imports: typing.Dict[str, str] = {\n    \"AudioTranscriptionsCreateResponse\": \".transcriptions\",\n    \"transcriptions\": \".transcriptions\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\"AudioTranscriptionsCreateResponse\", \"transcriptions\"]\n"
  },
  {
    "path": "src/cohere/audio/client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom .raw_client import AsyncRawAudioClient, RawAudioClient\n\nif typing.TYPE_CHECKING:\n    from .transcriptions.client import AsyncTranscriptionsClient, TranscriptionsClient\n\n\nclass AudioClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._raw_client = RawAudioClient(client_wrapper=client_wrapper)\n        self._client_wrapper = client_wrapper\n        self._transcriptions: typing.Optional[TranscriptionsClient] = None\n\n    @property\n    def with_raw_response(self) -> RawAudioClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        RawAudioClient\n        \"\"\"\n        return self._raw_client\n\n    @property\n    def transcriptions(self):\n        if self._transcriptions is None:\n            from .transcriptions.client import TranscriptionsClient  # noqa: E402\n\n            self._transcriptions = TranscriptionsClient(client_wrapper=self._client_wrapper)\n        return self._transcriptions\n\n\nclass AsyncAudioClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._raw_client = AsyncRawAudioClient(client_wrapper=client_wrapper)\n        self._client_wrapper = client_wrapper\n        self._transcriptions: typing.Optional[AsyncTranscriptionsClient] = None\n\n    @property\n    def with_raw_response(self) -> AsyncRawAudioClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        AsyncRawAudioClient\n        \"\"\"\n        return self._raw_client\n\n    @property\n    def transcriptions(self):\n        if self._transcriptions is None:\n            from .transcriptions.client import 
AsyncTranscriptionsClient  # noqa: E402\n\n            self._transcriptions = AsyncTranscriptionsClient(client_wrapper=self._client_wrapper)\n        return self._transcriptions\n"
  },
  {
    "path": "src/cohere/audio/raw_client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\n\n\nclass RawAudioClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n\nclass AsyncRawAudioClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._client_wrapper = client_wrapper\n"
  },
  {
    "path": "src/cohere/audio/transcriptions/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .types import AudioTranscriptionsCreateResponse\n_dynamic_imports: typing.Dict[str, str] = {\"AudioTranscriptionsCreateResponse\": \".types\"}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\"AudioTranscriptionsCreateResponse\"]\n"
  },
  {
    "path": "src/cohere/audio/transcriptions/client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ... import core\nfrom ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ...core.request_options import RequestOptions\nfrom .raw_client import AsyncRawTranscriptionsClient, RawTranscriptionsClient\nfrom .types.audio_transcriptions_create_response import AudioTranscriptionsCreateResponse\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass TranscriptionsClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._raw_client = RawTranscriptionsClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> RawTranscriptionsClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        RawTranscriptionsClient\n        \"\"\"\n        return self._raw_client\n\n    def create(\n        self,\n        *,\n        model: str,\n        language: str,\n        file: core.File,\n        temperature: typing.Optional[float] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AudioTranscriptionsCreateResponse:\n        \"\"\"\n        Transcribe an audio file.\n\n        Parameters\n        ----------\n        model : str\n            ID of the model to use.\n\n        language : str\n            The language of the input audio, supplied in [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) format.\n\n        file : core.File\n            See core.File for more documentation\n\n        temperature : typing.Optional[float]\n            The sampling temperature, between 0 and 1. 
Higher values like 0.8 make the output more random, while lower values like 0.2 make it more focused and deterministic.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AudioTranscriptionsCreateResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.audio.transcriptions.create(\n            model=\"model\",\n            language=\"language\",\n        )\n        \"\"\"\n        _response = self._raw_client.create(\n            model=model, language=language, file=file, temperature=temperature, request_options=request_options\n        )\n        return _response.data\n\n\nclass AsyncTranscriptionsClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._raw_client = AsyncRawTranscriptionsClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> AsyncRawTranscriptionsClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        AsyncRawTranscriptionsClient\n        \"\"\"\n        return self._raw_client\n\n    async def create(\n        self,\n        *,\n        model: str,\n        language: str,\n        file: core.File,\n        temperature: typing.Optional[float] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AudioTranscriptionsCreateResponse:\n        \"\"\"\n        Transcribe an audio file.\n\n        Parameters\n        ----------\n        model : str\n            ID of the model to use.\n\n        language : str\n            The language of the input audio, supplied in [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) format.\n\n        file : 
core.File\n            See core.File for more documentation\n\n        temperature : typing.Optional[float]\n            The sampling temperature, between 0 and 1. Higher values like 0.8 make the output more random, while lower values like 0.2 make it more focused and deterministic.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AudioTranscriptionsCreateResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.audio.transcriptions.create(\n                model=\"model\",\n                language=\"language\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.create(\n            model=model, language=language, file=file, temperature=temperature, request_options=request_options\n        )\n        return _response.data\n"
  },
  {
    "path": "src/cohere/audio/transcriptions/raw_client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\nfrom json.decoder import JSONDecodeError\n\nfrom ... import core\nfrom ...core.api_error import ApiError\nfrom ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ...core.http_response import AsyncHttpResponse, HttpResponse\nfrom ...core.parse_error import ParsingError\nfrom ...core.request_options import RequestOptions\nfrom ...core.unchecked_base_model import construct_type\nfrom ...errors.bad_request_error import BadRequestError\nfrom ...errors.client_closed_request_error import ClientClosedRequestError\nfrom ...errors.forbidden_error import ForbiddenError\nfrom ...errors.gateway_timeout_error import GatewayTimeoutError\nfrom ...errors.internal_server_error import InternalServerError\nfrom ...errors.invalid_token_error import InvalidTokenError\nfrom ...errors.not_found_error import NotFoundError\nfrom ...errors.not_implemented_error import NotImplementedError\nfrom ...errors.service_unavailable_error import ServiceUnavailableError\nfrom ...errors.too_many_requests_error import TooManyRequestsError\nfrom ...errors.unauthorized_error import UnauthorizedError\nfrom ...errors.unprocessable_entity_error import UnprocessableEntityError\nfrom .types.audio_transcriptions_create_response import AudioTranscriptionsCreateResponse\nfrom pydantic import ValidationError\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass RawTranscriptionsClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    def create(\n        self,\n        *,\n        model: str,\n        language: str,\n        file: core.File,\n        temperature: typing.Optional[float] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[AudioTranscriptionsCreateResponse]:\n        \"\"\"\n        Transcribe an audio file.\n\n     
   Parameters\n        ----------\n        model : str\n            ID of the model to use.\n\n        language : str\n            The language of the input audio, supplied in [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) format.\n\n        file : core.File\n            See core.File for more documentation\n\n        temperature : typing.Optional[float]\n            The sampling temperature, between 0 and 1. Higher values like 0.8 make the output more random, while lower values like 0.2 make it more focused and deterministic.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[AudioTranscriptionsCreateResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v2/audio/transcriptions\",\n            method=\"POST\",\n            data={\n                \"model\": model,\n                \"language\": language,\n                \"temperature\": temperature,\n            },\n            files={\n                \"file\": file,\n            },\n            request_options=request_options,\n            omit=OMIT,\n            force_multipart=True,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    AudioTranscriptionsCreateResponse,\n                    construct_type(\n                        type_=AudioTranscriptionsCreateResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n        
                    type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        
typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n\nclass AsyncRawTranscriptionsClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    async def create(\n        self,\n        *,\n        model: str,\n        language: str,\n        file: core.File,\n        temperature: typing.Optional[float] = OMIT,\n        
request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[AudioTranscriptionsCreateResponse]:\n        \"\"\"\n        Transcribe an audio file.\n\n        Parameters\n        ----------\n        model : str\n            ID of the model to use.\n\n        language : str\n            The language of the input audio, supplied in [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) format.\n\n        file : core.File\n            See core.File for more documentation\n\n        temperature : typing.Optional[float]\n            The sampling temperature, between 0 and 1. Higher values like 0.8 make the output more random, while lower values like 0.2 make it more focused and deterministic.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[AudioTranscriptionsCreateResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v2/audio/transcriptions\",\n            method=\"POST\",\n            data={\n                \"model\": model,\n                \"language\": language,\n                \"temperature\": temperature,\n            },\n            files={\n                \"file\": file,\n            },\n            request_options=request_options,\n            omit=OMIT,\n            force_multipart=True,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    AudioTranscriptionsCreateResponse,\n                    construct_type(\n                        type_=AudioTranscriptionsCreateResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise 
BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                   
         type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n"
  },
  {
    "path": "src/cohere/audio/transcriptions/types/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .audio_transcriptions_create_response import AudioTranscriptionsCreateResponse\n_dynamic_imports: typing.Dict[str, str] = {\"AudioTranscriptionsCreateResponse\": \".audio_transcriptions_create_response\"}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\"AudioTranscriptionsCreateResponse\"]\n"
  },
  {
    "path": "src/cohere/audio/transcriptions/types/audio_transcriptions_create_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\n\n\nclass AudioTranscriptionsCreateResponse(UncheckedBaseModel):\n    text: str = pydantic.Field()\n    \"\"\"\n    The transcribed text.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/aws_client.py",
    "content": "import base64\nimport json\nimport re\nimport typing\n\nimport httpx\nfrom httpx import URL, SyncByteStream, ByteStream\n\nfrom . import GenerateStreamedResponse, Generation, \\\n    NonStreamedChatResponse, EmbedResponse, StreamedChatResponse, RerankResponse, ApiMeta, ApiMetaTokens, \\\n    ApiMetaBilledUnits\nfrom .client import Client, ClientEnvironment\nfrom .core import construct_type\nfrom .manually_maintained.lazy_aws_deps import lazy_boto3, lazy_botocore\nfrom .client_v2 import ClientV2\n\nclass AwsClient(Client):\n    def __init__(\n            self,\n            *,\n            aws_access_key: typing.Optional[str] = None,\n            aws_secret_key: typing.Optional[str] = None,\n            aws_session_token: typing.Optional[str] = None,\n            aws_region: typing.Optional[str] = None,\n            timeout: typing.Optional[float] = None,\n            service: typing.Union[typing.Literal[\"bedrock\"], typing.Literal[\"sagemaker\"]],\n    ):\n        Client.__init__(\n            self,\n            base_url=\"https://api.cohere.com\",  # this url is unused for BedrockClient\n            environment=ClientEnvironment.PRODUCTION,\n            client_name=\"n/a\",\n            timeout=timeout,\n            api_key=\"n/a\",\n            httpx_client=httpx.Client(\n                event_hooks=get_event_hooks(\n                    service=service,\n                    aws_access_key=aws_access_key,\n                    aws_secret_key=aws_secret_key,\n                    aws_session_token=aws_session_token,\n                    aws_region=aws_region,\n                ),\n                timeout=timeout,\n            ),\n        )\n\n\nclass AwsClientV2(ClientV2):\n    def __init__(\n            self,\n            *,\n            aws_access_key: typing.Optional[str] = None,\n            aws_secret_key: typing.Optional[str] = None,\n            aws_session_token: typing.Optional[str] = None,\n            aws_region: typing.Optional[str] = 
None,\n            timeout: typing.Optional[float] = None,\n            service: typing.Union[typing.Literal[\"bedrock\"], typing.Literal[\"sagemaker\"]],\n    ):\n        ClientV2.__init__(\n            self,\n            base_url=\"https://api.cohere.com\",  # this url is unused for BedrockClient\n            environment=ClientEnvironment.PRODUCTION,\n            client_name=\"n/a\",\n            timeout=timeout,\n            api_key=\"n/a\",\n            httpx_client=httpx.Client(\n                event_hooks=get_event_hooks(\n                    service=service,\n                    aws_access_key=aws_access_key,\n                    aws_secret_key=aws_secret_key,\n                    aws_session_token=aws_session_token,\n                    aws_region=aws_region,\n                ),\n                timeout=timeout,\n            ),\n        )\n\n\nEventHook = typing.Callable[..., typing.Any]\n\n\ndef get_event_hooks(\n        service: str,\n        aws_access_key: typing.Optional[str] = None,\n        aws_secret_key: typing.Optional[str] = None,\n        aws_session_token: typing.Optional[str] = None,\n        aws_region: typing.Optional[str] = None,\n) -> typing.Dict[str, typing.List[EventHook]]:\n    return {\n        \"request\": [\n            map_request_to_bedrock(\n                service=service,\n                aws_access_key=aws_access_key,\n                aws_secret_key=aws_secret_key,\n                aws_session_token=aws_session_token,\n                aws_region=aws_region,\n            ),\n        ],\n        \"response\": [\n            map_response_from_bedrock()\n        ],\n    }\n\n\nTextGeneration = typing.TypedDict('TextGeneration',\n                                  {\"text\": str, \"is_finished\": str, \"event_type\": typing.Literal[\"text-generation\"]})\nStreamEnd = typing.TypedDict('StreamEnd',\n                             {\"is_finished\": str, \"event_type\": typing.Literal[\"stream-end\"], \"finish_reason\": str,\n             
                 # \"amazon-bedrock-invocationMetrics\": {\n                              #     \"inputTokenCount\": int, \"outputTokenCount\": int, \"invocationLatency\": int,\n                              #     \"firstByteLatency\": int}\n                              })\n\n\nclass Streamer(SyncByteStream):\n    lines: typing.Iterator[bytes]\n\n    def __init__(self, lines: typing.Iterator[bytes]):\n        self.lines = lines\n\n    def __iter__(self) -> typing.Iterator[bytes]:\n        return self.lines\n\n\nresponse_mapping: typing.Dict[str, typing.Any] = {\n    \"chat\": NonStreamedChatResponse,\n    \"embed\": EmbedResponse,\n    \"generate\": Generation,\n    \"rerank\": RerankResponse\n}\n\nstream_response_mapping: typing.Dict[str, typing.Any] = {\n    \"chat\": StreamedChatResponse,\n    \"generate\": GenerateStreamedResponse,\n}\n\n\ndef stream_generator(response: httpx.Response, endpoint: str) -> typing.Iterator[bytes]:\n    regex = r\"{[^\\}]*}\"\n\n    for _text in response.iter_lines():\n        match = re.search(regex, _text)\n        if match:\n            obj = json.loads(match.group())\n            if \"bytes\" in obj:\n                base64_payload = base64.b64decode(obj[\"bytes\"]).decode(\"utf-8\")\n                streamed_obj = json.loads(base64_payload)\n                if \"event_type\" in streamed_obj:\n                    response_type = stream_response_mapping[endpoint]\n                    parsed = typing.cast(response_type,  # type: ignore\n                                         construct_type(type_=response_type, object_=streamed_obj))\n                    yield (json.dumps(parsed.dict()) + \"\\n\").encode(\"utf-8\")  # type: ignore\n\n\ndef map_token_counts(response: httpx.Response) -> ApiMeta:\n    input_tokens = int(response.headers.get(\"X-Amzn-Bedrock-Input-Token-Count\", -1))\n    output_tokens = int(response.headers.get(\"X-Amzn-Bedrock-Output-Token-Count\", -1))\n    return ApiMeta(\n        
tokens=ApiMetaTokens(input_tokens=input_tokens, output_tokens=output_tokens),\n        billed_units=ApiMetaBilledUnits(input_tokens=input_tokens, output_tokens=output_tokens),\n    )\n\n\ndef map_response_from_bedrock():\n    def _hook(\n            response: httpx.Response,\n    ) -> None:\n        stream = response.headers[\"content-type\"] == \"application/vnd.amazon.eventstream\"\n        endpoint = response.request.extensions[\"endpoint\"]\n        output: typing.Iterator[bytes]\n\n        if stream:\n            output = stream_generator(httpx.Response(\n                stream=response.stream,\n                status_code=response.status_code,\n            ), endpoint)\n        else:\n            response_type = response_mapping[endpoint]\n            response_obj = json.loads(response.read())\n            response_obj[\"meta\"] = map_token_counts(response).dict()\n            cast_obj: typing.Any = typing.cast(response_type,  # type: ignore\n                                   construct_type(\n                                       type_=response_type,\n                                       # type: ignore\n                                       object_=response_obj))\n\n            output = iter([json.dumps(cast_obj.dict()).encode(\"utf-8\")])\n\n        response.stream = Streamer(output)\n        \n        # reset response object to allow for re-reading\n        if hasattr(response, \"_content\"):\n            del response._content\n        response.is_stream_consumed = False\n        response.is_closed = False\n\n    return _hook\n\ndef get_boto3_session(\n    **kwargs: typing.Any,  \n):\n    non_none_args = {k: v for k, v in kwargs.items() if v is not None}\n    return lazy_boto3().Session(**non_none_args)\n\n\n\ndef map_request_to_bedrock(\n        service: str,\n        aws_access_key: typing.Optional[str] = None,\n        aws_secret_key: typing.Optional[str] = None,\n        aws_session_token: typing.Optional[str] = None,\n        aws_region: 
typing.Optional[str] = None,\n) -> EventHook:\n    session = get_boto3_session(\n        region_name=aws_region,\n        aws_access_key_id=aws_access_key,\n        aws_secret_access_key=aws_secret_key,\n        aws_session_token=aws_session_token,\n    )\n    aws_region = session.region_name\n    credentials = session.get_credentials()\n    signer = lazy_botocore().auth.SigV4Auth(credentials, service, aws_region)\n\n    def _event_hook(request: httpx.Request) -> None:\n        headers = request.headers.copy()\n        del headers[\"connection\"]\n\n\n        api_version = request.url.path.split(\"/\")[-2]\n        endpoint = request.url.path.split(\"/\")[-1]\n        body = json.loads(request.read())\n        model = body[\"model\"]\n\n        url = get_url(\n            platform=service,\n            aws_region=aws_region,\n            model=model,  # type: ignore\n            stream=\"stream\" in body and body[\"stream\"],\n        )\n        request.url = URL(url)\n        request.headers[\"host\"] = request.url.host\n        headers[\"host\"] = request.url.host\n\n        if endpoint == \"rerank\":\n            body[\"api_version\"] = get_api_version(version=api_version)\n\n        if \"stream\" in body:\n            del body[\"stream\"]\n\n        if \"model\" in body:\n            del body[\"model\"]\n\n        new_body = json.dumps(body).encode(\"utf-8\")\n        request.stream = ByteStream(new_body)\n        request._content = new_body\n        headers[\"content-length\"] = str(len(new_body))\n\n        aws_request = lazy_botocore().awsrequest.AWSRequest(\n            method=request.method,\n            url=url,\n            headers=headers,\n            data=request.read(),\n        )\n        signer.add_auth(aws_request)\n\n        request.headers = httpx.Headers(aws_request.prepare().headers)\n        request.extensions[\"endpoint\"] = endpoint\n\n    return _event_hook\n\n\ndef get_url(\n        *,\n        platform: str,\n        aws_region: 
typing.Optional[str],\n        model: str,\n        stream: bool,\n) -> str:\n    if platform == \"bedrock\":\n        endpoint = \"invoke\" if not stream else \"invoke-with-response-stream\"\n        return f\"https://{platform}-runtime.{aws_region}.amazonaws.com/model/{model}/{endpoint}\"\n    elif platform == \"sagemaker\":\n        endpoint = \"invocations\" if not stream else \"invocations-response-stream\"\n        return f\"https://runtime.sagemaker.{aws_region}.amazonaws.com/endpoints/{model}/{endpoint}\"\n    return \"\"\n\n\ndef get_api_version(*, version: str):\n    int_version = {\n        \"v1\": 1,\n        \"v2\": 2,\n    }\n\n    return int_version.get(version, 1)"
  },
  {
    "path": "src/cohere/base_client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport os\nimport typing\n\nimport httpx\nfrom .core.api_error import ApiError\nfrom .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom .core.logging import LogConfig, Logger\nfrom .core.request_options import RequestOptions\nfrom .environment import ClientEnvironment\nfrom .raw_base_client import AsyncRawBaseCohere, RawBaseCohere\nfrom .types.chat_connector import ChatConnector\nfrom .types.chat_document import ChatDocument\nfrom .types.chat_request_citation_quality import ChatRequestCitationQuality\nfrom .types.chat_request_prompt_truncation import ChatRequestPromptTruncation\nfrom .types.chat_request_safety_mode import ChatRequestSafetyMode\nfrom .types.chat_stream_request_citation_quality import ChatStreamRequestCitationQuality\nfrom .types.chat_stream_request_prompt_truncation import ChatStreamRequestPromptTruncation\nfrom .types.chat_stream_request_safety_mode import ChatStreamRequestSafetyMode\nfrom .types.check_api_key_response import CheckApiKeyResponse\nfrom .types.classify_example import ClassifyExample\nfrom .types.classify_request_truncate import ClassifyRequestTruncate\nfrom .types.classify_response import ClassifyResponse\nfrom .types.detokenize_response import DetokenizeResponse\nfrom .types.embed_input_type import EmbedInputType\nfrom .types.embed_request_truncate import EmbedRequestTruncate\nfrom .types.embed_response import EmbedResponse\nfrom .types.embedding_type import EmbeddingType\nfrom .types.generate_request_return_likelihoods import GenerateRequestReturnLikelihoods\nfrom .types.generate_request_truncate import GenerateRequestTruncate\nfrom .types.generate_stream_request_return_likelihoods import GenerateStreamRequestReturnLikelihoods\nfrom .types.generate_stream_request_truncate import GenerateStreamRequestTruncate\nfrom .types.generate_streamed_response import GenerateStreamedResponse\nfrom 
.types.generation import Generation\nfrom .types.message import Message\nfrom .types.non_streamed_chat_response import NonStreamedChatResponse\nfrom .types.rerank_request_documents_item import RerankRequestDocumentsItem\nfrom .types.rerank_response import RerankResponse\nfrom .types.response_format import ResponseFormat\nfrom .types.streamed_chat_response import StreamedChatResponse\nfrom .types.summarize_request_extractiveness import SummarizeRequestExtractiveness\nfrom .types.summarize_request_format import SummarizeRequestFormat\nfrom .types.summarize_request_length import SummarizeRequestLength\nfrom .types.summarize_response import SummarizeResponse\nfrom .types.tokenize_response import TokenizeResponse\nfrom .types.tool import Tool\nfrom .types.tool_result import ToolResult\n\nif typing.TYPE_CHECKING:\n    from .audio.client import AsyncAudioClient, AudioClient\n    from .batches.client import AsyncBatchesClient, BatchesClient\n    from .connectors.client import AsyncConnectorsClient, ConnectorsClient\n    from .datasets.client import AsyncDatasetsClient, DatasetsClient\n    from .embed_jobs.client import AsyncEmbedJobsClient, EmbedJobsClient\n    from .finetuning.client import AsyncFinetuningClient, FinetuningClient\n    from .models.client import AsyncModelsClient, ModelsClient\n    from .v2.client import AsyncV2Client, V2Client\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass BaseCohere:\n    \"\"\"\n    Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configuration that will propagate to these functions.\n\n    Parameters\n    ----------\n    base_url : typing.Optional[str]\n        The base url to use for requests from the client.\n\n    environment : ClientEnvironment\n        The environment to use for requests from the client. 
from .environment import ClientEnvironment\n\n\n\n        Defaults to ClientEnvironment.PRODUCTION\n\n\n\n    client_name : typing.Optional[str]\n    token : typing.Optional[typing.Union[str, typing.Callable[[], str]]]\n    headers : typing.Optional[typing.Dict[str, str]]\n        Additional headers to send with every request.\n\n    timeout : typing.Optional[float]\n        The timeout to be used, in seconds, for requests. By default the timeout is 300 seconds, unless a custom httpx client is used, in which case this default is not enforced.\n\n    follow_redirects : typing.Optional[bool]\n        Whether the default httpx client follows redirects or not, this is irrelevant if a custom httpx client is passed in.\n\n    httpx_client : typing.Optional[httpx.Client]\n        The httpx client to use for making requests, a preconfigured client is used by default, however this is useful should you want to pass in any custom httpx configuration.\n\n    logging : typing.Optional[typing.Union[LogConfig, Logger]]\n        Configure logging for the SDK. Accepts a LogConfig dict with 'level' (debug/info/warn/error), 'logger' (custom logger implementation), and 'silent' (boolean, defaults to True) fields. 
You can also pass a pre-configured Logger instance.\n\n    Examples\n    --------\n    from cohere import Client\n\n    client = Client(\n        client_name=\"YOUR_CLIENT_NAME\",\n        token=\"YOUR_TOKEN\",\n    )\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        base_url: typing.Optional[str] = None,\n        environment: ClientEnvironment = ClientEnvironment.PRODUCTION,\n        client_name: typing.Optional[str] = None,\n        token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv(\"CO_API_KEY\"),\n        headers: typing.Optional[typing.Dict[str, str]] = None,\n        timeout: typing.Optional[float] = None,\n        follow_redirects: typing.Optional[bool] = True,\n        httpx_client: typing.Optional[httpx.Client] = None,\n        logging: typing.Optional[typing.Union[LogConfig, Logger]] = None,\n    ):\n        _defaulted_timeout = (\n            timeout if timeout is not None else 300 if httpx_client is None else httpx_client.timeout.read\n        )\n        if token is None:\n            raise ApiError(body=\"The client must be instantiated be either passing in token or setting CO_API_KEY\")\n        self._client_wrapper = SyncClientWrapper(\n            base_url=_get_base_url(base_url=base_url, environment=environment),\n            client_name=client_name,\n            token=token,\n            headers=headers,\n            httpx_client=httpx_client\n            if httpx_client is not None\n            else httpx.Client(timeout=_defaulted_timeout, follow_redirects=follow_redirects)\n            if follow_redirects is not None\n            else httpx.Client(timeout=_defaulted_timeout),\n            timeout=_defaulted_timeout,\n            logging=logging,\n        )\n        self._raw_client = RawBaseCohere(client_wrapper=self._client_wrapper)\n        self._v2: typing.Optional[V2Client] = None\n        self._batches: typing.Optional[BatchesClient] = None\n        self._embed_jobs: 
typing.Optional[EmbedJobsClient] = None\n        self._datasets: typing.Optional[DatasetsClient] = None\n        self._connectors: typing.Optional[ConnectorsClient] = None\n        self._models: typing.Optional[ModelsClient] = None\n        self._finetuning: typing.Optional[FinetuningClient] = None\n        self._audio: typing.Optional[AudioClient] = None\n\n    @property\n    def with_raw_response(self) -> RawBaseCohere:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        RawBaseCohere\n        \"\"\"\n        return self._raw_client\n\n    def chat_stream(\n        self,\n        *,\n        message: str,\n        accepts: typing.Optional[typing.Literal[\"text/event-stream\"]] = None,\n        model: typing.Optional[str] = OMIT,\n        preamble: typing.Optional[str] = OMIT,\n        chat_history: typing.Optional[typing.Sequence[Message]] = OMIT,\n        conversation_id: typing.Optional[str] = OMIT,\n        prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation] = OMIT,\n        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,\n        search_queries_only: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,\n        citation_quality: typing.Optional[ChatStreamRequestCitationQuality] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        max_input_tokens: typing.Optional[int] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,\n        
tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT,\n        force_single_step: typing.Optional[bool] = OMIT,\n        response_format: typing.Optional[ResponseFormat] = OMIT,\n        safety_mode: typing.Optional[ChatStreamRequestSafetyMode] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> typing.Iterator[StreamedChatResponse]:\n        \"\"\"\n        Generates a streamed text response to a user message.\n\n        To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api).\n\n        Parameters\n        ----------\n        message : str\n            Text input for the model to respond to.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        accepts : typing.Optional[typing.Literal[\"text/event-stream\"]]\n            Pass text/event-stream to receive the streamed response as server-sent events. The default is `\\\\n` delimited events.\n\n        model : typing.Optional[str]\n            The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.\n\n            Compatible Deployments: Cohere Platform, Private Deployments\n\n        preamble : typing.Optional[str]\n            When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.\n\n            The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. 
Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        chat_history : typing.Optional[typing.Sequence[Message]]\n            A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.\n\n            Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.\n\n            The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        conversation_id : typing.Optional[str]\n            An alternative to `chat_history`.\n\n            Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string.\n\n            Compatible Deployments: Cohere Platform\n\n        prompt_truncation : typing.Optional[ChatStreamRequestPromptTruncation]\n            Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.\n\n            Dictates how the prompt will be constructed.\n\n            With `prompt_truncation` set to \"AUTO\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. 
During this process the order of the documents and chat history will be changed and ranked by relevance.\n\n            With `prompt_truncation` set to \"AUTO_PRESERVE_ORDER\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.\n\n            With `prompt_truncation` set to \"OFF\", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.\n\n            Compatible Deployments:\n             - AUTO: Cohere Platform Only\n             - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        connectors : typing.Optional[typing.Sequence[ChatConnector]]\n            Accepts `{\"id\": \"web-search\"}`, and/or the `\"id\"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) one.\n\n            When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).\n\n            Compatible Deployments: Cohere Platform\n\n        search_queries_only : typing.Optional[bool]\n            Defaults to `false`.\n\n            When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        documents : typing.Optional[typing.Sequence[ChatDocument]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. 
Each document is a string-string dictionary.\n\n            Example:\n            ```\n            [\n              { \"title\": \"Tall penguins\", \"text\": \"Emperor penguins are the tallest.\" },\n              { \"title\": \"Penguin habitats\", \"text\": \"Emperor penguins only live in Antarctica.\" },\n            ]\n            ```\n\n            Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.\n\n            Some suggested keys are \"text\", \"author\", and \"date\". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.\n\n            An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.\n\n            An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The \"_excludes\" field will not be passed to the model.\n\n            See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        citation_quality : typing.Optional[ChatStreamRequestCitationQuality]\n            Defaults to `\"enabled\"`.\n            Citations are enabled by default for models that support it, but can be turned off by setting `\"type\": \"disabled\"`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. 
Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_input_tokens : typing.Optional[int]\n            The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer.\n\n            Input will be truncated according to the `prompt_truncation` parameter.\n\n            Compatible Deployments: Cohere Platform\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. 
However,\n            determinism cannot be totally guaranteed.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without\n            any pre-processing.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tools : typing.Optional[typing.Sequence[Tool]]\n            A list of available tools (functions) that the model may suggest invoking before producing a text response.\n\n            When `tools` is passed (without `tool_results`), the `text` field in the response will be `\"\"` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tool_results : typing.Optional[typing.Sequence[ToolResult]]\n            A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.\n            Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.\n\n            **Note**: `outputs` must be a list of objects. 
If your tool returns a single object (eg `{\"status\": 200}`), make sure to wrap it in a list.\n            ```\n            tool_results = [\n              {\n                \"call\": {\n                  \"name\": <tool name>,\n                  \"parameters\": {\n                    <param name>: <param value>\n                  }\n                },\n                \"outputs\": [{\n                  <key>: <value>\n                }]\n              },\n              ...\n            ]\n            ```\n            **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        force_single_step : typing.Optional[bool]\n            Forces the chat to be single step. Defaults to `false`.\n\n        response_format : typing.Optional[ResponseFormat]\n\n        safety_mode : typing.Optional[ChatStreamRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/docs/safety-modes) inserted into the prompt. 
Defaults to `CONTEXTUAL`.\n            When `NONE` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.\n\n            **Note**: This parameter is only compatible newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Yields\n        ------\n        typing.Iterator[StreamedChatResponse]\n\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        response = client.chat_stream(\n            model=\"command-a-03-2025\",\n            message=\"hello!\",\n        )\n        for chunk in response:\n            yield chunk\n        \"\"\"\n        with self._raw_client.chat_stream(\n            message=message,\n            accepts=accepts,\n            model=model,\n            preamble=preamble,\n            chat_history=chat_history,\n            conversation_id=conversation_id,\n            prompt_truncation=prompt_truncation,\n            connectors=connectors,\n            search_queries_only=search_queries_only,\n            documents=documents,\n            citation_quality=citation_quality,\n            temperature=temperature,\n            max_tokens=max_tokens,\n            max_input_tokens=max_input_tokens,\n            k=k,\n            p=p,\n            seed=seed,\n            stop_sequences=stop_sequences,\n  
          frequency_penalty=frequency_penalty,\n            presence_penalty=presence_penalty,\n            raw_prompting=raw_prompting,\n            tools=tools,\n            tool_results=tool_results,\n            force_single_step=force_single_step,\n            response_format=response_format,\n            safety_mode=safety_mode,\n            request_options=request_options,\n        ) as r:\n            yield from r.data\n\n    def chat(\n        self,\n        *,\n        message: str,\n        accepts: typing.Optional[typing.Literal[\"text/event-stream\"]] = None,\n        model: typing.Optional[str] = OMIT,\n        preamble: typing.Optional[str] = OMIT,\n        chat_history: typing.Optional[typing.Sequence[Message]] = OMIT,\n        conversation_id: typing.Optional[str] = OMIT,\n        prompt_truncation: typing.Optional[ChatRequestPromptTruncation] = OMIT,\n        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,\n        search_queries_only: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,\n        citation_quality: typing.Optional[ChatRequestCitationQuality] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        max_input_tokens: typing.Optional[int] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,\n        tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT,\n        force_single_step: typing.Optional[bool] = OMIT,\n        response_format: typing.Optional[ResponseFormat] = OMIT,\n        safety_mode: 
typing.Optional[ChatRequestSafetyMode] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> NonStreamedChatResponse:\n        \"\"\"\n        Generates a text response to a user message.\n        To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api).\n\n        Parameters\n        ----------\n        message : str\n            Text input for the model to respond to.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        accepts : typing.Optional[typing.Literal[\"text/event-stream\"]]\n            Pass text/event-stream to receive the streamed response as server-sent events. The default is `\\\\n` delimited events.\n\n        model : typing.Optional[str]\n            The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.\n\n            Compatible Deployments: Cohere Platform, Private Deployments\n\n        preamble : typing.Optional[str]\n            When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.\n\n            The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. 
Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        chat_history : typing.Optional[typing.Sequence[Message]]\n            A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.\n\n            Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.\n\n            The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        conversation_id : typing.Optional[str]\n            An alternative to `chat_history`.\n\n            Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string.\n\n            Compatible Deployments: Cohere Platform\n\n        prompt_truncation : typing.Optional[ChatRequestPromptTruncation]\n            Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.\n\n            Dictates how the prompt will be constructed.\n\n            With `prompt_truncation` set to \"AUTO\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. 
During this process the order of the documents and chat history will be changed and ranked by relevance.\n\n            With `prompt_truncation` set to \"AUTO_PRESERVE_ORDER\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.\n\n            With `prompt_truncation` set to \"OFF\", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.\n\n            Compatible Deployments:\n             - AUTO: Cohere Platform Only\n             - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        connectors : typing.Optional[typing.Sequence[ChatConnector]]\n            Accepts `{\"id\": \"web-search\"}`, and/or the `\"id\"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) one.\n\n            When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).\n\n            Compatible Deployments: Cohere Platform\n\n        search_queries_only : typing.Optional[bool]\n            Defaults to `false`.\n\n            When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        documents : typing.Optional[typing.Sequence[ChatDocument]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. 
Each document is a string-string dictionary.\n\n            Example:\n            ```\n            [\n              { \"title\": \"Tall penguins\", \"text\": \"Emperor penguins are the tallest.\" },\n              { \"title\": \"Penguin habitats\", \"text\": \"Emperor penguins only live in Antarctica.\" },\n            ]\n            ```\n\n            Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.\n\n            Some suggested keys are \"text\", \"author\", and \"date\". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.\n\n            An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.\n\n            An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The \"_excludes\" field will not be passed to the model.\n\n            See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        citation_quality : typing.Optional[ChatRequestCitationQuality]\n            Defaults to `\"enabled\"`.\n            Citations are enabled by default for models that support it, but can be turned off by setting `\"type\": \"disabled\"`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. 
Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_input_tokens : typing.Optional[int]\n            The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer.\n\n            Input will be truncated according to the `prompt_truncation` parameter.\n\n            Compatible Deployments: Cohere Platform\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. 
However,\n            determinism cannot be totally guaranteed.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without\n            any pre-processing.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tools : typing.Optional[typing.Sequence[Tool]]\n            A list of available tools (functions) that the model may suggest invoking before producing a text response.\n\n            When `tools` is passed (without `tool_results`), the `text` field in the response will be `\"\"` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tool_results : typing.Optional[typing.Sequence[ToolResult]]\n            A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.\n            Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.\n\n            **Note**: `outputs` must be a list of objects. 
If your tool returns a single object (eg `{\"status\": 200}`), make sure to wrap it in a list.\n            ```\n            tool_results = [\n              {\n                \"call\": {\n                  \"name\": <tool name>,\n                  \"parameters\": {\n                    <param name>: <param value>\n                  }\n                },\n                \"outputs\": [{\n                  <key>: <value>\n                }]\n              },\n              ...\n            ]\n            ```\n            **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        force_single_step : typing.Optional[bool]\n            Forces the chat to be single step. Defaults to `false`.\n\n        response_format : typing.Optional[ResponseFormat]\n\n        safety_mode : typing.Optional[ChatRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/docs/safety-modes) inserted into the prompt. 
Defaults to `CONTEXTUAL`.\n            When `NONE` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.\n\n            **Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        NonStreamedChatResponse\n\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.chat(\n            model=\"command-a-03-2025\",\n            message=\"Tell me about LLMs\",\n        )\n        \"\"\"\n        _response = self._raw_client.chat(\n            message=message,\n            accepts=accepts,\n            model=model,\n            preamble=preamble,\n            chat_history=chat_history,\n            conversation_id=conversation_id,\n            prompt_truncation=prompt_truncation,\n            connectors=connectors,\n            search_queries_only=search_queries_only,\n            documents=documents,\n            citation_quality=citation_quality,\n            temperature=temperature,\n            max_tokens=max_tokens,\n            max_input_tokens=max_input_tokens,\n            k=k,\n            p=p,\n            seed=seed,\n            stop_sequences=stop_sequences,\n            frequency_penalty=frequency_penalty,\n            
presence_penalty=presence_penalty,\n            raw_prompting=raw_prompting,\n            tools=tools,\n            tool_results=tool_results,\n            force_single_step=force_single_step,\n            response_format=response_format,\n            safety_mode=safety_mode,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def generate_stream(\n        self,\n        *,\n        prompt: str,\n        model: typing.Optional[str] = OMIT,\n        num_generations: typing.Optional[int] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        truncate: typing.Optional[GenerateStreamRequestTruncate] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        preset: typing.Optional[str] = OMIT,\n        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> typing.Iterator[GenerateStreamedResponse]:\n        \"\"\"\n        <Warning>\n        This API is marked as \"Legacy\" and is no longer maintained. 
Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat with Streaming API.\n        </Warning>\n        Generates realistic text conditioned on a given input.\n\n        Parameters\n        ----------\n        prompt : str\n            The input text that serves as the starting point for generating the response.\n            Note: The prompt will be pre-processed and modified before reaching the model.\n\n        model : typing.Optional[str]\n            The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).\n            Smaller, \"light\" models are faster, while larger models will perform better. [Custom models](https://docs.cohere.com/docs/training-custom-models) can also be supplied with their full ID.\n\n        num_generations : typing.Optional[int]\n            The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\n            This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.\n\n            Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.\n\n        truncate : typing.Optional[GenerateStreamRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. 
In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        temperature : typing.Optional[float]\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.\n            Defaults to `0.75`, min value of `0.0`, max value of `5.0`.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        preset : typing.Optional[str]\n            Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate).\n            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.\n\n        end_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the end of the earliest occurrence of a stop sequence. 
The sequence will be included in the text.\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        frequency_penalty : typing.Optional[float]\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        return_likelihoods : typing.Optional[GenerateStreamRequestReturnLikelihoods]\n            One of `GENERATION|NONE` to specify how and if the token likelihoods are returned with the response. 
Defaults to `NONE`.\n\n            If `GENERATION` is selected, the token likelihoods will only be provided for generated text.\n\n            WARNING: `ALL` is deprecated, and will be removed in a future release.\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without any pre-processing.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Yields\n        ------\n        typing.Iterator[GenerateStreamedResponse]\n\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        response = client.generate_stream(\n            prompt=\"Please explain to me how LLMs work\",\n        )\n        for chunk in response:\n            yield chunk\n        \"\"\"\n        with self._raw_client.generate_stream(\n            prompt=prompt,\n            model=model,\n            num_generations=num_generations,\n            max_tokens=max_tokens,\n            truncate=truncate,\n            temperature=temperature,\n            seed=seed,\n            preset=preset,\n            end_sequences=end_sequences,\n            stop_sequences=stop_sequences,\n            k=k,\n            p=p,\n            frequency_penalty=frequency_penalty,\n            presence_penalty=presence_penalty,\n            return_likelihoods=return_likelihoods,\n            raw_prompting=raw_prompting,\n            request_options=request_options,\n        ) as r:\n            yield from r.data\n\n    def generate(\n        self,\n        *,\n        prompt: str,\n        model: typing.Optional[str] = OMIT,\n        num_generations: typing.Optional[int] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        truncate: typing.Optional[GenerateRequestTruncate] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n       
 seed: typing.Optional[int] = OMIT,\n        preset: typing.Optional[str] = OMIT,\n        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> Generation:\n        \"\"\"\n        <Warning>\n        This API is marked as \"Legacy\" and is no longer maintained. Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.\n        </Warning>\n        Generates realistic text conditioned on a given input.\n\n        Parameters\n        ----------\n        prompt : str\n            The input text that serves as the starting point for generating the response.\n            Note: The prompt will be pre-processed and modified before reaching the model.\n\n        model : typing.Optional[str]\n            The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).\n            Smaller, \"light\" models are faster, while larger models will perform better. [Custom models](https://docs.cohere.com/docs/training-custom-models) can also be supplied with their full ID.\n\n        num_generations : typing.Optional[int]\n            The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. 
Note: Setting a low value may result in incomplete generations.\n\n            This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.\n\n            Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.\n\n        truncate : typing.Optional[GenerateRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        temperature : typing.Optional[float]\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.\n            Defaults to `0.75`, min value of `0.0`, max value of `5.0`.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        preset : typing.Optional[str]\n            Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. 
You can create presets in the [playground](https://dashboard.cohere.com/playground/generate).\n            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.\n\n        end_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        frequency_penalty : typing.Optional[float]\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Can be used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        return_likelihoods : typing.Optional[GenerateRequestReturnLikelihoods]\n            One of `GENERATION|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.\n\n            If `GENERATION` is selected, the token likelihoods will only be provided for generated text.\n\n            WARNING: `ALL` is deprecated, and will be removed in a future release.\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without any pre-processing.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        Generation\n\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.generate(\n            prompt=\"Please explain to me how LLMs work\",\n        )\n        \"\"\"\n        _response = self._raw_client.generate(\n            prompt=prompt,\n            model=model,\n            num_generations=num_generations,\n            max_tokens=max_tokens,\n            truncate=truncate,\n            temperature=temperature,\n            seed=seed,\n            preset=preset,\n            end_sequences=end_sequences,\n            stop_sequences=stop_sequences,\n            k=k,\n            p=p,\n            frequency_penalty=frequency_penalty,\n            presence_penalty=presence_penalty,\n            return_likelihoods=return_likelihoods,\n            raw_prompting=raw_prompting,\n            request_options=request_options,\n        )\n        return 
_response.data\n\n    def embed(\n        self,\n        *,\n        texts: typing.Optional[typing.Sequence[str]] = OMIT,\n        images: typing.Optional[typing.Sequence[str]] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        input_type: typing.Optional[EmbedInputType] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[EmbedRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> EmbedResponse:\n        \"\"\"\n        This endpoint returns text and image embeddings. An embedding is a list of floating point numbers that captures semantic information about the content that it represents.\n\n        Embeddings can be used to create classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page.\n\n        If you want to learn more how to use the embedding model, have a look at the [Semantic Search Guide](https://docs.cohere.com/docs/semantic-search).\n\n        Parameters\n        ----------\n        texts : typing.Optional[typing.Sequence[str]]\n            An array of strings for the model to embed. Maximum number of texts per call is `96`.\n\n        images : typing.Optional[typing.Sequence[str]]\n            An array of image data URIs for the model to embed. Maximum number of images per call is `1`.\n\n            The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). 
The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.\n\n            Images are only supported with Embed v3.0 and newer models.\n\n        model : typing.Optional[str]\n            ID of one of the available [Embedding models](https://docs.cohere.com/docs/cohere-embed).\n\n        input_type : typing.Optional[EmbedInputType]\n\n        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]\n            Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.\n\n            * `\"float\"`: Use this when you want to get back the default float embeddings. Supported with all Embed models.\n            * `\"int8\"`: Use this when you want to get back signed int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"binary\"`: Use this when you want to get back signed binary embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Supported with Embed v3.0 and newer Embed models.\n\n        truncate : typing.Optional[EmbedRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. 
In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        EmbedResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.embed(\n            texts=[\"hello\", \"goodbye\"],\n            model=\"embed-v4.0\",\n            input_type=\"classification\",\n        )\n        \"\"\"\n        _response = self._raw_client.embed(\n            texts=texts,\n            images=images,\n            model=model,\n            input_type=input_type,\n            embedding_types=embedding_types,\n            truncate=truncate,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def rerank(\n        self,\n        *,\n        query: str,\n        documents: typing.Sequence[RerankRequestDocumentsItem],\n        model: typing.Optional[str] = OMIT,\n        top_n: typing.Optional[int] = OMIT,\n        rank_fields: typing.Optional[typing.Sequence[str]] = OMIT,\n        return_documents: typing.Optional[bool] = OMIT,\n        max_chunks_per_doc: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> RerankResponse:\n        \"\"\"\n        This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.\n\n        Parameters\n        ----------\n        query : str\n            The search query\n\n        documents : typing.Sequence[RerankRequestDocumentsItem]\n            A list of document objects or strings to rerank.\n            If a 
document is provided the text field is required and all other fields will be preserved in the response.\n\n            The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000.\n\n            We recommend a maximum of 1,000 documents for optimal endpoint performance.\n\n        model : typing.Optional[str]\n            The identifier of the model to use, eg `rerank-v3.5`.\n\n        top_n : typing.Optional[int]\n            The number of most relevant documents or indices to return, defaults to the length of the documents\n\n        rank_fields : typing.Optional[typing.Sequence[str]]\n            If a JSON object is provided, you can specify which keys you would like to have considered for reranking. The model will rerank based on order of the fields passed in (i.e. rank_fields=['title','author','text'] will rerank using the values in title, author, text  sequentially. If the length of title, author, and text exceeds the context length of the model, the chunking will not re-consider earlier fields). 
If not provided, the model will use the default text field for ranking.\n\n        return_documents : typing.Optional[bool]\n            - If false, returns results without the doc text - the api will return a list of {index, relevance score} where index is inferred from the list passed into the request.\n            - If true, returns results with the doc text passed in - the api will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request.\n\n        max_chunks_per_doc : typing.Optional[int]\n            The maximum number of chunks to produce internally from a document\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        RerankResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.rerank(\n            documents=[\n                {\n                    \"text\": \"Carson City is the capital city of the American state of Nevada.\"\n                },\n                {\n                    \"text\": \"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.\"\n                },\n                {\n                    \"text\": \"Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.\"\n                },\n                {\n                    \"text\": \"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. 
 It is a federal district.\"\n                },\n                {\n                    \"text\": \"Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.\"\n                },\n            ],\n            query=\"What is the capital of the United States?\",\n            top_n=3,\n            model=\"rerank-v3.5\",\n        )\n        \"\"\"\n        _response = self._raw_client.rerank(\n            query=query,\n            documents=documents,\n            model=model,\n            top_n=top_n,\n            rank_fields=rank_fields,\n            return_documents=return_documents,\n            max_chunks_per_doc=max_chunks_per_doc,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def classify(\n        self,\n        *,\n        inputs: typing.Sequence[str],\n        examples: typing.Optional[typing.Sequence[ClassifyExample]] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        preset: typing.Optional[str] = OMIT,\n        truncate: typing.Optional[ClassifyRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ClassifyResponse:\n        \"\"\"\n        This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference.\n        Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.\n\n        Parameters\n        ----------\n        inputs : typing.Sequence[str]\n            A list of up to 96 texts to be classified. Each one must be a non-empty string.\n            There is, however, no consistent, universal limit to the length a particular input can be. 
We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the \"max tokens\" column [here](https://docs.cohere.com/docs/models).\n            Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts.\n\n        examples : typing.Optional[typing.Sequence[ClassifyExample]]\n            An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: \"...\",label: \"...\"}`.\n            Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.\n\n        model : typing.Optional[str]\n            ID of a [Fine-tuned](https://docs.cohere.com/v2/docs/classify-starting-the-training) Classify model\n\n        preset : typing.Optional[str]\n            The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.com/playground). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters.\n\n        truncate : typing.Optional[ClassifyRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. 
In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ClassifyResponse\n            OK\n\n        Examples\n        --------\n        from cohere import ClassifyExample, Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.classify(\n            examples=[\n                ClassifyExample(\n                    text=\"Dermatologists don't like her!\",\n                    label=\"Spam\",\n                ),\n                ClassifyExample(\n                    text=\"'Hello, open to this?'\",\n                    label=\"Spam\",\n                ),\n                ClassifyExample(\n                    text=\"I need help please wire me $1000 right now\",\n                    label=\"Spam\",\n                ),\n                ClassifyExample(\n                    text=\"Nice to know you ;)\",\n                    label=\"Spam\",\n                ),\n                ClassifyExample(\n                    text=\"Please help me?\",\n                    label=\"Spam\",\n                ),\n                ClassifyExample(\n                    text=\"Your parcel will be delivered today\",\n                    label=\"Not spam\",\n                ),\n                ClassifyExample(\n                    text=\"Review changes to our Terms and Conditions\",\n                    label=\"Not spam\",\n                ),\n                ClassifyExample(\n                    text=\"Weekly sync notes\",\n                    label=\"Not spam\",\n                ),\n                ClassifyExample(\n                    text=\"'Re: Follow up from today's 
meeting'\",\n                    label=\"Not spam\",\n                ),\n                ClassifyExample(\n                    text=\"Pre-read for tomorrow\",\n                    label=\"Not spam\",\n                ),\n            ],\n            inputs=[\"Confirm your email address\", \"hey i need u to send some $\"],\n            model=\"YOUR-FINE-TUNED-MODEL-ID\",\n        )\n        \"\"\"\n        _response = self._raw_client.classify(\n            inputs=inputs,\n            examples=examples,\n            model=model,\n            preset=preset,\n            truncate=truncate,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def summarize(\n        self,\n        *,\n        text: str,\n        length: typing.Optional[SummarizeRequestLength] = OMIT,\n        format: typing.Optional[SummarizeRequestFormat] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        extractiveness: typing.Optional[SummarizeRequestExtractiveness] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        additional_command: typing.Optional[str] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> SummarizeResponse:\n        \"\"\"\n        <Warning>\n        This API is marked as \"Legacy\" and is no longer maintained. Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.\n        </Warning>\n        Generates a summary in English for a given text.\n\n        Parameters\n        ----------\n        text : str\n            The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English.\n\n        length : typing.Optional[SummarizeRequestLength]\n            One of `short`, `medium`, `long`, or `auto` defaults to `auto`. Indicates the approximate length of the summary. 
If `auto` is selected, the best option will be picked based on the input text.\n\n        format : typing.Optional[SummarizeRequestFormat]\n            One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text.\n\n        model : typing.Optional[str]\n            The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, \"light\" models are faster, while larger models will perform better.\n\n        extractiveness : typing.Optional[SummarizeRequestExtractiveness]\n            One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text.\n\n        temperature : typing.Optional[float]\n            Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1.\n\n        additional_command : typing.Optional[str]\n            A free-form instruction for modifying how the summaries get generated. Should complete the sentence \"Generate a summary _\". Eg. 
\"focusing on the next steps\" or \"written by Yoda\"\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        SummarizeResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.summarize(\n            text='Ice cream is a sweetened frozen food typically eaten as a snack or dessert. It may be made from milk or cream and is flavoured with a sweetener, either sugar or an alternative, and a spice, such as cocoa or vanilla, or with fruit such as strawberries or peaches. It can also be made by whisking a flavored cream base and liquid nitrogen together. Food coloring is sometimes added, in addition to stabilizers. The mixture is cooled below the freezing point of water and stirred to incorporate air spaces and to prevent detectable ice crystals from forming. The result is a smooth, semi-solid foam that is solid at very low temperatures (below 2 °C or 35 °F). It becomes more malleable as its temperature increases.\\n\\nThe meaning of the name \"ice cream\" varies from one country to another. In some countries, such as the United States, \"ice cream\" applies only to a specific variety, and most governments regulate the commercial use of the various terms according to the relative quantities of the main ingredients, notably the amount of cream. Products that do not meet the criteria to be called ice cream are sometimes labelled \"frozen dairy dessert\" instead. In other countries, such as Italy and Argentina, one word is used fo\\r all variants. 
Analogues made from dairy alternatives, such as goat\\'s or sheep\\'s milk, or milk substitutes (e.g., soy, cashew, coconut, almond milk or tofu), are available for those who are lactose intolerant, allergic to dairy protein or vegan.',\n        )\n        \"\"\"\n        _response = self._raw_client.summarize(\n            text=text,\n            length=length,\n            format=format,\n            model=model,\n            extractiveness=extractiveness,\n            temperature=temperature,\n            additional_command=additional_command,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def tokenize(\n        self, *, text: str, model: str, request_options: typing.Optional[RequestOptions] = None\n    ) -> TokenizeResponse:\n        \"\"\"\n        This endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). To learn more about tokenization and byte pair encoding, see the tokens page.\n\n        Parameters\n        ----------\n        text : str\n            The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters.\n\n        model : str\n            The input will be tokenized by the tokenizer that is used by this model.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        TokenizeResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.tokenize(\n            text=\"tokenize me! 
:D\",\n            model=\"command\",\n        )\n        \"\"\"\n        _response = self._raw_client.tokenize(text=text, model=model, request_options=request_options)\n        return _response.data\n\n    def detokenize(\n        self, *, tokens: typing.Sequence[int], model: str, request_options: typing.Optional[RequestOptions] = None\n    ) -> DetokenizeResponse:\n        \"\"\"\n        This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page.\n\n        Parameters\n        ----------\n        tokens : typing.Sequence[int]\n            The list of tokens to be detokenized.\n\n        model : str\n            An optional parameter to provide the model name. This will ensure that the detokenization is done by the tokenizer used by that model.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DetokenizeResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.detokenize(\n            tokens=[10002, 2261, 2012, 8, 2792, 43],\n            model=\"command\",\n        )\n        \"\"\"\n        _response = self._raw_client.detokenize(tokens=tokens, model=model, request_options=request_options)\n        return _response.data\n\n    def check_api_key(self, *, request_options: typing.Optional[RequestOptions] = None) -> CheckApiKeyResponse:\n        \"\"\"\n        Checks that the api key in the Authorization header is valid and active\n\n        Parameters\n        ----------\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        CheckApiKeyResponse\n            OK\n\n        Examples\n        
--------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.check_api_key()\n        \"\"\"\n        _response = self._raw_client.check_api_key(request_options=request_options)\n        return _response.data\n\n    @property\n    def v2(self):\n        if self._v2 is None:\n            from .v2.client import V2Client  # noqa: E402\n\n            self._v2 = V2Client(client_wrapper=self._client_wrapper)\n        return self._v2\n\n    @property\n    def batches(self):\n        if self._batches is None:\n            from .batches.client import BatchesClient  # noqa: E402\n\n            self._batches = BatchesClient(client_wrapper=self._client_wrapper)\n        return self._batches\n\n    @property\n    def embed_jobs(self):\n        if self._embed_jobs is None:\n            from .embed_jobs.client import EmbedJobsClient  # noqa: E402\n\n            self._embed_jobs = EmbedJobsClient(client_wrapper=self._client_wrapper)\n        return self._embed_jobs\n\n    @property\n    def datasets(self):\n        if self._datasets is None:\n            from .datasets.client import DatasetsClient  # noqa: E402\n\n            self._datasets = DatasetsClient(client_wrapper=self._client_wrapper)\n        return self._datasets\n\n    @property\n    def connectors(self):\n        if self._connectors is None:\n            from .connectors.client import ConnectorsClient  # noqa: E402\n\n            self._connectors = ConnectorsClient(client_wrapper=self._client_wrapper)\n        return self._connectors\n\n    @property\n    def models(self):\n        if self._models is None:\n            from .models.client import ModelsClient  # noqa: E402\n\n            self._models = ModelsClient(client_wrapper=self._client_wrapper)\n        return self._models\n\n    @property\n    def finetuning(self):\n        if self._finetuning is None:\n            from .finetuning.client 
import FinetuningClient  # noqa: E402\n\n            self._finetuning = FinetuningClient(client_wrapper=self._client_wrapper)\n        return self._finetuning\n\n    @property\n    def audio(self):\n        if self._audio is None:\n            from .audio.client import AudioClient  # noqa: E402\n\n            self._audio = AudioClient(client_wrapper=self._client_wrapper)\n        return self._audio\n\n\ndef _make_default_async_client(\n    timeout: typing.Optional[float],\n    follow_redirects: typing.Optional[bool],\n) -> httpx.AsyncClient:\n    try:\n        import httpx_aiohttp  # type: ignore[import-not-found]\n    except ImportError:\n        pass\n    else:\n        if follow_redirects is not None:\n            return httpx_aiohttp.HttpxAiohttpClient(timeout=timeout, follow_redirects=follow_redirects)\n        return httpx_aiohttp.HttpxAiohttpClient(timeout=timeout)\n\n    if follow_redirects is not None:\n        return httpx.AsyncClient(timeout=timeout, follow_redirects=follow_redirects)\n    return httpx.AsyncClient(timeout=timeout)\n\n\nclass AsyncBaseCohere:\n    \"\"\"\n    Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configuration that will propagate to these functions.\n\n    Parameters\n    ----------\n    base_url : typing.Optional[str]\n        The base url to use for requests from the client.\n\n    environment : ClientEnvironment\n        The environment to use for requests from the client. from .environment import ClientEnvironment\n\n\n\n        Defaults to ClientEnvironment.PRODUCTION\n\n\n\n    client_name : typing.Optional[str]\n    token : typing.Optional[typing.Union[str, typing.Callable[[], str]]]\n    headers : typing.Optional[typing.Dict[str, str]]\n        Additional headers to send with every request.\n\n    async_token : typing.Optional[typing.Callable[[], typing.Awaitable[str]]]\n        An async callable that returns a bearer token. 
Use this when token acquisition involves async I/O (e.g., refreshing tokens via an async HTTP client). When provided, this is used instead of the synchronous token for async requests.\n\n    timeout : typing.Optional[float]\n        The timeout to be used, in seconds, for requests. By default the timeout is 300 seconds, unless a custom httpx client is used, in which case this default is not enforced.\n\n    follow_redirects : typing.Optional[bool]\n        Whether the default httpx client follows redirects or not, this is irrelevant if a custom httpx client is passed in.\n\n    httpx_client : typing.Optional[httpx.AsyncClient]\n        The httpx client to use for making requests, a preconfigured client is used by default, however this is useful should you want to pass in any custom httpx configuration.\n\n    logging : typing.Optional[typing.Union[LogConfig, Logger]]\n        Configure logging for the SDK. Accepts a LogConfig dict with 'level' (debug/info/warn/error), 'logger' (custom logger implementation), and 'silent' (boolean, defaults to True) fields. 
You can also pass a pre-configured Logger instance.\n\n    Examples\n    --------\n    from cohere import AsyncClient\n\n    client = AsyncClient(\n        client_name=\"YOUR_CLIENT_NAME\",\n        token=\"YOUR_TOKEN\",\n    )\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        base_url: typing.Optional[str] = None,\n        environment: ClientEnvironment = ClientEnvironment.PRODUCTION,\n        client_name: typing.Optional[str] = None,\n        token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv(\"CO_API_KEY\"),\n        headers: typing.Optional[typing.Dict[str, str]] = None,\n        async_token: typing.Optional[typing.Callable[[], typing.Awaitable[str]]] = None,\n        timeout: typing.Optional[float] = None,\n        follow_redirects: typing.Optional[bool] = True,\n        httpx_client: typing.Optional[httpx.AsyncClient] = None,\n        logging: typing.Optional[typing.Union[LogConfig, Logger]] = None,\n    ):\n        _defaulted_timeout = (\n            timeout if timeout is not None else 300 if httpx_client is None else httpx_client.timeout.read\n        )\n        if token is None:\n            raise ApiError(body=\"The client must be instantiated be either passing in token or setting CO_API_KEY\")\n        self._client_wrapper = AsyncClientWrapper(\n            base_url=_get_base_url(base_url=base_url, environment=environment),\n            client_name=client_name,\n            token=token,\n            headers=headers,\n            async_token=async_token,\n            httpx_client=httpx_client\n            if httpx_client is not None\n            else _make_default_async_client(timeout=_defaulted_timeout, follow_redirects=follow_redirects),\n            timeout=_defaulted_timeout,\n            logging=logging,\n        )\n        self._raw_client = AsyncRawBaseCohere(client_wrapper=self._client_wrapper)\n        self._v2: typing.Optional[AsyncV2Client] = None\n        self._batches: 
typing.Optional[AsyncBatchesClient] = None\n        self._embed_jobs: typing.Optional[AsyncEmbedJobsClient] = None\n        self._datasets: typing.Optional[AsyncDatasetsClient] = None\n        self._connectors: typing.Optional[AsyncConnectorsClient] = None\n        self._models: typing.Optional[AsyncModelsClient] = None\n        self._finetuning: typing.Optional[AsyncFinetuningClient] = None\n        self._audio: typing.Optional[AsyncAudioClient] = None\n\n    @property\n    def with_raw_response(self) -> AsyncRawBaseCohere:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        AsyncRawBaseCohere\n        \"\"\"\n        return self._raw_client\n\n    async def chat_stream(\n        self,\n        *,\n        message: str,\n        accepts: typing.Optional[typing.Literal[\"text/event-stream\"]] = None,\n        model: typing.Optional[str] = OMIT,\n        preamble: typing.Optional[str] = OMIT,\n        chat_history: typing.Optional[typing.Sequence[Message]] = OMIT,\n        conversation_id: typing.Optional[str] = OMIT,\n        prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation] = OMIT,\n        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,\n        search_queries_only: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,\n        citation_quality: typing.Optional[ChatStreamRequestCitationQuality] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        max_input_tokens: typing.Optional[int] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        
raw_prompting: typing.Optional[bool] = OMIT,\n        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,\n        tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT,\n        force_single_step: typing.Optional[bool] = OMIT,\n        response_format: typing.Optional[ResponseFormat] = OMIT,\n        safety_mode: typing.Optional[ChatStreamRequestSafetyMode] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> typing.AsyncIterator[StreamedChatResponse]:\n        \"\"\"\n        Generates a streamed text response to a user message.\n\n        To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api).\n\n        Parameters\n        ----------\n        message : str\n            Text input for the model to respond to.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        accepts : typing.Optional[typing.Literal[\"text/event-stream\"]]\n            Pass text/event-stream to receive the streamed response as server-sent events. The default is `\\\\n` delimited events.\n\n        model : typing.Optional[str]\n            The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.\n\n            Compatible Deployments: Cohere Platform, Private Deployments\n\n        preamble : typing.Optional[str]\n            When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.\n\n            The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. 
Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        chat_history : typing.Optional[typing.Sequence[Message]]\n            A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.\n\n            Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.\n\n            The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        conversation_id : typing.Optional[str]\n            An alternative to `chat_history`.\n\n            Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string.\n\n            Compatible Deployments: Cohere Platform\n\n        prompt_truncation : typing.Optional[ChatStreamRequestPromptTruncation]\n            Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.\n\n            Dictates how the prompt will be constructed.\n\n            With `prompt_truncation` set to \"AUTO\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. 
During this process the order of the documents and chat history will be changed and ranked by relevance.\n\n            With `prompt_truncation` set to \"AUTO_PRESERVE_ORDER\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.\n\n            With `prompt_truncation` set to \"OFF\", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.\n\n            Compatible Deployments:\n             - AUTO: Cohere Platform Only\n             - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        connectors : typing.Optional[typing.Sequence[ChatConnector]]\n            Accepts `{\"id\": \"web-search\"}`, and/or the `\"id\"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) one.\n\n            When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).\n\n            Compatible Deployments: Cohere Platform\n\n        search_queries_only : typing.Optional[bool]\n            Defaults to `false`.\n\n            When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        documents : typing.Optional[typing.Sequence[ChatDocument]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. 
Each document is a string-string dictionary.\n\n            Example:\n            ```\n            [\n              { \"title\": \"Tall penguins\", \"text\": \"Emperor penguins are the tallest.\" },\n              { \"title\": \"Penguin habitats\", \"text\": \"Emperor penguins only live in Antarctica.\" },\n            ]\n            ```\n\n            Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.\n\n            Some suggested keys are \"text\", \"author\", and \"date\". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.\n\n            An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.\n\n            An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The \"_excludes\" field will not be passed to the model.\n\n            See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        citation_quality : typing.Optional[ChatStreamRequestCitationQuality]\n            Defaults to `\"enabled\"`.\n            Citations are enabled by default for models that support it, but can be turned off by setting `\"type\": \"disabled\"`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. 
Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_input_tokens : typing.Optional[int]\n            The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer.\n\n            Input will be truncated according to the `prompt_truncation` parameter.\n\n            Compatible Deployments: Cohere Platform\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. 
However,\n            determinism cannot be totally guaranteed.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without\n            any pre-processing.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tools : typing.Optional[typing.Sequence[Tool]]\n            A list of available tools (functions) that the model may suggest invoking before producing a text response.\n\n            When `tools` is passed (without `tool_results`), the `text` field in the response will be `\"\"` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tool_results : typing.Optional[typing.Sequence[ToolResult]]\n            A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.\n            Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.\n\n            **Note**: `outputs` must be a list of objects. 
If your tool returns a single object (eg `{\"status\": 200}`), make sure to wrap it in a list.\n            ```\n            tool_results = [\n              {\n                \"call\": {\n                  \"name\": <tool name>,\n                  \"parameters\": {\n                    <param name>: <param value>\n                  }\n                },\n                \"outputs\": [{\n                  <key>: <value>\n                }]\n              },\n              ...\n            ]\n            ```\n            **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        force_single_step : typing.Optional[bool]\n            Forces the chat to be single step. Defaults to `false`.\n\n        response_format : typing.Optional[ResponseFormat]\n\n        safety_mode : typing.Optional[ChatStreamRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/docs/safety-modes) inserted into the prompt. 
Defaults to `CONTEXTUAL`.\n            When `NONE` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.\n\n            **Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Yields\n        ------\n        typing.AsyncIterator[StreamedChatResponse]\n\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            response = await client.chat_stream(\n                model=\"command-a-03-2025\",\n                message=\"hello!\",\n            )\n            async for chunk in response:\n                print(chunk)\n\n\n        asyncio.run(main())\n        \"\"\"\n        async with self._raw_client.chat_stream(\n            message=message,\n            accepts=accepts,\n            model=model,\n            preamble=preamble,\n            chat_history=chat_history,\n            conversation_id=conversation_id,\n            prompt_truncation=prompt_truncation,\n            connectors=connectors,\n            search_queries_only=search_queries_only,\n            documents=documents,\n            citation_quality=citation_quality,\n            temperature=temperature,\n            
max_tokens=max_tokens,\n            max_input_tokens=max_input_tokens,\n            k=k,\n            p=p,\n            seed=seed,\n            stop_sequences=stop_sequences,\n            frequency_penalty=frequency_penalty,\n            presence_penalty=presence_penalty,\n            raw_prompting=raw_prompting,\n            tools=tools,\n            tool_results=tool_results,\n            force_single_step=force_single_step,\n            response_format=response_format,\n            safety_mode=safety_mode,\n            request_options=request_options,\n        ) as r:\n            async for _chunk in r.data:\n                yield _chunk\n\n    async def chat(\n        self,\n        *,\n        message: str,\n        accepts: typing.Optional[typing.Literal[\"text/event-stream\"]] = None,\n        model: typing.Optional[str] = OMIT,\n        preamble: typing.Optional[str] = OMIT,\n        chat_history: typing.Optional[typing.Sequence[Message]] = OMIT,\n        conversation_id: typing.Optional[str] = OMIT,\n        prompt_truncation: typing.Optional[ChatRequestPromptTruncation] = OMIT,\n        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,\n        search_queries_only: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,\n        citation_quality: typing.Optional[ChatRequestCitationQuality] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        max_input_tokens: typing.Optional[int] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,\n        
tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT,\n        force_single_step: typing.Optional[bool] = OMIT,\n        response_format: typing.Optional[ResponseFormat] = OMIT,\n        safety_mode: typing.Optional[ChatRequestSafetyMode] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> NonStreamedChatResponse:\n        \"\"\"\n        Generates a text response to a user message.\n        To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api).\n\n        Parameters\n        ----------\n        message : str\n            Text input for the model to respond to.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        accepts : typing.Optional[typing.Literal[\"text/event-stream\"]]\n            Pass text/event-stream to receive the streamed response as server-sent events. The default is `\\\\n` delimited events.\n\n        model : typing.Optional[str]\n            The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.\n\n            Compatible Deployments: Cohere Platform, Private Deployments\n\n        preamble : typing.Optional[str]\n            When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.\n\n            The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. 
Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        chat_history : typing.Optional[typing.Sequence[Message]]\n            A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.\n\n            Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.\n\n            The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        conversation_id : typing.Optional[str]\n            An alternative to `chat_history`.\n\n            Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string.\n\n            Compatible Deployments: Cohere Platform\n\n        prompt_truncation : typing.Optional[ChatRequestPromptTruncation]\n            Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.\n\n            Dictates how the prompt will be constructed.\n\n            With `prompt_truncation` set to \"AUTO\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. 
During this process the order of the documents and chat history will be changed and ranked by relevance.\n\n            With `prompt_truncation` set to \"AUTO_PRESERVE_ORDER\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.\n\n            With `prompt_truncation` set to \"OFF\", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.\n\n            Compatible Deployments:\n             - AUTO: Cohere Platform Only\n             - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        connectors : typing.Optional[typing.Sequence[ChatConnector]]\n            Accepts `{\"id\": \"web-search\"}`, and/or the `\"id\"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) one.\n\n            When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).\n\n            Compatible Deployments: Cohere Platform\n\n        search_queries_only : typing.Optional[bool]\n            Defaults to `false`.\n\n            When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        documents : typing.Optional[typing.Sequence[ChatDocument]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. 
Each document is a string-string dictionary.\n\n            Example:\n            ```\n            [\n              { \"title\": \"Tall penguins\", \"text\": \"Emperor penguins are the tallest.\" },\n              { \"title\": \"Penguin habitats\", \"text\": \"Emperor penguins only live in Antarctica.\" },\n            ]\n            ```\n\n            Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.\n\n            Some suggested keys are \"text\", \"author\", and \"date\". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.\n\n            An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.\n\n            An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The \"_excludes\" field will not be passed to the model.\n\n            See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        citation_quality : typing.Optional[ChatRequestCitationQuality]\n            Defaults to `\"enabled\"`.\n            Citations are enabled by default for models that support it, but can be turned off by setting `\"type\": \"disabled\"`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. 
Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_input_tokens : typing.Optional[int]\n            The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer.\n\n            Input will be truncated according to the `prompt_truncation` parameter.\n\n            Compatible Deployments: Cohere Platform\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. 
However,\n            determinism cannot be totally guaranteed.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without\n            any pre-processing.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tools : typing.Optional[typing.Sequence[Tool]]\n            A list of available tools (functions) that the model may suggest invoking before producing a text response.\n\n            When `tools` is passed (without `tool_results`), the `text` field in the response will be `\"\"` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tool_results : typing.Optional[typing.Sequence[ToolResult]]\n            A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.\n            Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.\n\n            **Note**: `outputs` must be a list of objects. 
If your tool returns a single object (eg `{\"status\": 200}`), make sure to wrap it in a list.\n            ```\n            tool_results = [\n              {\n                \"call\": {\n                  \"name\": <tool name>,\n                  \"parameters\": {\n                    <param name>: <param value>\n                  }\n                },\n                \"outputs\": [{\n                  <key>: <value>\n                }]\n              },\n              ...\n            ]\n            ```\n            **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        force_single_step : typing.Optional[bool]\n            Forces the chat to be single step. Defaults to `false`.\n\n        response_format : typing.Optional[ResponseFormat]\n\n        safety_mode : typing.Optional[ChatRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/docs/safety-modes) inserted into the prompt. 
Defaults to `CONTEXTUAL`.\n            When `NONE` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.\n\n            **Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        NonStreamedChatResponse\n\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.chat(\n                model=\"command-a-03-2025\",\n                message=\"Tell me about LLMs\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.chat(\n            message=message,\n            accepts=accepts,\n            model=model,\n            preamble=preamble,\n            chat_history=chat_history,\n            conversation_id=conversation_id,\n            prompt_truncation=prompt_truncation,\n            connectors=connectors,\n            search_queries_only=search_queries_only,\n            documents=documents,\n            citation_quality=citation_quality,\n            temperature=temperature,\n            max_tokens=max_tokens,\n            max_input_tokens=max_input_tokens,\n            k=k,\n            p=p,\n         
   seed=seed,\n            stop_sequences=stop_sequences,\n            frequency_penalty=frequency_penalty,\n            presence_penalty=presence_penalty,\n            raw_prompting=raw_prompting,\n            tools=tools,\n            tool_results=tool_results,\n            force_single_step=force_single_step,\n            response_format=response_format,\n            safety_mode=safety_mode,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def generate_stream(\n        self,\n        *,\n        prompt: str,\n        model: typing.Optional[str] = OMIT,\n        num_generations: typing.Optional[int] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        truncate: typing.Optional[GenerateStreamRequestTruncate] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        preset: typing.Optional[str] = OMIT,\n        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> typing.AsyncIterator[GenerateStreamedResponse]:\n        \"\"\"\n        <Warning>\n        This API is marked as \"Legacy\" and is no longer maintained. 
Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat with Streaming API.\n        </Warning>\n        Generates realistic text conditioned on a given input.\n\n        Parameters\n        ----------\n        prompt : str\n            The input text that serves as the starting point for generating the response.\n            Note: The prompt will be pre-processed and modified before reaching the model.\n\n        model : typing.Optional[str]\n            The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).\n            Smaller, \"light\" models are faster, while larger models will perform better. [Custom models](https://docs.cohere.com/docs/training-custom-models) can also be supplied with their full ID.\n\n        num_generations : typing.Optional[int]\n            The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\n            This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.\n\n            Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.\n\n        truncate : typing.Optional[GenerateStreamRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. 
In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        temperature : typing.Optional[float]\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.\n            Defaults to `0.75`, min value of `0.0`, max value of `5.0`.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        preset : typing.Optional[str]\n            Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate).\n            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.\n\n        end_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the end of the earliest occurrence of a stop sequence. 
The sequence will be included in the text.\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        frequency_penalty : typing.Optional[float]\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        return_likelihoods : typing.Optional[GenerateStreamRequestReturnLikelihoods]\n            One of `GENERATION|NONE` to specify how and if the token likelihoods are returned with the response. 
Defaults to `NONE`.\n\n            If `GENERATION` is selected, the token likelihoods will only be provided for generated text.\n\n            WARNING: `ALL` is deprecated, and will be removed in a future release.\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without any pre-processing.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Yields\n        ------\n        typing.AsyncIterator[GenerateStreamedResponse]\n\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            response = await client.generate_stream(\n                prompt=\"Please explain to me how LLMs work\",\n            )\n            async for chunk in response:\n                print(chunk)\n\n\n        asyncio.run(main())\n        \"\"\"\n        async with self._raw_client.generate_stream(\n            prompt=prompt,\n            model=model,\n            num_generations=num_generations,\n            max_tokens=max_tokens,\n            truncate=truncate,\n            temperature=temperature,\n            seed=seed,\n            preset=preset,\n            end_sequences=end_sequences,\n            stop_sequences=stop_sequences,\n            k=k,\n            p=p,\n            frequency_penalty=frequency_penalty,\n            presence_penalty=presence_penalty,\n            return_likelihoods=return_likelihoods,\n            raw_prompting=raw_prompting,\n            request_options=request_options,\n        ) as r:\n            async for _chunk in r.data:\n                yield _chunk\n\n    async def generate(\n        self,\n        *,\n        prompt: str,\n        model: typing.Optional[str] = OMIT,\n        num_generations: 
typing.Optional[int] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        truncate: typing.Optional[GenerateRequestTruncate] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        preset: typing.Optional[str] = OMIT,\n        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> Generation:\n        \"\"\"\n        <Warning>\n        This API is marked as \"Legacy\" and is no longer maintained. Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.\n        </Warning>\n        Generates realistic text conditioned on a given input.\n\n        Parameters\n        ----------\n        prompt : str\n            The input text that serves as the starting point for generating the response.\n            Note: The prompt will be pre-processed and modified before reaching the model.\n\n        model : typing.Optional[str]\n            The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).\n            Smaller, \"light\" models are faster, while larger models will perform better. [Custom models](https://docs.cohere.com/docs/training-custom-models) can also be supplied with their full ID.\n\n        num_generations : typing.Optional[int]\n            The maximum number of generations that will be returned. 
Defaults to `1`, min value of `1`, max value of `5`.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\n            This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.\n\n            Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.\n\n        truncate : typing.Optional[GenerateRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        temperature : typing.Optional[float]\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.\n            Defaults to `0.75`, min value of `0.0`, max value of `5.0`.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        preset : typing.Optional[str]\n            Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. 
You can create presets in the [playground](https://dashboard.cohere.com/playground/generate).\n            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.\n\n        end_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included the text.\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        frequency_penalty : typing.Optional[float]\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Can be used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        return_likelihoods : typing.Optional[GenerateRequestReturnLikelihoods]\n            One of `GENERATION|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.\n\n            If `GENERATION` is selected, the token likelihoods will only be provided for generated text.\n\n            WARNING: `ALL` is deprecated, and will be removed in a future release.\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without any pre-processing.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        Generation\n\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.generate(\n                prompt=\"Please explain to me how LLMs work\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.generate(\n            prompt=prompt,\n            model=model,\n            num_generations=num_generations,\n            max_tokens=max_tokens,\n            truncate=truncate,\n            temperature=temperature,\n            seed=seed,\n            preset=preset,\n            end_sequences=end_sequences,\n            stop_sequences=stop_sequences,\n            k=k,\n            p=p,\n            frequency_penalty=frequency_penalty,\n            presence_penalty=presence_penalty,\n            
return_likelihoods=return_likelihoods,\n            raw_prompting=raw_prompting,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def embed(\n        self,\n        *,\n        texts: typing.Optional[typing.Sequence[str]] = OMIT,\n        images: typing.Optional[typing.Sequence[str]] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        input_type: typing.Optional[EmbedInputType] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[EmbedRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> EmbedResponse:\n        \"\"\"\n        This endpoint returns text and image embeddings. An embedding is a list of floating point numbers that captures semantic information about the content that it represents.\n\n        Embeddings can be used to create classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page.\n\n        If you want to learn more how to use the embedding model, have a look at the [Semantic Search Guide](https://docs.cohere.com/docs/semantic-search).\n\n        Parameters\n        ----------\n        texts : typing.Optional[typing.Sequence[str]]\n            An array of strings for the model to embed. Maximum number of texts per call is `96`.\n\n        images : typing.Optional[typing.Sequence[str]]\n            An array of image data URIs for the model to embed. Maximum number of images per call is `1`.\n\n            The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). 
The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.\n\n            Images are only supported with Embed v3.0 and newer models.\n\n        model : typing.Optional[str]\n            ID of one of the available [Embedding models](https://docs.cohere.com/docs/cohere-embed).\n\n        input_type : typing.Optional[EmbedInputType]\n\n        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]\n            Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.\n\n            * `\"float\"`: Use this when you want to get back the default float embeddings. Supported with all Embed models.\n            * `\"int8\"`: Use this when you want to get back signed int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"binary\"`: Use this when you want to get back signed binary embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Supported with Embed v3.0 and newer Embed models.\n\n        truncate : typing.Optional[EmbedRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. 
In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        EmbedResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.embed(\n                texts=[\"hello\", \"goodbye\"],\n                model=\"embed-v4.0\",\n                input_type=\"classification\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.embed(\n            texts=texts,\n            images=images,\n            model=model,\n            input_type=input_type,\n            embedding_types=embedding_types,\n            truncate=truncate,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def rerank(\n        self,\n        *,\n        query: str,\n        documents: typing.Sequence[RerankRequestDocumentsItem],\n        model: typing.Optional[str] = OMIT,\n        top_n: typing.Optional[int] = OMIT,\n        rank_fields: typing.Optional[typing.Sequence[str]] = OMIT,\n        return_documents: typing.Optional[bool] = OMIT,\n        max_chunks_per_doc: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> RerankResponse:\n        \"\"\"\n        This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.\n\n        Parameters\n        ----------\n        query : str\n            The search 
query\n\n        documents : typing.Sequence[RerankRequestDocumentsItem]\n            A list of document objects or strings to rerank.\n            If a document is provided the text fields is required and all other fields will be preserved in the response.\n\n            The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000.\n\n            We recommend a maximum of 1,000 documents for optimal endpoint performance.\n\n        model : typing.Optional[str]\n            The identifier of the model to use, eg `rerank-v3.5`.\n\n        top_n : typing.Optional[int]\n            The number of most relevant documents or indices to return, defaults to the length of the documents\n\n        rank_fields : typing.Optional[typing.Sequence[str]]\n            If a JSON object is provided, you can specify which keys you would like to have considered for reranking. The model will rerank based on order of the fields passed in (i.e. rank_fields=['title','author','text'] will rerank using the values in title, author, text  sequentially. If the length of title, author, and text exceeds the context length of the model, the chunking will not re-consider earlier fields). 
If not provided, the model will use the default text field for ranking.\n\n        return_documents : typing.Optional[bool]\n            - If false, returns results without the doc text - the api will return a list of {index, relevance score} where index is inferred from the list passed into the request.\n            - If true, returns results with the doc text passed in - the api will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request.\n\n        max_chunks_per_doc : typing.Optional[int]\n            The maximum number of chunks to produce internally from a document\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        RerankResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.rerank(\n                documents=[\n                    {\n                        \"text\": \"Carson City is the capital city of the American state of Nevada.\"\n                    },\n                    {\n                        \"text\": \"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.\"\n                    },\n                    {\n                        \"text\": \"Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.\"\n                    },\n                    {\n                        \"text\": \"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. 
It is a federal district.\"\n                    },\n                    {\n                        \"text\": \"Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.\"\n                    },\n                ],\n                query=\"What is the capital of the United States?\",\n                top_n=3,\n                model=\"rerank-v4.0-pro\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.rerank(\n            query=query,\n            documents=documents,\n            model=model,\n            top_n=top_n,\n            rank_fields=rank_fields,\n            return_documents=return_documents,\n            max_chunks_per_doc=max_chunks_per_doc,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def classify(\n        self,\n        *,\n        inputs: typing.Sequence[str],\n        examples: typing.Optional[typing.Sequence[ClassifyExample]] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        preset: typing.Optional[str] = OMIT,\n        truncate: typing.Optional[ClassifyRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ClassifyResponse:\n        \"\"\"\n        This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference.\n        Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.\n\n        Parameters\n        ----------\n        inputs : typing.Sequence[str]\n            A list of up to 96 texts to be classified. 
Each one must be a non-empty string.\n            There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the \"max tokens\" column [here](https://docs.cohere.com/docs/models).\n            Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts.\n\n        examples : typing.Optional[typing.Sequence[ClassifyExample]]\n            An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: \"...\",label: \"...\"}`.\n            Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.\n\n        model : typing.Optional[str]\n            ID of a [Fine-tuned](https://docs.cohere.com/v2/docs/classify-starting-the-training) Classify model\n\n        preset : typing.Optional[str]\n            The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.com/playground). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters.\n\n        truncate : typing.Optional[ClassifyRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n            Passing `START` will discard the start of the input. 
`END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ClassifyResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient, ClassifyExample\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.classify(\n                examples=[\n                    ClassifyExample(\n                        text=\"Dermatologists don't like her!\",\n                        label=\"Spam\",\n                    ),\n                    ClassifyExample(\n                        text=\"'Hello, open to this?'\",\n                        label=\"Spam\",\n                    ),\n                    ClassifyExample(\n                        text=\"I need help please wire me $1000 right now\",\n                        label=\"Spam\",\n                    ),\n                    ClassifyExample(\n                        text=\"Nice to know you ;)\",\n                        label=\"Spam\",\n                    ),\n                    ClassifyExample(\n                        text=\"Please help me?\",\n                        label=\"Spam\",\n                    ),\n                    ClassifyExample(\n                        text=\"Your parcel will be delivered today\",\n                        label=\"Not spam\",\n                    ),\n                    ClassifyExample(\n                        text=\"Review changes to our Terms and Conditions\",\n                        label=\"Not spam\",\n                    ),\n    
                ClassifyExample(\n                        text=\"Weekly sync notes\",\n                        label=\"Not spam\",\n                    ),\n                    ClassifyExample(\n                        text=\"'Re: Follow up from today's meeting'\",\n                        label=\"Not spam\",\n                    ),\n                    ClassifyExample(\n                        text=\"Pre-read for tomorrow\",\n                        label=\"Not spam\",\n                    ),\n                ],\n                inputs=[\"Confirm your email address\", \"hey i need u to send some $\"],\n                model=\"YOUR-FINE-TUNED-MODEL-ID\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.classify(\n            inputs=inputs,\n            examples=examples,\n            model=model,\n            preset=preset,\n            truncate=truncate,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def summarize(\n        self,\n        *,\n        text: str,\n        length: typing.Optional[SummarizeRequestLength] = OMIT,\n        format: typing.Optional[SummarizeRequestFormat] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        extractiveness: typing.Optional[SummarizeRequestExtractiveness] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        additional_command: typing.Optional[str] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> SummarizeResponse:\n        \"\"\"\n        <Warning>\n        This API is marked as \"Legacy\" and is no longer maintained. Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.\n        </Warning>\n        Generates a summary in English for a given text.\n\n        Parameters\n        ----------\n        text : str\n            The text to generate a summary for. 
Can be up to 100,000 characters long. Currently the only supported language is English.\n\n        length : typing.Optional[SummarizeRequestLength]\n            One of `short`, `medium`, `long`, or `auto` defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text.\n\n        format : typing.Optional[SummarizeRequestFormat]\n            One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text.\n\n        model : typing.Optional[str]\n            The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, \"light\" models are faster, while larger models will perform better.\n\n        extractiveness : typing.Optional[SummarizeRequestExtractiveness]\n            One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text.\n\n        temperature : typing.Optional[float]\n            Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1.\n\n        additional_command : typing.Optional[str]\n            A free-form instruction for modifying how the summaries get generated. Should complete the sentence \"Generate a summary _\". Eg. 
\"focusing on the next steps\" or \"written by Yoda\"\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        SummarizeResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.summarize(\n                text='Ice cream is a sweetened frozen food typically eaten as a snack or dessert. It may be made from milk or cream and is flavoured with a sweetener, either sugar or an alternative, and a spice, such as cocoa or vanilla, or with fruit such as strawberries or peaches. It can also be made by whisking a flavored cream base and liquid nitrogen together. Food coloring is sometimes added, in addition to stabilizers. The mixture is cooled below the freezing point of water and stirred to incorporate air spaces and to prevent detectable ice crystals from forming. The result is a smooth, semi-solid foam that is solid at very low temperatures (below 2 °C or 35 °F). It becomes more malleable as its temperature increases.\\n\\nThe meaning of the name \"ice cream\" varies from one country to another. In some countries, such as the United States, \"ice cream\" applies only to a specific variety, and most governments regulate the commercial use of the various terms according to the relative quantities of the main ingredients, notably the amount of cream. Products that do not meet the criteria to be called ice cream are sometimes labelled \"frozen dairy dessert\" instead. In other countries, such as Italy and Argentina, one word is used fo\\r all variants. 
Analogues made from dairy alternatives, such as goat\\'s or sheep\\'s milk, or milk substitutes (e.g., soy, cashew, coconut, almond milk or tofu), are available for those who are lactose intolerant, allergic to dairy protein or vegan.',\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.summarize(\n            text=text,\n            length=length,\n            format=format,\n            model=model,\n            extractiveness=extractiveness,\n            temperature=temperature,\n            additional_command=additional_command,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def tokenize(\n        self, *, text: str, model: str, request_options: typing.Optional[RequestOptions] = None\n    ) -> TokenizeResponse:\n        \"\"\"\n        This endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). To learn more about tokenization and byte pair encoding, see the tokens page.\n\n        Parameters\n        ----------\n        text : str\n            The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters.\n\n        model : str\n            The input will be tokenized by the tokenizer that is used by this model.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        TokenizeResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.tokenize(\n                text=\"tokenize me! 
:D\",\n                model=\"command\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.tokenize(text=text, model=model, request_options=request_options)\n        return _response.data\n\n    async def detokenize(\n        self, *, tokens: typing.Sequence[int], model: str, request_options: typing.Optional[RequestOptions] = None\n    ) -> DetokenizeResponse:\n        \"\"\"\n        This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page.\n\n        Parameters\n        ----------\n        tokens : typing.Sequence[int]\n            The list of tokens to be detokenized.\n\n        model : str\n            An optional parameter to provide the model name. This will ensure that the detokenization is done by the tokenizer used by that model.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DetokenizeResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.detokenize(\n                tokens=[10002, 2261, 2012, 8, 2792, 43],\n                model=\"command\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.detokenize(tokens=tokens, model=model, request_options=request_options)\n        return _response.data\n\n    async def check_api_key(self, *, request_options: typing.Optional[RequestOptions] = None) -> CheckApiKeyResponse:\n        \"\"\"\n        Checks that the api key in the Authorization header is valid and active\n\n        Parameters\n        ----------\n        
request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        CheckApiKeyResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.check_api_key()\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.check_api_key(request_options=request_options)\n        return _response.data\n\n    @property\n    def v2(self):\n        if self._v2 is None:\n            from .v2.client import AsyncV2Client  # noqa: E402\n\n            self._v2 = AsyncV2Client(client_wrapper=self._client_wrapper)\n        return self._v2\n\n    @property\n    def batches(self):\n        if self._batches is None:\n            from .batches.client import AsyncBatchesClient  # noqa: E402\n\n            self._batches = AsyncBatchesClient(client_wrapper=self._client_wrapper)\n        return self._batches\n\n    @property\n    def embed_jobs(self):\n        if self._embed_jobs is None:\n            from .embed_jobs.client import AsyncEmbedJobsClient  # noqa: E402\n\n            self._embed_jobs = AsyncEmbedJobsClient(client_wrapper=self._client_wrapper)\n        return self._embed_jobs\n\n    @property\n    def datasets(self):\n        if self._datasets is None:\n            from .datasets.client import AsyncDatasetsClient  # noqa: E402\n\n            self._datasets = AsyncDatasetsClient(client_wrapper=self._client_wrapper)\n        return self._datasets\n\n    @property\n    def connectors(self):\n        if self._connectors is None:\n            from .connectors.client import AsyncConnectorsClient  # noqa: E402\n\n            self._connectors = AsyncConnectorsClient(client_wrapper=self._client_wrapper)\n        return 
self._connectors\n\n    @property\n    def models(self):\n        if self._models is None:\n            from .models.client import AsyncModelsClient  # noqa: E402\n\n            self._models = AsyncModelsClient(client_wrapper=self._client_wrapper)\n        return self._models\n\n    @property\n    def finetuning(self):\n        if self._finetuning is None:\n            from .finetuning.client import AsyncFinetuningClient  # noqa: E402\n\n            self._finetuning = AsyncFinetuningClient(client_wrapper=self._client_wrapper)\n        return self._finetuning\n\n    @property\n    def audio(self):\n        if self._audio is None:\n            from .audio.client import AsyncAudioClient  # noqa: E402\n\n            self._audio = AsyncAudioClient(client_wrapper=self._client_wrapper)\n        return self._audio\n\n\ndef _get_base_url(*, base_url: typing.Optional[str] = None, environment: ClientEnvironment) -> str:\n    if base_url is not None:\n        return base_url\n    elif environment is not None:\n        return environment.value\n    else:\n        raise Exception(\"Please pass in either base_url or environment to construct the client\")\n"
  },
  {
    "path": "src/cohere/batches/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .types import (\n        Batch,\n        BatchStatus,\n        CancelBatchResponse,\n        CreateBatchResponse,\n        GetBatchResponse,\n        ListBatchesResponse,\n    )\n_dynamic_imports: typing.Dict[str, str] = {\n    \"Batch\": \".types\",\n    \"BatchStatus\": \".types\",\n    \"CancelBatchResponse\": \".types\",\n    \"CreateBatchResponse\": \".types\",\n    \"GetBatchResponse\": \".types\",\n    \"ListBatchesResponse\": \".types\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\n    \"Batch\",\n    \"BatchStatus\",\n    \"CancelBatchResponse\",\n    \"CreateBatchResponse\",\n    \"GetBatchResponse\",\n    \"ListBatchesResponse\",\n]\n"
  },
  {
    "path": "src/cohere/batches/client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.request_options import RequestOptions\nfrom .raw_client import AsyncRawBatchesClient, RawBatchesClient\nfrom .types.batch import Batch\nfrom .types.cancel_batch_response import CancelBatchResponse\nfrom .types.create_batch_response import CreateBatchResponse\nfrom .types.get_batch_response import GetBatchResponse\nfrom .types.list_batches_response import ListBatchesResponse\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass BatchesClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._raw_client = RawBatchesClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> RawBatchesClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        RawBatchesClient\n        \"\"\"\n        return self._raw_client\n\n    def list(\n        self,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        order_by: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ListBatchesResponse:\n        \"\"\"\n        List the batches for the current user\n\n        Parameters\n        ----------\n        page_size : typing.Optional[int]\n            The maximum number of batches to return. 
The service may return fewer than\n            this value.\n            If unspecified, at most 50 batches will be returned.\n            The maximum value is 1000; values above 1000 will be coerced to 1000.\n\n        page_token : typing.Optional[str]\n            A page token, received from a previous `ListBatches` call.\n            Provide this to retrieve the subsequent page.\n\n        order_by : typing.Optional[str]\n            Batches can be ordered by creation time or last updated time.\n            Use `created_at` for creation time or `updated_at` for last updated time.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListBatchesResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.batches.list(\n            page_size=1,\n            page_token=\"page_token\",\n            order_by=\"order_by\",\n        )\n        \"\"\"\n        _response = self._raw_client.list(\n            page_size=page_size, page_token=page_token, order_by=order_by, request_options=request_options\n        )\n        return _response.data\n\n    def create(self, *, request: Batch, request_options: typing.Optional[RequestOptions] = None) -> CreateBatchResponse:\n        \"\"\"\n        Creates and executes a batch from an uploaded dataset of requests\n\n        Parameters\n        ----------\n        request : Batch\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        CreateBatchResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n        from cohere.batches import Batch\n\n        client = Client(\n            
client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.batches.create(\n            request=Batch(\n                name=\"name\",\n                input_dataset_id=\"input_dataset_id\",\n                model=\"model\",\n            ),\n        )\n        \"\"\"\n        _response = self._raw_client.create(request=request, request_options=request_options)\n        return _response.data\n\n    def retrieve(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> GetBatchResponse:\n        \"\"\"\n        Retrieves a batch\n\n        Parameters\n        ----------\n        id : str\n            The batch ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        GetBatchResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.batches.retrieve(\n            id=\"id\",\n        )\n        \"\"\"\n        _response = self._raw_client.retrieve(id, request_options=request_options)\n        return _response.data\n\n    def cancel(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> CancelBatchResponse:\n        \"\"\"\n        Cancels an in-progress batch\n\n        Parameters\n        ----------\n        id : str\n            The batch ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        CancelBatchResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.batches.cancel(\n            id=\"id\",\n        
)\n        \"\"\"\n        _response = self._raw_client.cancel(id, request_options=request_options)\n        return _response.data\n\n\nclass AsyncBatchesClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._raw_client = AsyncRawBatchesClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> AsyncRawBatchesClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        AsyncRawBatchesClient\n        \"\"\"\n        return self._raw_client\n\n    async def list(\n        self,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        order_by: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ListBatchesResponse:\n        \"\"\"\n        List the batches for the current user\n\n        Parameters\n        ----------\n        page_size : typing.Optional[int]\n            The maximum number of batches to return. 
The service may return fewer than\n            this value.\n            If unspecified, at most 50 batches will be returned.\n            The maximum value is 1000; values above 1000 will be coerced to 1000.\n\n        page_token : typing.Optional[str]\n            A page token, received from a previous `ListBatches` call.\n            Provide this to retrieve the subsequent page.\n\n        order_by : typing.Optional[str]\n            Batches can be ordered by creation time or last updated time.\n            Use `created_at` for creation time or `updated_at` for last updated time.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListBatchesResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.batches.list(\n                page_size=1,\n                page_token=\"page_token\",\n                order_by=\"order_by\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.list(\n            page_size=page_size, page_token=page_token, order_by=order_by, request_options=request_options\n        )\n        return _response.data\n\n    async def create(\n        self, *, request: Batch, request_options: typing.Optional[RequestOptions] = None\n    ) -> CreateBatchResponse:\n        \"\"\"\n        Creates and executes a batch from an uploaded dataset of requests\n\n        Parameters\n        ----------\n        request : Batch\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        CreateBatchResponse\n            A successful response.\n\n   
     Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n        from cohere.batches import Batch\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.batches.create(\n                request=Batch(\n                    name=\"name\",\n                    input_dataset_id=\"input_dataset_id\",\n                    model=\"model\",\n                ),\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.create(request=request, request_options=request_options)\n        return _response.data\n\n    async def retrieve(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> GetBatchResponse:\n        \"\"\"\n        Retrieves a batch\n\n        Parameters\n        ----------\n        id : str\n            The batch ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        GetBatchResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.batches.retrieve(\n                id=\"id\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.retrieve(id, request_options=request_options)\n        return _response.data\n\n    async def cancel(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> CancelBatchResponse:\n        \"\"\"\n        Cancels an in-progress batch\n\n        Parameters\n        ----------\n        id : str\n            The batch ID.\n\n        
request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        CancelBatchResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.batches.cancel(\n                id=\"id\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.cancel(id, request_options=request_options)\n        return _response.data\n"
  },
  {
    "path": "src/cohere/batches/raw_client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\nfrom json.decoder import JSONDecodeError\n\nfrom ..core.api_error import ApiError\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.http_response import AsyncHttpResponse, HttpResponse\nfrom ..core.jsonable_encoder import jsonable_encoder\nfrom ..core.parse_error import ParsingError\nfrom ..core.request_options import RequestOptions\nfrom ..core.serialization import convert_and_respect_annotation_metadata\nfrom ..core.unchecked_base_model import construct_type\nfrom ..errors.bad_request_error import BadRequestError\nfrom ..errors.forbidden_error import ForbiddenError\nfrom ..errors.internal_server_error import InternalServerError\nfrom ..errors.not_found_error import NotFoundError\nfrom ..errors.service_unavailable_error import ServiceUnavailableError\nfrom ..errors.unauthorized_error import UnauthorizedError\nfrom .types.batch import Batch\nfrom .types.cancel_batch_response import CancelBatchResponse\nfrom .types.create_batch_response import CreateBatchResponse\nfrom .types.get_batch_response import GetBatchResponse\nfrom .types.list_batches_response import ListBatchesResponse\nfrom pydantic import ValidationError\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass RawBatchesClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    def list(\n        self,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        order_by: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[ListBatchesResponse]:\n        \"\"\"\n        List the batches for the current user\n\n        Parameters\n        ----------\n        page_size : typing.Optional[int]\n            The maximum number of batches 
to return. The service may return fewer than\n            this value.\n            If unspecified, at most 50 batches will be returned.\n            The maximum value is 1000; values above 1000 will be coerced to 1000.\n\n        page_token : typing.Optional[str]\n            A page token, received from a previous `ListBatches` call.\n            Provide this to retrieve the subsequent page.\n\n        order_by : typing.Optional[str]\n            Batches can be ordered by creation time or last updated time.\n            Use `created_at` for creation time or `updated_at` for last updated time.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[ListBatchesResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v2/batches\",\n            method=\"GET\",\n            params={\n                \"page_size\": page_size,\n                \"page_token\": page_token,\n                \"order_by\": order_by,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListBatchesResponse,\n                    construct_type(\n                        type_=ListBatchesResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        
),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                  
  body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def create(\n        self, *, request: Batch, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[CreateBatchResponse]:\n        \"\"\"\n        Creates and executes a batch from an uploaded dataset of requests\n\n        Parameters\n        ----------\n        request : Batch\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[CreateBatchResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v2/batches\",\n            method=\"POST\",\n            json=convert_and_respect_annotation_metadata(object_=request, annotation=Batch, direction=\"write\"),\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    CreateBatchResponse,\n                    construct_type(\n                        type_=CreateBatchResponse,  
# type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code 
== 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def retrieve(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[GetBatchResponse]:\n        \"\"\"\n        Retrieves a batch\n\n        Parameters\n        ----------\n        id : str\n            The batch ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[GetBatchResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            
f\"v2/batches/{jsonable_encoder(id)}\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    GetBatchResponse,\n                    construct_type(\n                        type_=GetBatchResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    
headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def cancel(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[CancelBatchResponse]:\n        \"\"\"\n        Cancels an in-progress batch\n\n        Parameters\n        ----------\n 
       id : str\n            The batch ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[CancelBatchResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v2/batches/{jsonable_encoder(id)}:cancel\",\n            method=\"POST\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    CancelBatchResponse,\n                    construct_type(\n                        type_=CancelBatchResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                      
  typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n\nclass AsyncRawBatchesClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    async def list(\n        self,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        order_by: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[ListBatchesResponse]:\n        \"\"\"\n        List the batches for the current user\n\n        Parameters\n        ----------\n        page_size : typing.Optional[int]\n            The maximum number of batches to return. The service may return fewer than\n            this value.\n            If unspecified, at most 50 batches will be returned.\n            The maximum value is 1000; values above 1000 will be coerced to 1000.\n\n        page_token : typing.Optional[str]\n            A page token, received from a previous `ListBatches` call.\n            Provide this to retrieve the subsequent page.\n\n        order_by : typing.Optional[str]\n            Batches can be ordered by creation time or last updated time.\n            Use `created_at` for creation time or `updated_at` for last updated time.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[ListBatchesResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v2/batches\",\n            method=\"GET\",\n            params={\n                \"page_size\": page_size,\n                \"page_token\": page_token,\n                \"order_by\": order_by,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= 
_response.status_code < 300:\n                _data = typing.cast(\n                    ListBatchesResponse,\n                    construct_type(\n                        type_=ListBatchesResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                       
     type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def create(\n        self, *, request: Batch, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[CreateBatchResponse]:\n        \"\"\"\n        Creates and executes a batch from an uploaded dataset of requests\n\n        Parameters\n        ----------\n        request : Batch\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific 
configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[CreateBatchResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v2/batches\",\n            method=\"POST\",\n            json=convert_and_respect_annotation_metadata(object_=request, annotation=Batch, direction=\"write\"),\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    CreateBatchResponse,\n                    construct_type(\n                        type_=CreateBatchResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    
headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                
status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def retrieve(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[GetBatchResponse]:\n        \"\"\"\n        Retrieves a batch\n\n        Parameters\n        ----------\n        id : str\n            The batch ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[GetBatchResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v2/batches/{jsonable_encoder(id)}\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    GetBatchResponse,\n                    construct_type(\n                        type_=GetBatchResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    
body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            
object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def cancel(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[CancelBatchResponse]:\n        \"\"\"\n        Cancels an in-progress batch\n\n        Parameters\n        ----------\n        id : str\n            The batch ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[CancelBatchResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v2/batches/{jsonable_encoder(id)}:cancel\",\n            method=\"POST\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    CancelBatchResponse,\n                    construct_type(\n                        type_=CancelBatchResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        
typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n             
       ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n"
  },
  {
    "path": "src/cohere/batches/types/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .batch import Batch\n    from .batch_status import BatchStatus\n    from .cancel_batch_response import CancelBatchResponse\n    from .create_batch_response import CreateBatchResponse\n    from .get_batch_response import GetBatchResponse\n    from .list_batches_response import ListBatchesResponse\n_dynamic_imports: typing.Dict[str, str] = {\n    \"Batch\": \".batch\",\n    \"BatchStatus\": \".batch_status\",\n    \"CancelBatchResponse\": \".cancel_batch_response\",\n    \"CreateBatchResponse\": \".create_batch_response\",\n    \"GetBatchResponse\": \".get_batch_response\",\n    \"ListBatchesResponse\": \".list_batches_response\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\n    \"Batch\",\n    \"BatchStatus\",\n    \"CancelBatchResponse\",\n    \"CreateBatchResponse\",\n    \"GetBatchResponse\",\n    \"ListBatchesResponse\",\n]\n"
  },
  {
    "path": "src/cohere/batches/types/batch.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport datetime as dt\nimport typing\n\nimport pydantic\nfrom ...core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ...core.unchecked_base_model import UncheckedBaseModel\nfrom .batch_status import BatchStatus\n\n\nclass Batch(UncheckedBaseModel):\n    \"\"\"\n    This resource represents a batch job.\n    \"\"\"\n\n    id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. Batch ID.\n    \"\"\"\n\n    name: str = pydantic.Field()\n    \"\"\"\n    Batch name (e.g. `foobar`).\n    \"\"\"\n\n    creator_id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. User ID of the creator.\n    \"\"\"\n\n    org_id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. Organization ID.\n    \"\"\"\n\n    status: typing.Optional[BatchStatus] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. Current stage in the life-cycle of the batch.\n    \"\"\"\n\n    created_at: typing.Optional[dt.datetime] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. Creation timestamp.\n    \"\"\"\n\n    updated_at: typing.Optional[dt.datetime] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. Latest update timestamp.\n    \"\"\"\n\n    input_dataset_id: str = pydantic.Field()\n    \"\"\"\n    ID of the dataset the batch reads inputs from.\n    \"\"\"\n\n    output_dataset_id: typing.Optional[str] = None\n    input_tokens: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. The total number of input tokens in the batch.\n    \"\"\"\n\n    output_tokens: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. 
The total number of output tokens in the batch.\n    \"\"\"\n\n    model: str = pydantic.Field()\n    \"\"\"\n    The name of the model the batch uses.\n    \"\"\"\n\n    num_records: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. The total number of records in the batch.\n    \"\"\"\n\n    num_successful_records: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. The current number of successful records in the batch.\n    \"\"\"\n\n    num_failed_records: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. The current number of failed records in the batch.\n    \"\"\"\n\n    status_reason: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. More details about the reason for the status of a batch job.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/batches/types/batch_status.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nBatchStatus = typing.Union[\n    typing.Literal[\n        \"BATCH_STATUS_UNSPECIFIED\",\n        \"BATCH_STATUS_QUEUED\",\n        \"BATCH_STATUS_IN_PROGRESS\",\n        \"BATCH_STATUS_CANCELING\",\n        \"BATCH_STATUS_COMPLETED\",\n        \"BATCH_STATUS_FAILED\",\n        \"BATCH_STATUS_CANCELED\",\n    ],\n    typing.Any,\n]\n"
  },
  {
    "path": "src/cohere/batches/types/cancel_batch_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nCancelBatchResponse = typing.Dict[str, typing.Any]\n\"\"\"\nResponse to a request to cancel a batch.\n\"\"\"\n"
  },
  {
    "path": "src/cohere/batches/types/create_batch_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ...core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ...core.unchecked_base_model import UncheckedBaseModel\nfrom .batch import Batch\n\n\nclass CreateBatchResponse(UncheckedBaseModel):\n    \"\"\"\n    Response to request to create a batch.\n    \"\"\"\n\n    batch: Batch = pydantic.Field()\n    \"\"\"\n    Information about the batch.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/batches/types/get_batch_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ...core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ...core.unchecked_base_model import UncheckedBaseModel\nfrom .batch import Batch\n\n\nclass GetBatchResponse(UncheckedBaseModel):\n    \"\"\"\n    Response to a request to get a batch.\n    \"\"\"\n\n    batch: Batch = pydantic.Field()\n    \"\"\"\n    Information about the batch.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/batches/types/list_batches_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ...core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ...core.unchecked_base_model import UncheckedBaseModel\nfrom .batch import Batch\n\n\nclass ListBatchesResponse(UncheckedBaseModel):\n    \"\"\"\n    Response to a request to list batches.\n    \"\"\"\n\n    batches: typing.Optional[typing.List[Batch]] = pydantic.Field(default=None)\n    \"\"\"\n    The batches that belong to the authenticated user.\n    \"\"\"\n\n    next_page_token: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    A token, which can be sent as `page_token` to retrieve the next page.\n    If this field is omitted, there are no subsequent pages.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/bedrock_client.py",
    "content": "import typing\n\nfrom tokenizers import Tokenizer  # type: ignore\n\nfrom .aws_client import AwsClient, AwsClientV2\n\n\nclass BedrockClient(AwsClient):\n    def __init__(\n            self,\n            *,\n            aws_access_key: typing.Optional[str] = None,\n            aws_secret_key: typing.Optional[str] = None,\n            aws_session_token: typing.Optional[str] = None,\n            aws_region: typing.Optional[str] = None,\n            timeout: typing.Optional[float] = None,\n    ):\n        AwsClient.__init__(\n            self,\n            service=\"bedrock\",\n            aws_access_key=aws_access_key,\n            aws_secret_key=aws_secret_key,\n            aws_session_token=aws_session_token,\n            aws_region=aws_region,\n            timeout=timeout,\n        )\n\n    def rerank(self, *, query, documents, model = ..., top_n = ..., rank_fields = ..., return_documents = ..., max_chunks_per_doc = ..., request_options = None):\n        raise NotImplementedError(\"Please use cohere.BedrockClientV2 instead: Rerank API on Bedrock is not supported with cohere.BedrockClient for this model.\")\n\nclass BedrockClientV2(AwsClientV2):\n    def __init__(\n            self,\n            *,\n            aws_access_key: typing.Optional[str] = None,\n            aws_secret_key: typing.Optional[str] = None,\n            aws_session_token: typing.Optional[str] = None,\n            aws_region: typing.Optional[str] = None,\n            timeout: typing.Optional[float] = None,\n    ):\n        AwsClientV2.__init__(\n            self,\n            service=\"bedrock\",\n            aws_access_key=aws_access_key,\n            aws_secret_key=aws_secret_key,\n            aws_session_token=aws_session_token,\n            aws_region=aws_region,\n            timeout=timeout,\n        )\n"
  },
  {
    "path": "src/cohere/client.py",
    "content": "import asyncio\nimport os\nimport typing\nfrom concurrent.futures import ThreadPoolExecutor\nfrom tokenizers import Tokenizer  # type: ignore\nimport logging\n\nimport httpx\n\nfrom cohere.types.detokenize_response import DetokenizeResponse\nfrom cohere.types.tokenize_response import TokenizeResponse\n\nfrom . import EmbedResponse, EmbedInputType, EmbeddingType, EmbedRequestTruncate\nfrom .base_client import BaseCohere, AsyncBaseCohere, OMIT\nfrom .config import embed_batch_size, embed_stream_batch_size\nfrom .core import RequestOptions\nfrom .environment import ClientEnvironment\nfrom .manually_maintained.cache import CacheMixin\nfrom .manually_maintained import tokenizers as local_tokenizers\nfrom .overrides import run_overrides\nfrom .utils import wait, async_wait, merge_embed_responses, SyncSdkUtils, AsyncSdkUtils\n\nlogger = logging.getLogger(__name__)\nrun_overrides()\n\n# Use NoReturn as Never type for compatibility\nNever = typing.NoReturn\n\n\ndef validate_args(obj: typing.Any, method_name: str, check_fn: typing.Callable[[typing.Any], typing.Any]) -> None:\n    method = getattr(obj, method_name)\n\n    def _wrapped(*args: typing.Any, **kwargs: typing.Any) -> typing.Any:\n        check_fn(*args, **kwargs)\n        return method(*args, **kwargs)\n\n    async def _async_wrapped(*args: typing.Any, **kwargs: typing.Any) -> typing.Any:\n        # The `return await` looks redundant, but it's necessary to ensure that the return type is correct.\n        check_fn(*args, **kwargs)\n        return await method(*args, **kwargs)\n\n    wrapped = _wrapped\n    if asyncio.iscoroutinefunction(method):\n        wrapped = _async_wrapped\n\n    wrapped.__name__ = method.__name__\n    wrapped.__doc__ = method.__doc__\n    setattr(obj, method_name, wrapped)\n\n\ndef throw_if_stream_is_true(*args, **kwargs) -> None:\n    if kwargs.get(\"stream\") is True:\n        raise ValueError(\n            \"Since python sdk cohere==5.0.0, you must now use chat_stream(...) 
instead of chat(stream=True, ...)\"\n        )\n\n\ndef moved_function(fn_name: str, new_fn_name: str) -> typing.Any:\n    \"\"\"\n    This method is moved. Please update usage.\n    \"\"\"\n\n    def fn(*args, **kwargs):\n        raise ValueError(\n            f\"Since python sdk cohere==5.0.0, the function {fn_name}(...) has been moved to {new_fn_name}(...). \"\n            f\"Please update your code. Issues may be filed in https://github.com/cohere-ai/cohere-python/issues.\"\n        )\n\n    return fn\n\n\ndef deprecated_function(fn_name: str) -> typing.Any:\n    \"\"\"\n    This method is deprecated. Please update usage.\n    \"\"\"\n\n    def fn(*args, **kwargs):\n        raise ValueError(\n            f\"Since python sdk cohere==5.0.0, the function {fn_name}(...) has been deprecated. \"\n            f\"Please update your code. Issues may be filed in https://github.com/cohere-ai/cohere-python/issues.\"\n        )\n\n    return fn\n\n\n# Logs a warning when a user calls a function with an experimental parameter (kwarg in our case)\n# `deprecated_kwarg` is the name of the experimental parameter, which can be a dot-separated string for nested parameters\ndef experimental_kwarg_decorator(func, deprecated_kwarg):\n    # Recursive utility function to check if a kwarg is present in the kwargs.\n    def check_kwarg(deprecated_kwarg: str, kwargs: typing.Dict[str, typing.Any]) -> bool:\n        if \".\" in deprecated_kwarg:\n            key, rest = deprecated_kwarg.split(\".\", 1)\n            if key in kwargs:\n                return check_kwarg(rest, kwargs[key])\n        return deprecated_kwarg in kwargs\n\n    def _wrapped(*args, **kwargs):\n        if check_kwarg(deprecated_kwarg, kwargs):\n            logger.warning(\n                f\"The `{deprecated_kwarg}` parameter is an experimental feature and may change in future releases.\\n\"\n                \"To suppress this warning, set `log_warning_experimental_features=False` when initializing the client.\"\n     
       )\n        return func(*args, **kwargs)\n\n    async def _async_wrapped(*args, **kwargs):\n        if check_kwarg(deprecated_kwarg, kwargs):\n            logger.warning(\n                f\"The `{deprecated_kwarg}` parameter is an experimental feature and may change in future releases.\\n\"\n                \"To suppress this warning, set `log_warning_experimental_features=False` when initializing the client.\"\n            )\n        return await func(*args, **kwargs)\n\n    wrap = _wrapped\n    if asyncio.iscoroutinefunction(func):\n        wrap = _async_wrapped\n\n    wrap.__name__ = func.__name__\n    wrap.__doc__ = func.__doc__\n\n    return wrap\n\n\ndef fix_base_url(base_url: typing.Optional[str]) -> typing.Optional[str]:\n    if base_url is not None:\n        if \"cohere.com\" in base_url or \"cohere.ai\" in base_url:\n            return base_url.replace(\"/v1\", \"\")\n        return base_url\n    return None\n\n\nclass Client(BaseCohere, CacheMixin):\n    _executor: ThreadPoolExecutor\n\n    def __init__(\n        self,\n        api_key: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = None,\n        *,\n        base_url: typing.Optional[str] = os.getenv(\"CO_API_URL\"),\n        environment: ClientEnvironment = ClientEnvironment.PRODUCTION,\n        client_name: typing.Optional[str] = None,\n        timeout: typing.Optional[float] = None,\n        httpx_client: typing.Optional[httpx.Client] = None,\n        thread_pool_executor: ThreadPoolExecutor = ThreadPoolExecutor(64),\n        log_warning_experimental_features: bool = True,\n    ):\n        if api_key is None:\n            api_key = _get_api_key_from_environment()\n\n        base_url = fix_base_url(base_url)\n\n        self._executor = thread_pool_executor\n\n        BaseCohere.__init__(\n            self,\n            base_url=base_url,\n            environment=environment,\n            client_name=client_name,\n            token=api_key,\n            timeout=timeout,\n         
   httpx_client=httpx_client,\n        )\n\n        validate_args(self, \"chat\", throw_if_stream_is_true)\n        if log_warning_experimental_features:\n            self.chat = experimental_kwarg_decorator(self.chat, \"response_format.schema\")  # type: ignore\n            self.chat_stream = experimental_kwarg_decorator(self.chat_stream, \"response_format.schema\")  # type: ignore\n\n    utils = SyncSdkUtils()\n\n    # support context manager until Fern upstreams\n    # https://linear.app/buildwithfern/issue/FER-1242/expose-a-context-manager-interface-or-the-http-client-easily\n    def __enter__(self):\n        return self\n\n    def __exit__(self, exc_type, exc_value, traceback):\n        self._client_wrapper.httpx_client.httpx_client.close()\n\n    wait = wait\n\n    def embed(\n        self,\n        *,\n        texts: typing.Optional[typing.Sequence[str]] = OMIT,\n        images: typing.Optional[typing.Sequence[str]] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        input_type: typing.Optional[EmbedInputType] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[EmbedRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n        batching: typing.Optional[bool] = True,\n    ) -> EmbedResponse:\n        # skip batching for images for now\n        if batching is False or images is not OMIT:\n            return BaseCohere.embed(\n                self,\n                texts=texts,\n                images=images,\n                model=model,\n                input_type=input_type,\n                embedding_types=embedding_types,\n                truncate=truncate,\n                request_options=request_options,\n            )\n\n        textsarr: typing.Sequence[str]  = texts if texts is not OMIT and texts is not None else []\n        texts_batches = [textsarr[i : i + embed_batch_size] for i in range(0, len(textsarr), embed_batch_size)]\n\n   
     responses = [\n            response\n            for response in self._executor.map(\n                lambda text_batch: BaseCohere.embed(\n                    self,\n                    texts=text_batch,\n                    model=model,\n                    input_type=input_type,\n                    embedding_types=embedding_types,\n                    truncate=truncate,\n                    request_options=request_options,\n                ),\n                texts_batches,\n            )\n        ]\n\n        return merge_embed_responses(responses)\n\n    def embed_stream(\n        self,\n        *,\n        texts: typing.Sequence[str],\n        model: typing.Optional[str] = OMIT,\n        input_type: typing.Optional[EmbedInputType] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[EmbedRequestTruncate] = OMIT,\n        batch_size: int = embed_stream_batch_size,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> typing.Iterator[typing.Any]:\n        \"\"\"\n        Memory-efficient embed that yields embeddings one batch at a time.\n\n        Processes texts in batches and yields individual StreamedEmbedding objects\n        as they come back, so you can write to a vector store incrementally without\n        holding all embeddings in memory.\n\n        Args:\n            texts: Texts to embed.\n            model: Embedding model ID.\n            input_type: Input type (search_document, search_query, etc.).\n            embedding_types: Types of embeddings to return (float, int8, etc.).\n            truncate: How to handle inputs longer than the max token length.\n            batch_size: Texts per API call. 
Defaults to 96 (API max).\n            request_options: Request-specific configuration.\n\n        Yields:\n            StreamedEmbedding with index, embedding, embedding_type, and text.\n        \"\"\"\n        from .manually_maintained.streaming_embed import extract_embeddings_from_response\n\n        if not texts:\n            return\n        if batch_size < 1:\n            raise ValueError(\"batch_size must be at least 1\")\n\n        texts_list = list(texts)\n\n        for batch_start in range(0, len(texts_list), batch_size):\n            batch_texts = texts_list[batch_start : batch_start + batch_size]\n\n            response = BaseCohere.embed(\n                self,\n                texts=batch_texts,\n                model=model,\n                input_type=input_type,\n                embedding_types=embedding_types,\n                truncate=truncate,\n                request_options=request_options,\n            )\n\n            response_data = response.dict() if hasattr(response, \"dict\") else response.__dict__\n            yield from extract_embeddings_from_response(response_data, batch_texts, batch_start)\n\n    \"\"\"\n    The following methods have been moved or deprecated in cohere==5.0.0. 
Please update your usage.\n    Issues may be filed in https://github.com/cohere-ai/cohere-python/issues.\n    \"\"\"\n    check_api_key: Never = deprecated_function(\"check_api_key\")\n    loglikelihood: Never = deprecated_function(\"loglikelihood\")\n    batch_generate: Never = deprecated_function(\"batch_generate\")\n    codebook: Never = deprecated_function(\"codebook\")\n    batch_tokenize: Never = deprecated_function(\"batch_tokenize\")\n    batch_detokenize: Never = deprecated_function(\"batch_detokenize\")\n    detect_language: Never = deprecated_function(\"detect_language\")\n    generate_feedback: Never = deprecated_function(\"generate_feedback\")\n    generate_preference_feedback: Never = deprecated_function(\"generate_preference_feedback\")\n    create_dataset: Never = moved_function(\"create_dataset\", \".datasets.create\")\n    get_dataset: Never = moved_function(\"get_dataset\", \".datasets.get\")\n    list_datasets: Never = moved_function(\"list_datasets\", \".datasets.list\")\n    delete_dataset: Never = moved_function(\"delete_dataset\", \".datasets.delete\")\n    get_dataset_usage: Never = moved_function(\"get_dataset_usage\", \".datasets.get_usage\")\n    wait_for_dataset: Never = moved_function(\"wait_for_dataset\", \".wait\")\n    _check_response: Never = deprecated_function(\"_check_response\")\n    _request: Never = deprecated_function(\"_request\")\n    create_cluster_job: Never = deprecated_function(\"create_cluster_job\")\n    get_cluster_job: Never = deprecated_function(\"get_cluster_job\")\n    list_cluster_jobs: Never = deprecated_function(\"list_cluster_jobs\")\n    wait_for_cluster_job: Never = deprecated_function(\"wait_for_cluster_job\")\n    create_embed_job: Never = moved_function(\"create_embed_job\", \".embed_jobs.create\")\n    list_embed_jobs: Never = moved_function(\"list_embed_jobs\", \".embed_jobs.list\")\n    get_embed_job: Never = moved_function(\"get_embed_job\", \".embed_jobs.get\")\n    cancel_embed_job: Never = 
moved_function(\"cancel_embed_job\", \".embed_jobs.cancel\")\n    wait_for_embed_job: Never = moved_function(\"wait_for_embed_job\", \".wait\")\n    create_custom_model: Never = deprecated_function(\"create_custom_model\")\n    wait_for_custom_model: Never = deprecated_function(\"wait_for_custom_model\")\n    _upload_dataset: Never = deprecated_function(\"_upload_dataset\")\n    _create_signed_url: Never = deprecated_function(\"_create_signed_url\")\n    get_custom_model: Never = deprecated_function(\"get_custom_model\")\n    get_custom_model_by_name: Never = deprecated_function(\"get_custom_model_by_name\")\n    get_custom_model_metrics: Never = deprecated_function(\"get_custom_model_metrics\")\n    list_custom_models: Never = deprecated_function(\"list_custom_models\")\n    create_connector: Never = moved_function(\"create_connector\", \".connectors.create\")\n    update_connector: Never = moved_function(\"update_connector\", \".connectors.update\")\n    get_connector: Never = moved_function(\"get_connector\", \".connectors.get\")\n    list_connectors: Never = moved_function(\"list_connectors\", \".connectors.list\")\n    delete_connector: Never = moved_function(\"delete_connector\", \".connectors.delete\")\n    oauth_authorize_connector: Never = moved_function(\"oauth_authorize_connector\", \".connectors.o_auth_authorize\")\n\n    def tokenize(\n        self,\n        *,\n        text: str,\n        model: str,\n        request_options: typing.Optional[RequestOptions] = None,\n        offline: bool = True,\n    ) -> TokenizeResponse:\n        # `offline` parameter controls whether to use an offline tokenizer. If set to True, the tokenizer config will be downloaded (and cached),\n        # and the request will be processed using the offline tokenizer. If set to False, the request will be processed using the API. 
The default value is True.\n        opts: RequestOptions = request_options or {}  # type: ignore\n\n        if offline:\n            try:\n                tokens = local_tokenizers.local_tokenize(self, text=text, model=model)\n                return TokenizeResponse(tokens=tokens, token_strings=[])\n            except Exception:\n                # Fallback to calling the API.\n                opts[\"additional_headers\"] = opts.get(\"additional_headers\", {})\n                opts[\"additional_headers\"][\"sdk-api-warning-message\"] = \"offline_tokenizer_failed\"\n        return super().tokenize(text=text, model=model, request_options=opts)\n\n    def detokenize(\n        self,\n        *,\n        tokens: typing.Sequence[int],\n        model: str,\n        request_options: typing.Optional[RequestOptions] = None,\n        offline: typing.Optional[bool] = True,\n    ) -> DetokenizeResponse:\n        # `offline` parameter controls whether to use an offline tokenizer. If set to True, the tokenizer config will be downloaded (and cached),\n        # and the request will be processed using the offline tokenizer. If set to False, the request will be processed using the API. 
The default value is True.\n        opts: RequestOptions = request_options or {}  # type: ignore\n\n        if offline:\n            try:\n                text = local_tokenizers.local_detokenize(self, model=model, tokens=tokens)\n                return DetokenizeResponse(text=text)\n            except Exception:\n                # Fallback to calling the API.\n                opts[\"additional_headers\"] = opts.get(\"additional_headers\", {})\n                opts[\"additional_headers\"][\"sdk-api-warning-message\"] = \"offline_tokenizer_failed\"\n\n        return super().detokenize(tokens=tokens, model=model, request_options=opts)\n\n    def fetch_tokenizer(self, *, model: str) -> Tokenizer:\n        \"\"\"\n        Returns a Hugging Face tokenizer from a given model name.\n        \"\"\"\n        return local_tokenizers.get_hf_tokenizer(self, model)\n\n\nclass AsyncClient(AsyncBaseCohere, CacheMixin):\n    _executor: ThreadPoolExecutor\n\n    def __init__(\n        self,\n        api_key: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = None,\n        *,\n        base_url: typing.Optional[str] = os.getenv(\"CO_API_URL\"),\n        environment: ClientEnvironment = ClientEnvironment.PRODUCTION,\n        client_name: typing.Optional[str] = None,\n        timeout: typing.Optional[float] = None,\n        httpx_client: typing.Optional[httpx.AsyncClient] = None,\n        thread_pool_executor: ThreadPoolExecutor = ThreadPoolExecutor(64),\n        log_warning_experimental_features: bool = True,\n    ):\n        if api_key is None:\n            api_key = _get_api_key_from_environment()\n\n        base_url = fix_base_url(base_url)\n\n        self._executor = thread_pool_executor\n\n        AsyncBaseCohere.__init__(\n            self,\n            base_url=base_url,\n            environment=environment,\n            client_name=client_name,\n            token=api_key,\n            timeout=timeout,\n            httpx_client=httpx_client,\n        )\n\n        
validate_args(self, \"chat\", throw_if_stream_is_true)\n        if log_warning_experimental_features:\n            self.chat = experimental_kwarg_decorator(self.chat, \"response_format.schema\")  # type: ignore\n            self.chat_stream = experimental_kwarg_decorator(self.chat_stream, \"response_format.schema\")  # type: ignore\n\n    utils = AsyncSdkUtils()\n\n    # support context manager until Fern upstreams\n    # https://linear.app/buildwithfern/issue/FER-1242/expose-a-context-manager-interface-or-the-http-client-easily\n    async def __aenter__(self):\n        return self\n\n    async def __aexit__(self, exc_type, exc_value, traceback):\n        await self._client_wrapper.httpx_client.httpx_client.aclose()\n\n    wait = async_wait\n\n    async def embed(\n        self,\n        *,\n        texts: typing.Optional[typing.Sequence[str]] = OMIT,\n        images: typing.Optional[typing.Sequence[str]] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        input_type: typing.Optional[EmbedInputType] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[EmbedRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n        batching: typing.Optional[bool] = True,\n    ) -> EmbedResponse:\n        # skip batching for images for now\n        if batching is False or images is not OMIT:\n            return await AsyncBaseCohere.embed(\n                self,\n                texts=texts,\n                images=images,\n                model=model,\n                input_type=input_type,\n                embedding_types=embedding_types,\n                truncate=truncate,\n                request_options=request_options,\n            )\n\n        textsarr: typing.Sequence[str]  = texts if texts is not OMIT and texts is not None else []\n        texts_batches = [textsarr[i : i + embed_batch_size] for i in range(0, len(textsarr), embed_batch_size)]\n\n        
responses = typing.cast(\n            typing.List[EmbedResponse],\n            await asyncio.gather(\n                *[\n                    AsyncBaseCohere.embed(\n                        self,\n                        texts=text_batch,\n                        model=model,\n                        input_type=input_type,\n                        embedding_types=embedding_types,\n                        truncate=truncate,\n                        request_options=request_options,\n                    )\n                    for text_batch in texts_batches\n                ]\n            ),\n        )\n\n        return merge_embed_responses(responses)\n\n    \"\"\"\n    The following methods have been moved or deprecated in cohere==5.0.0. Please update your usage.\n    Issues may be filed in https://github.com/cohere-ai/cohere-python/issues.\n    \"\"\"\n    check_api_key: Never = deprecated_function(\"check_api_key\")\n    loglikelihood: Never = deprecated_function(\"loglikelihood\")\n    batch_generate: Never = deprecated_function(\"batch_generate\")\n    codebook: Never = deprecated_function(\"codebook\")\n    batch_tokenize: Never = deprecated_function(\"batch_tokenize\")\n    batch_detokenize: Never = deprecated_function(\"batch_detokenize\")\n    detect_language: Never = deprecated_function(\"detect_language\")\n    generate_feedback: Never = deprecated_function(\"generate_feedback\")\n    generate_preference_feedback: Never = deprecated_function(\"generate_preference_feedback\")\n    create_dataset: Never = moved_function(\"create_dataset\", \".datasets.create\")\n    get_dataset: Never = moved_function(\"get_dataset\", \".datasets.get\")\n    list_datasets: Never = moved_function(\"list_datasets\", \".datasets.list\")\n    delete_dataset: Never = moved_function(\"delete_dataset\", \".datasets.delete\")\n    get_dataset_usage: Never = moved_function(\"get_dataset_usage\", \".datasets.get_usage\")\n    wait_for_dataset: Never = 
moved_function(\"wait_for_dataset\", \".wait\")\n    _check_response: Never = deprecated_function(\"_check_response\")\n    _request: Never = deprecated_function(\"_request\")\n    create_cluster_job: Never = deprecated_function(\"create_cluster_job\")\n    get_cluster_job: Never = deprecated_function(\"get_cluster_job\")\n    list_cluster_jobs: Never = deprecated_function(\"list_cluster_jobs\")\n    wait_for_cluster_job: Never = deprecated_function(\"wait_for_cluster_job\")\n    create_embed_job: Never = moved_function(\"create_embed_job\", \".embed_jobs.create\")\n    list_embed_jobs: Never = moved_function(\"list_embed_jobs\", \".embed_jobs.list\")\n    get_embed_job: Never = moved_function(\"get_embed_job\", \".embed_jobs.get\")\n    cancel_embed_job: Never = moved_function(\"cancel_embed_job\", \".embed_jobs.cancel\")\n    wait_for_embed_job: Never = moved_function(\"wait_for_embed_job\", \".wait\")\n    create_custom_model: Never = deprecated_function(\"create_custom_model\")\n    wait_for_custom_model: Never = deprecated_function(\"wait_for_custom_model\")\n    _upload_dataset: Never = deprecated_function(\"_upload_dataset\")\n    _create_signed_url: Never = deprecated_function(\"_create_signed_url\")\n    get_custom_model: Never = deprecated_function(\"get_custom_model\")\n    get_custom_model_by_name: Never = deprecated_function(\"get_custom_model_by_name\")\n    get_custom_model_metrics: Never = deprecated_function(\"get_custom_model_metrics\")\n    list_custom_models: Never = deprecated_function(\"list_custom_models\")\n    create_connector: Never = moved_function(\"create_connector\", \".connectors.create\")\n    update_connector: Never = moved_function(\"update_connector\", \".connectors.update\")\n    get_connector: Never = moved_function(\"get_connector\", \".connectors.get\")\n    list_connectors: Never = moved_function(\"list_connectors\", \".connectors.list\")\n    delete_connector: Never = moved_function(\"delete_connector\", 
\".connectors.delete\")\n    oauth_authorize_connector: Never = moved_function(\"oauth_authorize_connector\", \".connectors.o_auth_authorize\")\n\n    async def tokenize(\n        self,\n        *,\n        text: str,\n        model: str,\n        request_options: typing.Optional[RequestOptions] = None,\n        offline: typing.Optional[bool] = True,\n    ) -> TokenizeResponse:\n        # `offline` parameter controls whether to use an offline tokenizer. If set to True, the tokenizer config will be downloaded (and cached),\n        # and the request will be processed using the offline tokenizer. If set to False, the request will be processed using the API. The default value is True.\n        opts: RequestOptions = request_options or {}  # type: ignore\n        if offline:\n            try:\n                tokens = await local_tokenizers.async_local_tokenize(self, model=model, text=text)\n                return TokenizeResponse(tokens=tokens, token_strings=[])\n            except Exception:\n                opts[\"additional_headers\"] = opts.get(\"additional_headers\", {})\n                opts[\"additional_headers\"][\"sdk-api-warning-message\"] = \"offline_tokenizer_failed\"\n\n        return await super().tokenize(text=text, model=model, request_options=opts)\n\n    async def detokenize(\n        self,\n        *,\n        tokens: typing.Sequence[int],\n        model: str,\n        request_options: typing.Optional[RequestOptions] = None,\n        offline: typing.Optional[bool] = True,\n    ) -> DetokenizeResponse:\n        # `offline` parameter controls whether to use an offline tokenizer. If set to True, the tokenizer config will be downloaded (and cached),\n        # and the request will be processed using the offline tokenizer. If set to False, the request will be processed using the API. 
The default value is True.\n        opts: RequestOptions = request_options or {}  # type: ignore\n        if offline:\n            try:\n                text = await local_tokenizers.async_local_detokenize(self, model=model, tokens=tokens)\n                return DetokenizeResponse(text=text)\n            except Exception:\n                opts[\"additional_headers\"] = opts.get(\"additional_headers\", {})\n                opts[\"additional_headers\"][\"sdk-api-warning-message\"] = \"offline_tokenizer_failed\"\n\n        return await super().detokenize(tokens=tokens, model=model, request_options=opts)\n\n    async def fetch_tokenizer(self, *, model: str) -> Tokenizer:\n        \"\"\"\n        Returns a Hugging Face tokenizer from a given model name.\n        \"\"\"\n        return await local_tokenizers.async_get_hf_tokenizer(self, model)\n\n\ndef _get_api_key_from_environment() -> typing.Optional[str]:\n    \"\"\"\n    Retrieves the Cohere API key from specific environment variables.\n    CO_API_KEY is preferred (and documented) COHERE_API_KEY is accepted (but not documented).\n    \"\"\"\n    return os.getenv(\"CO_API_KEY\", os.getenv(\"COHERE_API_KEY\"))\n"
  },
  {
    "path": "src/cohere/client_v2.py",
    "content": "import os\nimport typing\nfrom concurrent.futures import ThreadPoolExecutor\n\nimport httpx\nfrom .client import AsyncClient, Client\nfrom .environment import ClientEnvironment\nfrom .v2.client import AsyncRawV2Client, AsyncV2Client, RawV2Client, V2Client\n\n\nclass _CombinedRawClient:\n    \"\"\"Proxy that combines v1 and v2 raw clients.\n\n    V2Client and Client both assign to self._raw_client in __init__,\n    causing a collision when combined in ClientV2/AsyncClientV2.\n    This proxy delegates to v2 first, falling back to v1 for\n    legacy methods like generate_stream.\n    \"\"\"\n\n    def __init__(self, v1_raw_client: typing.Any, v2_raw_client: typing.Any):\n        self._v1 = v1_raw_client\n        self._v2 = v2_raw_client\n\n    def __getattr__(self, name: str) -> typing.Any:\n        try:\n            return getattr(self._v2, name)\n        except AttributeError:\n            return getattr(self._v1, name)\n\n\nclass ClientV2(V2Client, Client):  # type: ignore\n    def __init__(\n        self,\n        api_key: typing.Optional[typing.Union[str,\n                                              typing.Callable[[], str]]] = None,\n        *,\n        base_url: typing.Optional[str] = os.getenv(\"CO_API_URL\"),\n        environment: ClientEnvironment = ClientEnvironment.PRODUCTION,\n        client_name: typing.Optional[str] = None,\n        timeout: typing.Optional[float] = None,\n        httpx_client: typing.Optional[httpx.Client] = None,\n        thread_pool_executor: ThreadPoolExecutor = ThreadPoolExecutor(64),\n        log_warning_experimental_features: bool = True,\n    ):\n        Client.__init__(\n            self,\n            api_key=api_key,\n            base_url=base_url,\n            environment=environment,\n            client_name=client_name,\n            timeout=timeout,\n            httpx_client=httpx_client,\n            thread_pool_executor=thread_pool_executor,\n            
log_warning_experimental_features=log_warning_experimental_features,\n        )\n        v1_raw = self._raw_client\n        V2Client.__init__(\n            self,\n            client_wrapper=self._client_wrapper\n        )\n        self._raw_client = typing.cast(RawV2Client, _CombinedRawClient(v1_raw, self._raw_client))\n\n\nclass AsyncClientV2(AsyncV2Client, AsyncClient):  # type: ignore\n    def __init__(\n        self,\n        api_key: typing.Optional[typing.Union[str,\n                                              typing.Callable[[], str]]] = None,\n        *,\n        base_url: typing.Optional[str] = os.getenv(\"CO_API_URL\"),\n        environment: ClientEnvironment = ClientEnvironment.PRODUCTION,\n        client_name: typing.Optional[str] = None,\n        timeout: typing.Optional[float] = None,\n        httpx_client: typing.Optional[httpx.AsyncClient] = None,\n        thread_pool_executor: ThreadPoolExecutor = ThreadPoolExecutor(64),\n        log_warning_experimental_features: bool = True,\n    ):\n        AsyncClient.__init__(\n            self,\n            api_key=api_key,\n            base_url=base_url,\n            environment=environment,\n            client_name=client_name,\n            timeout=timeout,\n            httpx_client=httpx_client,\n            thread_pool_executor=thread_pool_executor,\n            log_warning_experimental_features=log_warning_experimental_features,\n        )\n        v1_raw = self._raw_client\n        AsyncV2Client.__init__(\n            self,\n            client_wrapper=self._client_wrapper\n        )\n        self._raw_client = typing.cast(AsyncRawV2Client, _CombinedRawClient(v1_raw, self._raw_client))\n"
  },
  {
    "path": "src/cohere/config.py",
    "content": "embed_batch_size = 96\nembed_stream_batch_size = 96  # Max texts per API request (API limit)\n"
  },
  {
    "path": "src/cohere/connectors/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\n"
  },
  {
    "path": "src/cohere/connectors/client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.request_options import RequestOptions\nfrom ..types.create_connector_o_auth import CreateConnectorOAuth\nfrom ..types.create_connector_response import CreateConnectorResponse\nfrom ..types.create_connector_service_auth import CreateConnectorServiceAuth\nfrom ..types.delete_connector_response import DeleteConnectorResponse\nfrom ..types.get_connector_response import GetConnectorResponse\nfrom ..types.list_connectors_response import ListConnectorsResponse\nfrom ..types.o_auth_authorize_response import OAuthAuthorizeResponse\nfrom ..types.update_connector_response import UpdateConnectorResponse\nfrom .raw_client import AsyncRawConnectorsClient, RawConnectorsClient\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass ConnectorsClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._raw_client = RawConnectorsClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> RawConnectorsClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        RawConnectorsClient\n        \"\"\"\n        return self._raw_client\n\n    def list(\n        self,\n        *,\n        limit: typing.Optional[float] = None,\n        offset: typing.Optional[float] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ListConnectorsResponse:\n        \"\"\"\n        Returns a list of connectors ordered by descending creation date (newer first). 
See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information.\n\n        Parameters\n        ----------\n        limit : typing.Optional[float]\n            Maximum number of connectors to return [0, 100].\n\n        offset : typing.Optional[float]\n            Number of connectors to skip before returning results [0, inf].\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListConnectorsResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.connectors.list(\n            limit=1.1,\n            offset=1.1,\n        )\n        \"\"\"\n        _response = self._raw_client.list(limit=limit, offset=offset, request_options=request_options)\n        return _response.data\n\n    def create(\n        self,\n        *,\n        name: str,\n        url: str,\n        description: typing.Optional[str] = OMIT,\n        excludes: typing.Optional[typing.Sequence[str]] = OMIT,\n        oauth: typing.Optional[CreateConnectorOAuth] = OMIT,\n        active: typing.Optional[bool] = OMIT,\n        continue_on_failure: typing.Optional[bool] = OMIT,\n        service_auth: typing.Optional[CreateConnectorServiceAuth] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> CreateConnectorResponse:\n        \"\"\"\n        Creates a new connector. The connector is tested during registration and will cancel registration when the test is unsuccessful. 
See ['Creating and Deploying a Connector'](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) for more information.\n\n        Parameters\n        ----------\n        name : str\n            A human-readable name for the connector.\n\n        url : str\n            The URL of the connector that will be used to search for documents.\n\n        description : typing.Optional[str]\n            A description of the connector.\n\n        excludes : typing.Optional[typing.Sequence[str]]\n            A list of fields to exclude from the prompt (fields remain in the document).\n\n        oauth : typing.Optional[CreateConnectorOAuth]\n            The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified.\n\n        active : typing.Optional[bool]\n            Whether the connector is active or not.\n\n        continue_on_failure : typing.Optional[bool]\n            Whether a chat request should continue or not if the request to this connector fails.\n\n        service_auth : typing.Optional[CreateConnectorServiceAuth]\n            The service to service authentication configuration for the connector. 
Cannot be specified if oauth is specified.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        CreateConnectorResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.connectors.create(\n            name=\"name\",\n            url=\"url\",\n        )\n        \"\"\"\n        _response = self._raw_client.create(\n            name=name,\n            url=url,\n            description=description,\n            excludes=excludes,\n            oauth=oauth,\n            active=active,\n            continue_on_failure=continue_on_failure,\n            service_auth=service_auth,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> GetConnectorResponse:\n        \"\"\"\n        Retrieve a connector by ID. 
See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to retrieve.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        GetConnectorResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.connectors.get(\n            id=\"id\",\n        )\n        \"\"\"\n        _response = self._raw_client.get(id, request_options=request_options)\n        return _response.data\n\n    def delete(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> DeleteConnectorResponse:\n        \"\"\"\n        Delete a connector by ID. See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to delete.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DeleteConnectorResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.connectors.delete(\n            id=\"id\",\n        )\n        \"\"\"\n        _response = self._raw_client.delete(id, request_options=request_options)\n        return _response.data\n\n    def update(\n        self,\n        id: str,\n        *,\n        name: typing.Optional[str] = OMIT,\n        url: typing.Optional[str] = OMIT,\n        excludes: typing.Optional[typing.Sequence[str]] = OMIT,\n        oauth: typing.Optional[CreateConnectorOAuth] = OMIT,\n        
active: typing.Optional[bool] = OMIT,\n        continue_on_failure: typing.Optional[bool] = OMIT,\n        service_auth: typing.Optional[CreateConnectorServiceAuth] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> UpdateConnectorResponse:\n        \"\"\"\n        Update a connector by ID. Omitted fields will not be updated. See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to update.\n\n        name : typing.Optional[str]\n            A human-readable name for the connector.\n\n        url : typing.Optional[str]\n            The URL of the connector that will be used to search for documents.\n\n        excludes : typing.Optional[typing.Sequence[str]]\n            A list of fields to exclude from the prompt (fields remain in the document).\n\n        oauth : typing.Optional[CreateConnectorOAuth]\n            The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified.\n\n        active : typing.Optional[bool]\n            Whether the connector is active or not.\n\n        continue_on_failure : typing.Optional[bool]\n            Whether a chat request should continue or not if the request to this connector fails.\n\n        service_auth : typing.Optional[CreateConnectorServiceAuth]\n            The service to service authentication configuration for the connector. 
Cannot be specified if oauth is specified.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        UpdateConnectorResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.connectors.update(\n            id=\"id\",\n        )\n        \"\"\"\n        _response = self._raw_client.update(\n            id,\n            name=name,\n            url=url,\n            excludes=excludes,\n            oauth=oauth,\n            active=active,\n            continue_on_failure=continue_on_failure,\n            service_auth=service_auth,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def o_auth_authorize(\n        self,\n        id: str,\n        *,\n        after_token_redirect: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> OAuthAuthorizeResponse:\n        \"\"\"\n        Authorize the connector with the given ID for the connector oauth app.  
See ['Connector Authentication'](https://docs.cohere.com/docs/connector-authentication) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to authorize.\n\n        after_token_redirect : typing.Optional[str]\n            The URL to redirect to after the connector has been authorized.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        OAuthAuthorizeResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.connectors.o_auth_authorize(\n            id=\"id\",\n            after_token_redirect=\"after_token_redirect\",\n        )\n        \"\"\"\n        _response = self._raw_client.o_auth_authorize(\n            id, after_token_redirect=after_token_redirect, request_options=request_options\n        )\n        return _response.data\n\n\nclass AsyncConnectorsClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._raw_client = AsyncRawConnectorsClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> AsyncRawConnectorsClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        AsyncRawConnectorsClient\n        \"\"\"\n        return self._raw_client\n\n    async def list(\n        self,\n        *,\n        limit: typing.Optional[float] = None,\n        offset: typing.Optional[float] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ListConnectorsResponse:\n        \"\"\"\n        Returns a list of connectors ordered by descending creation date (newer first). 
See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information.\n\n        Parameters\n        ----------\n        limit : typing.Optional[float]\n            Maximum number of connectors to return [0, 100].\n\n        offset : typing.Optional[float]\n            Number of connectors to skip before returning results [0, inf].\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListConnectorsResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.connectors.list(\n                limit=1.1,\n                offset=1.1,\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.list(limit=limit, offset=offset, request_options=request_options)\n        return _response.data\n\n    async def create(\n        self,\n        *,\n        name: str,\n        url: str,\n        description: typing.Optional[str] = OMIT,\n        excludes: typing.Optional[typing.Sequence[str]] = OMIT,\n        oauth: typing.Optional[CreateConnectorOAuth] = OMIT,\n        active: typing.Optional[bool] = OMIT,\n        continue_on_failure: typing.Optional[bool] = OMIT,\n        service_auth: typing.Optional[CreateConnectorServiceAuth] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> CreateConnectorResponse:\n        \"\"\"\n        Creates a new connector. The connector is tested during registration and will cancel registration when the test is unsuccessful. 
See ['Creating and Deploying a Connector'](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) for more information.\n\n        Parameters\n        ----------\n        name : str\n            A human-readable name for the connector.\n\n        url : str\n            The URL of the connector that will be used to search for documents.\n\n        description : typing.Optional[str]\n            A description of the connector.\n\n        excludes : typing.Optional[typing.Sequence[str]]\n            A list of fields to exclude from the prompt (fields remain in the document).\n\n        oauth : typing.Optional[CreateConnectorOAuth]\n            The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified.\n\n        active : typing.Optional[bool]\n            Whether the connector is active or not.\n\n        continue_on_failure : typing.Optional[bool]\n            Whether a chat request should continue or not if the request to this connector fails.\n\n        service_auth : typing.Optional[CreateConnectorServiceAuth]\n            The service to service authentication configuration for the connector. 
Cannot be specified if oauth is specified.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        CreateConnectorResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.connectors.create(\n                name=\"name\",\n                url=\"url\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.create(\n            name=name,\n            url=url,\n            description=description,\n            excludes=excludes,\n            oauth=oauth,\n            active=active,\n            continue_on_failure=continue_on_failure,\n            service_auth=service_auth,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> GetConnectorResponse:\n        \"\"\"\n        Retrieve a connector by ID. 
See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to retrieve.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        GetConnectorResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.connectors.get(\n                id=\"id\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.get(id, request_options=request_options)\n        return _response.data\n\n    async def delete(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> DeleteConnectorResponse:\n        \"\"\"\n        Delete a connector by ID. 
See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to delete.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DeleteConnectorResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.connectors.delete(\n                id=\"id\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.delete(id, request_options=request_options)\n        return _response.data\n\n    async def update(\n        self,\n        id: str,\n        *,\n        name: typing.Optional[str] = OMIT,\n        url: typing.Optional[str] = OMIT,\n        excludes: typing.Optional[typing.Sequence[str]] = OMIT,\n        oauth: typing.Optional[CreateConnectorOAuth] = OMIT,\n        active: typing.Optional[bool] = OMIT,\n        continue_on_failure: typing.Optional[bool] = OMIT,\n        service_auth: typing.Optional[CreateConnectorServiceAuth] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> UpdateConnectorResponse:\n        \"\"\"\n        Update a connector by ID. Omitted fields will not be updated. 
See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to update.\n\n        name : typing.Optional[str]\n            A human-readable name for the connector.\n\n        url : typing.Optional[str]\n            The URL of the connector that will be used to search for documents.\n\n        excludes : typing.Optional[typing.Sequence[str]]\n            A list of fields to exclude from the prompt (fields remain in the document).\n\n        oauth : typing.Optional[CreateConnectorOAuth]\n            The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified.\n\n        active : typing.Optional[bool]\n            Whether the connector is active or not.\n\n        continue_on_failure : typing.Optional[bool]\n            Whether a chat request should continue or not if the request to this connector fails.\n\n        service_auth : typing.Optional[CreateConnectorServiceAuth]\n            The service to service authentication configuration for the connector. Cannot be specified if oauth is specified.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        UpdateConnectorResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.connectors.update(\n                id=\"id\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.update(\n            id,\n            name=name,\n            url=url,\n            excludes=excludes,\n            oauth=oauth,\n            active=active,\n            continue_on_failure=continue_on_failure,\n            service_auth=service_auth,\n            request_options=request_options,\n        )\n        
return _response.data\n\n    async def o_auth_authorize(\n        self,\n        id: str,\n        *,\n        after_token_redirect: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> OAuthAuthorizeResponse:\n        \"\"\"\n        Authorize the connector with the given ID for the connector oauth app.  See ['Connector Authentication'](https://docs.cohere.com/docs/connector-authentication) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to authorize.\n\n        after_token_redirect : typing.Optional[str]\n            The URL to redirect to after the connector has been authorized.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        OAuthAuthorizeResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.connectors.o_auth_authorize(\n                id=\"id\",\n                after_token_redirect=\"after_token_redirect\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.o_auth_authorize(\n            id, after_token_redirect=after_token_redirect, request_options=request_options\n        )\n        return _response.data\n"
  },
  {
    "path": "src/cohere/connectors/raw_client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\nfrom json.decoder import JSONDecodeError\n\nfrom ..core.api_error import ApiError\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.http_response import AsyncHttpResponse, HttpResponse\nfrom ..core.jsonable_encoder import jsonable_encoder\nfrom ..core.parse_error import ParsingError\nfrom ..core.request_options import RequestOptions\nfrom ..core.serialization import convert_and_respect_annotation_metadata\nfrom ..core.unchecked_base_model import construct_type\nfrom ..errors.bad_request_error import BadRequestError\nfrom ..errors.client_closed_request_error import ClientClosedRequestError\nfrom ..errors.forbidden_error import ForbiddenError\nfrom ..errors.gateway_timeout_error import GatewayTimeoutError\nfrom ..errors.internal_server_error import InternalServerError\nfrom ..errors.invalid_token_error import InvalidTokenError\nfrom ..errors.not_found_error import NotFoundError\nfrom ..errors.not_implemented_error import NotImplementedError\nfrom ..errors.service_unavailable_error import ServiceUnavailableError\nfrom ..errors.too_many_requests_error import TooManyRequestsError\nfrom ..errors.unauthorized_error import UnauthorizedError\nfrom ..errors.unprocessable_entity_error import UnprocessableEntityError\nfrom ..types.create_connector_o_auth import CreateConnectorOAuth\nfrom ..types.create_connector_response import CreateConnectorResponse\nfrom ..types.create_connector_service_auth import CreateConnectorServiceAuth\nfrom ..types.delete_connector_response import DeleteConnectorResponse\nfrom ..types.get_connector_response import GetConnectorResponse\nfrom ..types.list_connectors_response import ListConnectorsResponse\nfrom ..types.o_auth_authorize_response import OAuthAuthorizeResponse\nfrom ..types.update_connector_response import UpdateConnectorResponse\nfrom pydantic import ValidationError\n\n# this is used as the default value 
for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass RawConnectorsClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    def list(\n        self,\n        *,\n        limit: typing.Optional[float] = None,\n        offset: typing.Optional[float] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[ListConnectorsResponse]:\n        \"\"\"\n        Returns a list of connectors ordered by descending creation date (newer first). See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information.\n\n        Parameters\n        ----------\n        limit : typing.Optional[float]\n            Maximum number of connectors to return [0, 100].\n\n        offset : typing.Optional[float]\n            Number of connectors to skip before returning results [0, inf].\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[ListConnectorsResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/connectors\",\n            method=\"GET\",\n            params={\n                \"limit\": limit,\n                \"offset\": offset,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListConnectorsResponse,\n                    construct_type(\n                        type_=ListConnectorsResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    
headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: 
ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                
raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def create(\n        self,\n        *,\n        name: str,\n        url: str,\n        description: typing.Optional[str] = OMIT,\n        excludes: typing.Optional[typing.Sequence[str]] = 
OMIT,\n        oauth: typing.Optional[CreateConnectorOAuth] = OMIT,\n        active: typing.Optional[bool] = OMIT,\n        continue_on_failure: typing.Optional[bool] = OMIT,\n        service_auth: typing.Optional[CreateConnectorServiceAuth] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[CreateConnectorResponse]:\n        \"\"\"\n        Creates a new connector. The connector is tested during registration and will cancel registration when the test is unsuccessful. See ['Creating and Deploying a Connector'](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) for more information.\n\n        Parameters\n        ----------\n        name : str\n            A human-readable name for the connector.\n\n        url : str\n            The URL of the connector that will be used to search for documents.\n\n        description : typing.Optional[str]\n            A description of the connector.\n\n        excludes : typing.Optional[typing.Sequence[str]]\n            A list of fields to exclude from the prompt (fields remain in the document).\n\n        oauth : typing.Optional[CreateConnectorOAuth]\n            The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified.\n\n        active : typing.Optional[bool]\n            Whether the connector is active or not.\n\n        continue_on_failure : typing.Optional[bool]\n            Whether a chat request should continue or not if the request to this connector fails.\n\n        service_auth : typing.Optional[CreateConnectorServiceAuth]\n            The service to service authentication configuration for the connector. 
Cannot be specified if oauth is specified.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[CreateConnectorResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/connectors\",\n            method=\"POST\",\n            json={\n                \"name\": name,\n                \"description\": description,\n                \"url\": url,\n                \"excludes\": excludes,\n                \"oauth\": convert_and_respect_annotation_metadata(\n                    object_=oauth, annotation=CreateConnectorOAuth, direction=\"write\"\n                ),\n                \"active\": active,\n                \"continue_on_failure\": continue_on_failure,\n                \"service_auth\": convert_and_respect_annotation_metadata(\n                    object_=service_auth, annotation=CreateConnectorServiceAuth, direction=\"write\"\n                ),\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    CreateConnectorResponse,\n                    construct_type(\n                        type_=CreateConnectorResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                          
  object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                 
   headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            
type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def get(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[GetConnectorResponse]:\n        \"\"\"\n        Retrieve a connector by ID. 
See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to retrieve.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[GetConnectorResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/connectors/{jsonable_encoder(id)}\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    GetConnectorResponse,\n                    construct_type(\n                        type_=GetConnectorResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise 
ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n              
              type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def delete(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[DeleteConnectorResponse]:\n        \"\"\"\n        Delete a connector by ID. 
See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to delete.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[DeleteConnectorResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/connectors/{jsonable_encoder(id)}\",\n            method=\"DELETE\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DeleteConnectorResponse,\n                    construct_type(\n                        type_=DeleteConnectorResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                
raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n        
                    type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n          
  if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def update(\n        self,\n        id: str,\n        *,\n        name: typing.Optional[str] = OMIT,\n        url: typing.Optional[str] = OMIT,\n        excludes: typing.Optional[typing.Sequence[str]] = OMIT,\n        oauth: typing.Optional[CreateConnectorOAuth] = OMIT,\n        active: typing.Optional[bool] = OMIT,\n        continue_on_failure: typing.Optional[bool] = OMIT,\n        service_auth: typing.Optional[CreateConnectorServiceAuth] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[UpdateConnectorResponse]:\n        \"\"\"\n        Update a connector by ID. Omitted fields will not be updated. 
See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to update.\n\n        name : typing.Optional[str]\n            A human-readable name for the connector.\n\n        url : typing.Optional[str]\n            The URL of the connector that will be used to search for documents.\n\n        excludes : typing.Optional[typing.Sequence[str]]\n            A list of fields to exclude from the prompt (fields remain in the document).\n\n        oauth : typing.Optional[CreateConnectorOAuth]\n            The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified.\n\n        active : typing.Optional[bool]\n\n        continue_on_failure : typing.Optional[bool]\n\n        service_auth : typing.Optional[CreateConnectorServiceAuth]\n            The service to service authentication configuration for the connector. Cannot be specified if oauth is specified.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[UpdateConnectorResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/connectors/{jsonable_encoder(id)}\",\n            method=\"PATCH\",\n            json={\n                \"name\": name,\n                \"url\": url,\n                \"excludes\": excludes,\n                \"oauth\": convert_and_respect_annotation_metadata(\n                    object_=oauth, annotation=CreateConnectorOAuth, direction=\"write\"\n                ),\n                \"active\": active,\n                \"continue_on_failure\": continue_on_failure,\n                \"service_auth\": convert_and_respect_annotation_metadata(\n                    object_=service_auth, annotation=CreateConnectorServiceAuth, direction=\"write\"\n       
         ),\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    UpdateConnectorResponse,\n                    construct_type(\n                        type_=UpdateConnectorResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        
typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        
),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def o_auth_authorize(\n        self,\n        id: str,\n        *,\n        after_token_redirect: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[OAuthAuthorizeResponse]:\n        \"\"\"\n        Authorize the connector with the given ID for the connector oauth app.  See ['Connector Authentication'](https://docs.cohere.com/docs/connector-authentication) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to authorize.\n\n        after_token_redirect : typing.Optional[str]\n            The URL to redirect to after the connector has been authorized.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[OAuthAuthorizeResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/connectors/{jsonable_encoder(id)}/oauth/authorize\",\n            method=\"POST\",\n            params={\n                \"after_token_redirect\": after_token_redirect,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    OAuthAuthorizeResponse,\n                    construct_type(\n           
             type_=OAuthAuthorizeResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n             
   )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n  
                      typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), 
cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n\nclass AsyncRawConnectorsClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    async def list(\n        self,\n        *,\n        limit: typing.Optional[float] = None,\n        offset: typing.Optional[float] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[ListConnectorsResponse]:\n        \"\"\"\n        Returns a list of connectors ordered by descending creation date (newer first). See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information.\n\n        Parameters\n        ----------\n        limit : typing.Optional[float]\n            Maximum number of connectors to return [0, 100].\n\n        offset : typing.Optional[float]\n            Number of connectors to skip before returning results [0, inf].\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[ListConnectorsResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/connectors\",\n            method=\"GET\",\n            params={\n                \"limit\": limit,\n                \"offset\": offset,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListConnectorsResponse,\n                    construct_type(\n                        type_=ListConnectorsResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if 
_response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n       
                 construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                
    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def create(\n        self,\n        *,\n        name: str,\n        url: str,\n        
description: typing.Optional[str] = OMIT,\n        excludes: typing.Optional[typing.Sequence[str]] = OMIT,\n        oauth: typing.Optional[CreateConnectorOAuth] = OMIT,\n        active: typing.Optional[bool] = OMIT,\n        continue_on_failure: typing.Optional[bool] = OMIT,\n        service_auth: typing.Optional[CreateConnectorServiceAuth] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[CreateConnectorResponse]:\n        \"\"\"\n        Creates a new connector. The connector is tested during registration and will cancel registration when the test is unsuccessful. See ['Creating and Deploying a Connector'](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) for more information.\n\n        Parameters\n        ----------\n        name : str\n            A human-readable name for the connector.\n\n        url : str\n            The URL of the connector that will be used to search for documents.\n\n        description : typing.Optional[str]\n            A description of the connector.\n\n        excludes : typing.Optional[typing.Sequence[str]]\n            A list of fields to exclude from the prompt (fields remain in the document).\n\n        oauth : typing.Optional[CreateConnectorOAuth]\n            The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified.\n\n        active : typing.Optional[bool]\n            Whether the connector is active or not.\n\n        continue_on_failure : typing.Optional[bool]\n            Whether a chat request should continue or not if the request to this connector fails.\n\n        service_auth : typing.Optional[CreateConnectorServiceAuth]\n            The service to service authentication configuration for the connector. 
Cannot be specified if oauth is specified.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[CreateConnectorResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/connectors\",\n            method=\"POST\",\n            json={\n                \"name\": name,\n                \"description\": description,\n                \"url\": url,\n                \"excludes\": excludes,\n                \"oauth\": convert_and_respect_annotation_metadata(\n                    object_=oauth, annotation=CreateConnectorOAuth, direction=\"write\"\n                ),\n                \"active\": active,\n                \"continue_on_failure\": continue_on_failure,\n                \"service_auth\": convert_and_respect_annotation_metadata(\n                    object_=service_auth, annotation=CreateConnectorServiceAuth, direction=\"write\"\n                ),\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    CreateConnectorResponse,\n                    construct_type(\n                        type_=CreateConnectorResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n          
                  object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n 
                   headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                           
 type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def get(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[GetConnectorResponse]:\n        \"\"\"\n        Retrieve a connector by ID. 
See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to retrieve.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[GetConnectorResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/connectors/{jsonable_encoder(id)}\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    GetConnectorResponse,\n                    construct_type(\n                        type_=GetConnectorResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n           
     raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n   
                         type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n     
       if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def delete(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[DeleteConnectorResponse]:\n        \"\"\"\n        Delete a connector by ID. 
See ['Connectors'](https://docs.cohere.com/docs/connectors) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to delete.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[DeleteConnectorResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/connectors/{jsonable_encoder(id)}\",\n            method=\"DELETE\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DeleteConnectorResponse,\n                    construct_type(\n                        type_=DeleteConnectorResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n 
               raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        
construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n    
            )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def update(\n        self,\n        id: str,\n        *,\n        name: typing.Optional[str] = OMIT,\n        url: typing.Optional[str] = OMIT,\n        excludes: typing.Optional[typing.Sequence[str]] = OMIT,\n        oauth: typing.Optional[CreateConnectorOAuth] = OMIT,\n        active: typing.Optional[bool] = OMIT,\n        continue_on_failure: typing.Optional[bool] = OMIT,\n        service_auth: typing.Optional[CreateConnectorServiceAuth] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[UpdateConnectorResponse]:\n        \"\"\"\n        Update a connector by ID. Omitted fields will not be updated. 
See ['Managing your Connector'](https://docs.cohere.com/docs/managing-your-connector) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to update.\n\n        name : typing.Optional[str]\n            A human-readable name for the connector.\n\n        url : typing.Optional[str]\n            The URL of the connector that will be used to search for documents.\n\n        excludes : typing.Optional[typing.Sequence[str]]\n            A list of fields to exclude from the prompt (fields remain in the document).\n\n        oauth : typing.Optional[CreateConnectorOAuth]\n            The OAuth 2.0 configuration for the connector. Cannot be specified if service_auth is specified.\n\n        active : typing.Optional[bool]\n\n        continue_on_failure : typing.Optional[bool]\n\n        service_auth : typing.Optional[CreateConnectorServiceAuth]\n            The service to service authentication configuration for the connector. Cannot be specified if oauth is specified.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[UpdateConnectorResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/connectors/{jsonable_encoder(id)}\",\n            method=\"PATCH\",\n            json={\n                \"name\": name,\n                \"url\": url,\n                \"excludes\": excludes,\n                \"oauth\": convert_and_respect_annotation_metadata(\n                    object_=oauth, annotation=CreateConnectorOAuth, direction=\"write\"\n                ),\n                \"active\": active,\n                \"continue_on_failure\": continue_on_failure,\n                \"service_auth\": convert_and_respect_annotation_metadata(\n                    object_=service_auth, annotation=CreateConnectorServiceAuth, 
direction=\"write\"\n                ),\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    UpdateConnectorResponse,\n                    construct_type(\n                        type_=UpdateConnectorResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                
)\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n           
             typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n           
             ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def o_auth_authorize(\n        self,\n        id: str,\n        *,\n        after_token_redirect: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[OAuthAuthorizeResponse]:\n        \"\"\"\n        Authorize the connector with the given ID for the connector oauth app.  See ['Connector Authentication'](https://docs.cohere.com/docs/connector-authentication) for more information.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the connector to authorize.\n\n        after_token_redirect : typing.Optional[str]\n            The URL to redirect to after the connector has been authorized.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[OAuthAuthorizeResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/connectors/{jsonable_encoder(id)}/oauth/authorize\",\n            method=\"POST\",\n            params={\n                \"after_token_redirect\": after_token_redirect,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    OAuthAuthorizeResponse,\n             
       construct_type(\n                        type_=OAuthAuthorizeResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        
),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    
headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                
status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n"
  },
  {
    "path": "src/cohere/core/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .api_error import ApiError\n    from .client_wrapper import AsyncClientWrapper, BaseClientWrapper, SyncClientWrapper\n    from .datetime_utils import Rfc2822DateTime, parse_rfc2822_datetime, serialize_datetime\n    from .file import File, convert_file_dict_to_httpx_tuples, with_content_type\n    from .http_client import AsyncHttpClient, HttpClient\n    from .http_response import AsyncHttpResponse, HttpResponse\n    from .jsonable_encoder import jsonable_encoder\n    from .logging import ConsoleLogger, ILogger, LogConfig, LogLevel, Logger, create_logger\n    from .parse_error import ParsingError\n    from .pydantic_utilities import (\n        IS_PYDANTIC_V2,\n        UniversalBaseModel,\n        UniversalRootModel,\n        parse_obj_as,\n        universal_field_validator,\n        universal_root_validator,\n        update_forward_refs,\n    )\n    from .query_encoder import encode_query\n    from .remove_none_from_dict import remove_none_from_dict\n    from .request_options import RequestOptions\n    from .serialization import FieldMetadata, convert_and_respect_annotation_metadata\n    from .unchecked_base_model import UncheckedBaseModel, UnionMetadata, construct_type\n_dynamic_imports: typing.Dict[str, str] = {\n    \"ApiError\": \".api_error\",\n    \"AsyncClientWrapper\": \".client_wrapper\",\n    \"AsyncHttpClient\": \".http_client\",\n    \"AsyncHttpResponse\": \".http_response\",\n    \"BaseClientWrapper\": \".client_wrapper\",\n    \"ConsoleLogger\": \".logging\",\n    \"FieldMetadata\": \".serialization\",\n    \"File\": \".file\",\n    \"HttpClient\": \".http_client\",\n    \"HttpResponse\": \".http_response\",\n    \"ILogger\": \".logging\",\n    \"IS_PYDANTIC_V2\": \".pydantic_utilities\",\n    \"LogConfig\": \".logging\",\n    \"LogLevel\": \".logging\",\n    
\"Logger\": \".logging\",\n    \"ParsingError\": \".parse_error\",\n    \"RequestOptions\": \".request_options\",\n    \"Rfc2822DateTime\": \".datetime_utils\",\n    \"SyncClientWrapper\": \".client_wrapper\",\n    \"UncheckedBaseModel\": \".unchecked_base_model\",\n    \"UnionMetadata\": \".unchecked_base_model\",\n    \"UniversalBaseModel\": \".pydantic_utilities\",\n    \"UniversalRootModel\": \".pydantic_utilities\",\n    \"construct_type\": \".unchecked_base_model\",\n    \"convert_and_respect_annotation_metadata\": \".serialization\",\n    \"convert_file_dict_to_httpx_tuples\": \".file\",\n    \"create_logger\": \".logging\",\n    \"encode_query\": \".query_encoder\",\n    \"jsonable_encoder\": \".jsonable_encoder\",\n    \"parse_obj_as\": \".pydantic_utilities\",\n    \"parse_rfc2822_datetime\": \".datetime_utils\",\n    \"remove_none_from_dict\": \".remove_none_from_dict\",\n    \"serialize_datetime\": \".datetime_utils\",\n    \"universal_field_validator\": \".pydantic_utilities\",\n    \"universal_root_validator\": \".pydantic_utilities\",\n    \"update_forward_refs\": \".pydantic_utilities\",\n    \"with_content_type\": \".file\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\n    \"ApiError\",\n    
\"AsyncClientWrapper\",\n    \"AsyncHttpClient\",\n    \"AsyncHttpResponse\",\n    \"BaseClientWrapper\",\n    \"ConsoleLogger\",\n    \"FieldMetadata\",\n    \"File\",\n    \"HttpClient\",\n    \"HttpResponse\",\n    \"ILogger\",\n    \"IS_PYDANTIC_V2\",\n    \"LogConfig\",\n    \"LogLevel\",\n    \"Logger\",\n    \"ParsingError\",\n    \"RequestOptions\",\n    \"Rfc2822DateTime\",\n    \"SyncClientWrapper\",\n    \"UncheckedBaseModel\",\n    \"UnionMetadata\",\n    \"UniversalBaseModel\",\n    \"UniversalRootModel\",\n    \"construct_type\",\n    \"convert_and_respect_annotation_metadata\",\n    \"convert_file_dict_to_httpx_tuples\",\n    \"create_logger\",\n    \"encode_query\",\n    \"jsonable_encoder\",\n    \"parse_obj_as\",\n    \"parse_rfc2822_datetime\",\n    \"remove_none_from_dict\",\n    \"serialize_datetime\",\n    \"universal_field_validator\",\n    \"universal_root_validator\",\n    \"update_forward_refs\",\n    \"with_content_type\",\n]\n"
  },
  {
    "path": "src/cohere/core/api_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom typing import Any, Dict, Optional\n\n\nclass ApiError(Exception):\n    headers: Optional[Dict[str, str]]\n    status_code: Optional[int]\n    body: Any\n\n    def __init__(\n        self,\n        *,\n        headers: Optional[Dict[str, str]] = None,\n        status_code: Optional[int] = None,\n        body: Any = None,\n    ) -> None:\n        self.headers = headers\n        self.status_code = status_code\n        self.body = body\n\n    def __str__(self) -> str:\n        return f\"headers: {self.headers}, status_code: {self.status_code}, body: {self.body}\"\n"
  },
  {
    "path": "src/cohere/core/client_wrapper.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport httpx\nfrom .http_client import AsyncHttpClient, HttpClient\nfrom .logging import LogConfig, Logger\n\n\nclass BaseClientWrapper:\n    def __init__(\n        self,\n        *,\n        client_name: typing.Optional[str] = None,\n        token: typing.Union[str, typing.Callable[[], str]],\n        headers: typing.Optional[typing.Dict[str, str]] = None,\n        base_url: str,\n        timeout: typing.Optional[float] = None,\n        logging: typing.Optional[typing.Union[LogConfig, Logger]] = None,\n    ):\n        self._client_name = client_name\n        self._token = token\n        self._headers = headers\n        self._base_url = base_url\n        self._timeout = timeout\n        self._logging = logging\n\n    def get_headers(self) -> typing.Dict[str, str]:\n        import platform\n\n        headers: typing.Dict[str, str] = {\n            \"User-Agent\": \"cohere/6.1.0\",\n            \"X-Fern-Language\": \"Python\",\n            \"X-Fern-Runtime\": f\"python/{platform.python_version()}\",\n            \"X-Fern-Platform\": f\"{platform.system().lower()}/{platform.release()}\",\n            \"X-Fern-SDK-Name\": \"cohere\",\n            \"X-Fern-SDK-Version\": \"6.1.0\",\n            **(self.get_custom_headers() or {}),\n        }\n        if self._client_name is not None:\n            headers[\"X-Client-Name\"] = self._client_name\n        headers[\"Authorization\"] = f\"Bearer {self._get_token()}\"\n        return headers\n\n    def _get_token(self) -> str:\n        if isinstance(self._token, str):\n            return self._token\n        else:\n            return self._token()\n\n    def get_custom_headers(self) -> typing.Optional[typing.Dict[str, str]]:\n        return self._headers\n\n    def get_base_url(self) -> str:\n        return self._base_url\n\n    def get_timeout(self) -> typing.Optional[float]:\n        return self._timeout\n\n\nclass 
SyncClientWrapper(BaseClientWrapper):\n    def __init__(\n        self,\n        *,\n        client_name: typing.Optional[str] = None,\n        token: typing.Union[str, typing.Callable[[], str]],\n        headers: typing.Optional[typing.Dict[str, str]] = None,\n        base_url: str,\n        timeout: typing.Optional[float] = None,\n        logging: typing.Optional[typing.Union[LogConfig, Logger]] = None,\n        httpx_client: httpx.Client,\n    ):\n        super().__init__(\n            client_name=client_name, token=token, headers=headers, base_url=base_url, timeout=timeout, logging=logging\n        )\n        self.httpx_client = HttpClient(\n            httpx_client=httpx_client,\n            base_headers=self.get_headers,\n            base_timeout=self.get_timeout,\n            base_url=self.get_base_url,\n            logging_config=self._logging,\n        )\n\n\nclass AsyncClientWrapper(BaseClientWrapper):\n    def __init__(\n        self,\n        *,\n        client_name: typing.Optional[str] = None,\n        token: typing.Union[str, typing.Callable[[], str]],\n        headers: typing.Optional[typing.Dict[str, str]] = None,\n        base_url: str,\n        timeout: typing.Optional[float] = None,\n        logging: typing.Optional[typing.Union[LogConfig, Logger]] = None,\n        async_token: typing.Optional[typing.Callable[[], typing.Awaitable[str]]] = None,\n        httpx_client: httpx.AsyncClient,\n    ):\n        super().__init__(\n            client_name=client_name, token=token, headers=headers, base_url=base_url, timeout=timeout, logging=logging\n        )\n        self._async_token = async_token\n        self.httpx_client = AsyncHttpClient(\n            httpx_client=httpx_client,\n            base_headers=self.get_headers,\n            base_timeout=self.get_timeout,\n            base_url=self.get_base_url,\n            async_base_headers=self.async_get_headers,\n            logging_config=self._logging,\n        )\n\n    async def 
async_get_headers(self) -> typing.Dict[str, str]:\n        headers = self.get_headers()\n        if self._async_token is not None:\n            token = await self._async_token()\n            headers[\"Authorization\"] = f\"Bearer {token}\"\n        return headers\n"
  },
  {
    "path": "src/cohere/core/datetime_utils.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport datetime as dt\nfrom email.utils import parsedate_to_datetime\nfrom typing import Any\n\nimport pydantic\n\nIS_PYDANTIC_V2 = pydantic.VERSION.startswith(\"2.\")\n\n\ndef parse_rfc2822_datetime(v: Any) -> dt.datetime:\n    \"\"\"\n    Parse an RFC 2822 datetime string (e.g., \"Wed, 02 Oct 2002 13:00:00 GMT\")\n    into a datetime object. If the value is already a datetime, return it as-is.\n    Falls back to ISO 8601 parsing if RFC 2822 parsing fails.\n    \"\"\"\n    if isinstance(v, dt.datetime):\n        return v\n    if isinstance(v, str):\n        try:\n            return parsedate_to_datetime(v)\n        except Exception:\n            pass\n        # Fallback to ISO 8601 parsing\n        return dt.datetime.fromisoformat(v.replace(\"Z\", \"+00:00\"))\n    raise ValueError(f\"Expected str or datetime, got {type(v)}\")\n\n\nclass Rfc2822DateTime(dt.datetime):\n    \"\"\"A datetime subclass that parses RFC 2822 date strings.\n\n    On Pydantic V1, uses __get_validators__ for pre-validation.\n    On Pydantic V2, uses __get_pydantic_core_schema__ for BeforeValidator-style parsing.\n    \"\"\"\n\n    @classmethod\n    def __get_validators__(cls):  # type: ignore[no-untyped-def]\n        yield parse_rfc2822_datetime\n\n    @classmethod\n    def __get_pydantic_core_schema__(cls, _source_type: Any, _handler: Any) -> Any:  # type: ignore[override]\n        from pydantic_core import core_schema\n\n        return core_schema.no_info_before_validator_function(parse_rfc2822_datetime, core_schema.datetime_schema())\n\n\ndef serialize_datetime(v: dt.datetime) -> str:\n    \"\"\"\n    Serialize a datetime including timezone info.\n\n    Uses the timezone info provided if present, otherwise uses the current runtime's timezone info.\n\n    UTC datetimes end in \"Z\" while all other timezones are represented as offset from UTC, e.g. 
+05:00.\n    \"\"\"\n\n    def _serialize_zoned_datetime(v: dt.datetime) -> str:\n        if v.tzinfo is not None and v.tzinfo.tzname(None) == dt.timezone.utc.tzname(None):\n            # UTC is a special case where we use \"Z\" at the end instead of \"+00:00\"\n            return v.isoformat().replace(\"+00:00\", \"Z\")\n        else:\n            # Delegate to the typical +/- offset format\n            return v.isoformat()\n\n    if v.tzinfo is not None:\n        return _serialize_zoned_datetime(v)\n    else:\n        local_tz = dt.datetime.now().astimezone().tzinfo\n        localized_dt = v.replace(tzinfo=local_tz)\n        return _serialize_zoned_datetime(localized_dt)\n"
  },
  {
    "path": "src/cohere/core/file.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom typing import IO, Dict, List, Mapping, Optional, Tuple, Union, cast\n\n# File typing inspired by the flexibility of types within the httpx library\n# https://github.com/encode/httpx/blob/master/httpx/_types.py\nFileContent = Union[IO[bytes], bytes, str]\nFile = Union[\n    # file (or bytes)\n    FileContent,\n    # (filename, file (or bytes))\n    Tuple[Optional[str], FileContent],\n    # (filename, file (or bytes), content_type)\n    Tuple[Optional[str], FileContent, Optional[str]],\n    # (filename, file (or bytes), content_type, headers)\n    Tuple[\n        Optional[str],\n        FileContent,\n        Optional[str],\n        Mapping[str, str],\n    ],\n]\n\n\ndef convert_file_dict_to_httpx_tuples(\n    d: Dict[str, Union[File, List[File]]],\n) -> List[Tuple[str, File]]:\n    \"\"\"\n    The format we use is a list of tuples, where the first element is the\n    name of the file and the second is the file object. 
Typically HTTPX wants\n    a dict, but to be able to send lists of files, you have to use the list\n    approach (which also works for non-lists)\n    https://github.com/encode/httpx/pull/1032\n    \"\"\"\n\n    httpx_tuples = []\n    for key, file_like in d.items():\n        if isinstance(file_like, list):\n            for file_like_item in file_like:\n                httpx_tuples.append((key, file_like_item))\n        else:\n            httpx_tuples.append((key, file_like))\n    return httpx_tuples\n\n\ndef with_content_type(*, file: File, default_content_type: str) -> File:\n    \"\"\"\n    This function resolves to the file's content type, if provided, and defaults\n    to the default_content_type value if not.\n    \"\"\"\n    if isinstance(file, tuple):\n        if len(file) == 2:\n            filename, content = cast(Tuple[Optional[str], FileContent], file)  # type: ignore\n            return (filename, content, default_content_type)\n        elif len(file) == 3:\n            filename, content, file_content_type = cast(Tuple[Optional[str], FileContent, Optional[str]], file)  # type: ignore\n            out_content_type = file_content_type or default_content_type\n            return (filename, content, out_content_type)\n        elif len(file) == 4:\n            filename, content, file_content_type, headers = cast(  # type: ignore\n                Tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]], file\n            )\n            out_content_type = file_content_type or default_content_type\n            return (filename, content, out_content_type, headers)\n        else:\n            raise ValueError(f\"Unexpected tuple length: {len(file)}\")\n    return (None, file, default_content_type)\n"
  },
  {
    "path": "src/cohere/core/force_multipart.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom typing import Any, Dict\n\n\nclass ForceMultipartDict(Dict[str, Any]):\n    \"\"\"\n    A dictionary subclass that always evaluates to True in boolean contexts.\n\n    This is used to force multipart/form-data encoding in HTTP requests even when\n    the dictionary is empty, which would normally evaluate to False.\n    \"\"\"\n\n    def __bool__(self) -> bool:\n        return True\n\n\nFORCE_MULTIPART = ForceMultipartDict()\n"
  },
  {
    "path": "src/cohere/core/http_client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport asyncio\nimport email.utils\nimport re\nimport time\nimport typing\nfrom contextlib import asynccontextmanager, contextmanager\nfrom random import random\n\nimport httpx\nfrom .file import File, convert_file_dict_to_httpx_tuples\nfrom .force_multipart import FORCE_MULTIPART\nfrom .jsonable_encoder import jsonable_encoder\nfrom .logging import LogConfig, Logger, create_logger\nfrom .query_encoder import encode_query\nfrom .remove_none_from_dict import remove_none_from_dict as remove_none_from_dict\nfrom .request_options import RequestOptions\nfrom httpx._types import RequestFiles\n\nINITIAL_RETRY_DELAY_SECONDS = 1.0\nMAX_RETRY_DELAY_SECONDS = 60.0\nJITTER_FACTOR = 0.2  # 20% random jitter\n\n\ndef _parse_retry_after(response_headers: httpx.Headers) -> typing.Optional[float]:\n    \"\"\"\n    This function parses the `Retry-After` header in a HTTP response and returns the number of seconds to wait.\n\n    Inspired by the urllib3 retry implementation.\n    \"\"\"\n    retry_after_ms = response_headers.get(\"retry-after-ms\")\n    if retry_after_ms is not None:\n        try:\n            return int(retry_after_ms) / 1000 if retry_after_ms > 0 else 0\n        except Exception:\n            pass\n\n    retry_after = response_headers.get(\"retry-after\")\n    if retry_after is None:\n        return None\n\n    # Attempt to parse the header as an int.\n    if re.match(r\"^\\s*[0-9]+\\s*$\", retry_after):\n        seconds = float(retry_after)\n    # Fallback to parsing it as a date.\n    else:\n        retry_date_tuple = email.utils.parsedate_tz(retry_after)\n        if retry_date_tuple is None:\n            return None\n        if retry_date_tuple[9] is None:  # Python 2\n            # Assume UTC if no timezone was specified\n            # On Python2.7, parsedate_tz returns None for a timezone offset\n            # instead of 0 if no timezone is given, where mktime_tz treats\n         
   # a None timezone offset as local time.\n            retry_date_tuple = retry_date_tuple[:9] + (0,) + retry_date_tuple[10:]\n\n        retry_date = email.utils.mktime_tz(retry_date_tuple)\n        seconds = retry_date - time.time()\n\n    if seconds < 0:\n        seconds = 0\n\n    return seconds\n\n\ndef _add_positive_jitter(delay: float) -> float:\n    \"\"\"Add positive jitter (0-20%) to prevent thundering herd.\"\"\"\n    jitter_multiplier = 1 + random() * JITTER_FACTOR\n    return delay * jitter_multiplier\n\n\ndef _add_symmetric_jitter(delay: float) -> float:\n    \"\"\"Add symmetric jitter (±10%) for exponential backoff.\"\"\"\n    jitter_multiplier = 1 + (random() - 0.5) * JITTER_FACTOR\n    return delay * jitter_multiplier\n\n\ndef _parse_x_ratelimit_reset(response_headers: httpx.Headers) -> typing.Optional[float]:\n    \"\"\"\n    Parse the X-RateLimit-Reset header (Unix timestamp in seconds).\n    Returns seconds to wait, or None if header is missing/invalid.\n    \"\"\"\n    reset_time_str = response_headers.get(\"x-ratelimit-reset\")\n    if reset_time_str is None:\n        return None\n\n    try:\n        reset_time = int(reset_time_str)\n        delay = reset_time - time.time()\n        if delay > 0:\n            return delay\n    except (ValueError, TypeError):\n        pass\n\n    return None\n\n\ndef _retry_timeout(response: httpx.Response, retries: int) -> float:\n    \"\"\"\n    Determine the amount of time to wait before retrying a request.\n    This function begins by trying to parse a retry-after header from the response, and then proceeds to use exponential backoff\n    with a jitter to determine the number of seconds to wait.\n    \"\"\"\n\n    # 1. Check Retry-After header first\n    retry_after = _parse_retry_after(response.headers)\n    if retry_after is not None and retry_after > 0:\n        return min(retry_after, MAX_RETRY_DELAY_SECONDS)\n\n    # 2. 
Check X-RateLimit-Reset header (with positive jitter)\n    ratelimit_reset = _parse_x_ratelimit_reset(response.headers)\n    if ratelimit_reset is not None:\n        return _add_positive_jitter(min(ratelimit_reset, MAX_RETRY_DELAY_SECONDS))\n\n    # 3. Fall back to exponential backoff (with symmetric jitter)\n    backoff = min(INITIAL_RETRY_DELAY_SECONDS * pow(2.0, retries), MAX_RETRY_DELAY_SECONDS)\n    return _add_symmetric_jitter(backoff)\n\n\ndef _retry_timeout_from_retries(retries: int) -> float:\n    \"\"\"Determine retry timeout using exponential backoff when no response is available.\"\"\"\n    backoff = min(INITIAL_RETRY_DELAY_SECONDS * pow(2.0, retries), MAX_RETRY_DELAY_SECONDS)\n    return _add_symmetric_jitter(backoff)\n\n\ndef _should_retry(response: httpx.Response) -> bool:\n    retryable_400s = [429, 408, 409]\n    return response.status_code >= 500 or response.status_code in retryable_400s\n\n\n_SENSITIVE_HEADERS = frozenset(\n    {\n        \"authorization\",\n        \"www-authenticate\",\n        \"x-api-key\",\n        \"api-key\",\n        \"apikey\",\n        \"x-api-token\",\n        \"x-auth-token\",\n        \"auth-token\",\n        \"cookie\",\n        \"set-cookie\",\n        \"proxy-authorization\",\n        \"proxy-authenticate\",\n        \"x-csrf-token\",\n        \"x-xsrf-token\",\n        \"x-session-token\",\n        \"x-access-token\",\n    }\n)\n\n\ndef _redact_headers(headers: typing.Dict[str, str]) -> typing.Dict[str, str]:\n    return {k: (\"[REDACTED]\" if k.lower() in _SENSITIVE_HEADERS else v) for k, v in headers.items()}\n\n\ndef _build_url(base_url: str, path: typing.Optional[str]) -> str:\n    \"\"\"\n    Build a full URL by joining a base URL with a path.\n\n    This function correctly handles base URLs that contain path prefixes (e.g., tenant-based URLs)\n    by using string concatenation instead of urllib.parse.urljoin(), which would incorrectly\n    strip path components when the path starts with '/'.\n\n    
Example:\n        >>> _build_url(\"https://cloud.example.com/org/tenant/api\", \"/users\")\n        'https://cloud.example.com/org/tenant/api/users'\n\n    Args:\n        base_url: The base URL, which may contain path prefixes.\n        path: The path to append. Can be None or empty string.\n\n    Returns:\n        The full URL with base_url and path properly joined.\n    \"\"\"\n    if not path:\n        return base_url\n    return f\"{base_url.rstrip('/')}/{path.lstrip('/')}\"\n\n\ndef _maybe_filter_none_from_multipart_data(\n    data: typing.Optional[typing.Any],\n    request_files: typing.Optional[RequestFiles],\n    force_multipart: typing.Optional[bool],\n) -> typing.Optional[typing.Any]:\n    \"\"\"\n    Filter None values from data body for multipart/form requests.\n    This prevents httpx from converting None to empty strings in multipart encoding.\n    Only applies when files are present or force_multipart is True.\n    \"\"\"\n    if data is not None and isinstance(data, typing.Mapping) and (request_files or force_multipart):\n        return remove_none_from_dict(data)\n    return data\n\n\ndef remove_omit_from_dict(\n    original: typing.Dict[str, typing.Optional[typing.Any]],\n    omit: typing.Optional[typing.Any],\n) -> typing.Dict[str, typing.Any]:\n    if omit is None:\n        return original\n    new: typing.Dict[str, typing.Any] = {}\n    for key, value in original.items():\n        if value is not omit:\n            new[key] = value\n    return new\n\n\ndef maybe_filter_request_body(\n    data: typing.Optional[typing.Any],\n    request_options: typing.Optional[RequestOptions],\n    omit: typing.Optional[typing.Any],\n) -> typing.Optional[typing.Any]:\n    if data is None:\n        return (\n            jsonable_encoder(request_options.get(\"additional_body_parameters\", {})) or {}\n            if request_options is not None\n            else None\n        )\n    elif not isinstance(data, typing.Mapping):\n        data_content = 
jsonable_encoder(data)\n    else:\n        data_content = {\n            **(jsonable_encoder(remove_omit_from_dict(data, omit))),  # type: ignore\n            **(\n                jsonable_encoder(request_options.get(\"additional_body_parameters\", {})) or {}\n                if request_options is not None\n                else {}\n            ),\n        }\n    return data_content\n\n\n# Abstracted out for testing purposes\ndef get_request_body(\n    *,\n    json: typing.Optional[typing.Any],\n    data: typing.Optional[typing.Any],\n    request_options: typing.Optional[RequestOptions],\n    omit: typing.Optional[typing.Any],\n) -> typing.Tuple[typing.Optional[typing.Any], typing.Optional[typing.Any]]:\n    json_body = None\n    data_body = None\n    if data is not None:\n        data_body = maybe_filter_request_body(data, request_options, omit)\n    else:\n        # If both data and json are None, we send json data in the event extra properties are specified\n        json_body = maybe_filter_request_body(json, request_options, omit)\n\n    has_additional_body_parameters = bool(\n        request_options is not None and request_options.get(\"additional_body_parameters\")\n    )\n\n    # Only collapse empty dict to None when the body was not explicitly provided\n    # and there are no additional body parameters. 
This preserves explicit empty\n    # bodies (e.g., when an endpoint has a request body type but all fields are optional).\n    if json_body == {} and json is None and not has_additional_body_parameters:\n        json_body = None\n    if data_body == {} and data is None and not has_additional_body_parameters:\n        data_body = None\n\n    return json_body, data_body\n\n\nclass HttpClient:\n    def __init__(\n        self,\n        *,\n        httpx_client: httpx.Client,\n        base_timeout: typing.Callable[[], typing.Optional[float]],\n        base_headers: typing.Callable[[], typing.Dict[str, str]],\n        base_url: typing.Optional[typing.Callable[[], str]] = None,\n        base_max_retries: int = 2,\n        logging_config: typing.Optional[typing.Union[LogConfig, Logger]] = None,\n    ):\n        self.base_url = base_url\n        self.base_timeout = base_timeout\n        self.base_headers = base_headers\n        self.base_max_retries = base_max_retries\n        self.httpx_client = httpx_client\n        self.logger = create_logger(logging_config)\n\n    def get_base_url(self, maybe_base_url: typing.Optional[str]) -> str:\n        base_url = maybe_base_url\n        if self.base_url is not None and base_url is None:\n            base_url = self.base_url()\n\n        if base_url is None:\n            raise ValueError(\"A base_url is required to make this request, please provide one and try again.\")\n        return base_url\n\n    def request(\n        self,\n        path: typing.Optional[str] = None,\n        *,\n        method: str,\n        base_url: typing.Optional[str] = None,\n        params: typing.Optional[typing.Dict[str, typing.Any]] = None,\n        json: typing.Optional[typing.Any] = None,\n        data: typing.Optional[typing.Any] = None,\n        content: typing.Optional[typing.Union[bytes, typing.Iterator[bytes], typing.AsyncIterator[bytes]]] = None,\n        files: typing.Optional[\n            typing.Union[\n                typing.Dict[str, 
typing.Optional[typing.Union[File, typing.List[File]]]],\n                typing.List[typing.Tuple[str, File]],\n            ]\n        ] = None,\n        headers: typing.Optional[typing.Dict[str, typing.Any]] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n        retries: int = 0,\n        omit: typing.Optional[typing.Any] = None,\n        force_multipart: typing.Optional[bool] = None,\n    ) -> httpx.Response:\n        base_url = self.get_base_url(base_url)\n        timeout = (\n            request_options.get(\"timeout_in_seconds\")\n            if request_options is not None and request_options.get(\"timeout_in_seconds\") is not None\n            else self.base_timeout()\n        )\n\n        json_body, data_body = get_request_body(json=json, data=data, request_options=request_options, omit=omit)\n\n        request_files: typing.Optional[RequestFiles] = (\n            convert_file_dict_to_httpx_tuples(remove_omit_from_dict(remove_none_from_dict(files), omit))\n            if (files is not None and files is not omit and isinstance(files, dict))\n            else None\n        )\n\n        if (request_files is None or len(request_files) == 0) and force_multipart:\n            request_files = FORCE_MULTIPART\n\n        data_body = _maybe_filter_none_from_multipart_data(data_body, request_files, force_multipart)\n\n        # Compute encoded params separately to avoid passing empty list to httpx\n        # (httpx strips existing query params from URL when params=[] is passed)\n        _encoded_params = encode_query(\n            jsonable_encoder(\n                remove_none_from_dict(\n                    remove_omit_from_dict(\n                        {\n                            **(params if params is not None else {}),\n                            **(\n                                request_options.get(\"additional_query_parameters\", {}) or {}\n                                if request_options is not None\n                        
        else {}\n                            ),\n                        },\n                        omit,\n                    )\n                )\n            )\n        )\n\n        _request_url = _build_url(base_url, path)\n        _request_headers = jsonable_encoder(\n            remove_none_from_dict(\n                {\n                    **self.base_headers(),\n                    **(headers if headers is not None else {}),\n                    **(request_options.get(\"additional_headers\", {}) or {} if request_options is not None else {}),\n                }\n            )\n        )\n\n        if self.logger.is_debug():\n            self.logger.debug(\n                \"Making HTTP request\",\n                method=method,\n                url=_request_url,\n                headers=_redact_headers(_request_headers),\n                has_body=json_body is not None or data_body is not None,\n            )\n\n        max_retries: int = (\n            request_options.get(\"max_retries\", self.base_max_retries)\n            if request_options is not None\n            else self.base_max_retries\n        )\n\n        try:\n            response = self.httpx_client.request(\n                method=method,\n                url=_request_url,\n                headers=_request_headers,\n                params=_encoded_params if _encoded_params else None,\n                json=json_body,\n                data=data_body,\n                content=content,\n                files=request_files,\n                timeout=timeout,\n            )\n        except (httpx.ConnectError, httpx.RemoteProtocolError):\n            if retries < max_retries:\n                time.sleep(_retry_timeout_from_retries(retries=retries))\n                return self.request(\n                    path=path,\n                    method=method,\n                    base_url=base_url,\n                    params=params,\n                    json=json,\n                    data=data,\n           
         content=content,\n                    files=files,\n                    headers=headers,\n                    request_options=request_options,\n                    retries=retries + 1,\n                    omit=omit,\n                    force_multipart=force_multipart,\n                )\n            raise\n\n        if _should_retry(response=response):\n            if retries < max_retries:\n                time.sleep(_retry_timeout(response=response, retries=retries))\n                return self.request(\n                    path=path,\n                    method=method,\n                    base_url=base_url,\n                    params=params,\n                    json=json,\n                    data=data,\n                    content=content,\n                    files=files,\n                    headers=headers,\n                    request_options=request_options,\n                    retries=retries + 1,\n                    omit=omit,\n                    force_multipart=force_multipart,\n                )\n\n        if self.logger.is_debug():\n            if 200 <= response.status_code < 400:\n                self.logger.debug(\n                    \"HTTP request succeeded\",\n                    method=method,\n                    url=_request_url,\n                    status_code=response.status_code,\n                )\n\n        if self.logger.is_error():\n            if response.status_code >= 400:\n                self.logger.error(\n                    \"HTTP request failed with error status\",\n                    method=method,\n                    url=_request_url,\n                    status_code=response.status_code,\n                )\n\n        return response\n\n    @contextmanager\n    def stream(\n        self,\n        path: typing.Optional[str] = None,\n        *,\n        method: str,\n        base_url: typing.Optional[str] = None,\n        params: typing.Optional[typing.Dict[str, typing.Any]] = None,\n        json: 
typing.Optional[typing.Any] = None,\n        data: typing.Optional[typing.Any] = None,\n        content: typing.Optional[typing.Union[bytes, typing.Iterator[bytes], typing.AsyncIterator[bytes]]] = None,\n        files: typing.Optional[\n            typing.Union[\n                typing.Dict[str, typing.Optional[typing.Union[File, typing.List[File]]]],\n                typing.List[typing.Tuple[str, File]],\n            ]\n        ] = None,\n        headers: typing.Optional[typing.Dict[str, typing.Any]] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n        retries: int = 0,\n        omit: typing.Optional[typing.Any] = None,\n        force_multipart: typing.Optional[bool] = None,\n    ) -> typing.Iterator[httpx.Response]:\n        base_url = self.get_base_url(base_url)\n        timeout = (\n            request_options.get(\"timeout_in_seconds\")\n            if request_options is not None and request_options.get(\"timeout_in_seconds\") is not None\n            else self.base_timeout()\n        )\n\n        request_files: typing.Optional[RequestFiles] = (\n            convert_file_dict_to_httpx_tuples(remove_omit_from_dict(remove_none_from_dict(files), omit))\n            if (files is not None and files is not omit and isinstance(files, dict))\n            else None\n        )\n\n        if (request_files is None or len(request_files) == 0) and force_multipart:\n            request_files = FORCE_MULTIPART\n\n        json_body, data_body = get_request_body(json=json, data=data, request_options=request_options, omit=omit)\n\n        data_body = _maybe_filter_none_from_multipart_data(data_body, request_files, force_multipart)\n\n        # Compute encoded params separately to avoid passing empty list to httpx\n        # (httpx strips existing query params from URL when params=[] is passed)\n        _encoded_params = encode_query(\n            jsonable_encoder(\n                remove_none_from_dict(\n                    remove_omit_from_dict(\n 
                       {\n                            **(params if params is not None else {}),\n                            **(\n                                request_options.get(\"additional_query_parameters\", {})\n                                if request_options is not None\n                                else {}\n                            ),\n                        },\n                        omit,\n                    )\n                )\n            )\n        )\n\n        _request_url = _build_url(base_url, path)\n        _request_headers = jsonable_encoder(\n            remove_none_from_dict(\n                {\n                    **self.base_headers(),\n                    **(headers if headers is not None else {}),\n                    **(request_options.get(\"additional_headers\", {}) if request_options is not None else {}),\n                }\n            )\n        )\n\n        if self.logger.is_debug():\n            self.logger.debug(\n                \"Making streaming HTTP request\",\n                method=method,\n                url=_request_url,\n                headers=_redact_headers(_request_headers),\n            )\n\n        with self.httpx_client.stream(\n            method=method,\n            url=_request_url,\n            headers=_request_headers,\n            params=_encoded_params if _encoded_params else None,\n            json=json_body,\n            data=data_body,\n            content=content,\n            files=request_files,\n            timeout=timeout,\n        ) as stream:\n            yield stream\n\n\nclass AsyncHttpClient:\n    def __init__(\n        self,\n        *,\n        httpx_client: httpx.AsyncClient,\n        base_timeout: typing.Callable[[], typing.Optional[float]],\n        base_headers: typing.Callable[[], typing.Dict[str, str]],\n        base_url: typing.Optional[typing.Callable[[], str]] = None,\n        base_max_retries: int = 2,\n        async_base_headers: typing.Optional[typing.Callable[[], 
typing.Awaitable[typing.Dict[str, str]]]] = None,\n        logging_config: typing.Optional[typing.Union[LogConfig, Logger]] = None,\n    ):\n        self.base_url = base_url\n        self.base_timeout = base_timeout\n        self.base_headers = base_headers\n        self.base_max_retries = base_max_retries\n        self.async_base_headers = async_base_headers\n        self.httpx_client = httpx_client\n        self.logger = create_logger(logging_config)\n\n    async def _get_headers(self) -> typing.Dict[str, str]:\n        if self.async_base_headers is not None:\n            return await self.async_base_headers()\n        return self.base_headers()\n\n    def get_base_url(self, maybe_base_url: typing.Optional[str]) -> str:\n        base_url = maybe_base_url\n        if self.base_url is not None and base_url is None:\n            base_url = self.base_url()\n\n        if base_url is None:\n            raise ValueError(\"A base_url is required to make this request, please provide one and try again.\")\n        return base_url\n\n    async def request(\n        self,\n        path: typing.Optional[str] = None,\n        *,\n        method: str,\n        base_url: typing.Optional[str] = None,\n        params: typing.Optional[typing.Dict[str, typing.Any]] = None,\n        json: typing.Optional[typing.Any] = None,\n        data: typing.Optional[typing.Any] = None,\n        content: typing.Optional[typing.Union[bytes, typing.Iterator[bytes], typing.AsyncIterator[bytes]]] = None,\n        files: typing.Optional[\n            typing.Union[\n                typing.Dict[str, typing.Optional[typing.Union[File, typing.List[File]]]],\n                typing.List[typing.Tuple[str, File]],\n            ]\n        ] = None,\n        headers: typing.Optional[typing.Dict[str, typing.Any]] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n        retries: int = 0,\n        omit: typing.Optional[typing.Any] = None,\n        force_multipart: typing.Optional[bool] = 
None,\n    ) -> httpx.Response:\n        base_url = self.get_base_url(base_url)\n        timeout = (\n            request_options.get(\"timeout_in_seconds\")\n            if request_options is not None and request_options.get(\"timeout_in_seconds\") is not None\n            else self.base_timeout()\n        )\n\n        request_files: typing.Optional[RequestFiles] = (\n            convert_file_dict_to_httpx_tuples(remove_omit_from_dict(remove_none_from_dict(files), omit))\n            if (files is not None and files is not omit and isinstance(files, dict))\n            else None\n        )\n\n        if (request_files is None or len(request_files) == 0) and force_multipart:\n            request_files = FORCE_MULTIPART\n\n        json_body, data_body = get_request_body(json=json, data=data, request_options=request_options, omit=omit)\n\n        data_body = _maybe_filter_none_from_multipart_data(data_body, request_files, force_multipart)\n\n        # Get headers (supports async token providers)\n        _headers = await self._get_headers()\n\n        # Compute encoded params separately to avoid passing empty list to httpx\n        # (httpx strips existing query params from URL when params=[] is passed)\n        _encoded_params = encode_query(\n            jsonable_encoder(\n                remove_none_from_dict(\n                    remove_omit_from_dict(\n                        {\n                            **(params if params is not None else {}),\n                            **(\n                                request_options.get(\"additional_query_parameters\", {}) or {}\n                                if request_options is not None\n                                else {}\n                            ),\n                        },\n                        omit,\n                    )\n                )\n            )\n        )\n\n        _request_url = _build_url(base_url, path)\n        _request_headers = jsonable_encoder(\n            
remove_none_from_dict(\n                {\n                    **_headers,\n                    **(headers if headers is not None else {}),\n                    **(request_options.get(\"additional_headers\", {}) or {} if request_options is not None else {}),\n                }\n            )\n        )\n\n        if self.logger.is_debug():\n            self.logger.debug(\n                \"Making HTTP request\",\n                method=method,\n                url=_request_url,\n                headers=_redact_headers(_request_headers),\n                has_body=json_body is not None or data_body is not None,\n            )\n\n        max_retries: int = (\n            request_options.get(\"max_retries\", self.base_max_retries)\n            if request_options is not None\n            else self.base_max_retries\n        )\n\n        try:\n            response = await self.httpx_client.request(\n                method=method,\n                url=_request_url,\n                headers=_request_headers,\n                params=_encoded_params if _encoded_params else None,\n                json=json_body,\n                data=data_body,\n                content=content,\n                files=request_files,\n                timeout=timeout,\n            )\n        except (httpx.ConnectError, httpx.RemoteProtocolError):\n            if retries < max_retries:\n                await asyncio.sleep(_retry_timeout_from_retries(retries=retries))\n                return await self.request(\n                    path=path,\n                    method=method,\n                    base_url=base_url,\n                    params=params,\n                    json=json,\n                    data=data,\n                    content=content,\n                    files=files,\n                    headers=headers,\n                    request_options=request_options,\n                    retries=retries + 1,\n                    omit=omit,\n                    
force_multipart=force_multipart,\n                )\n            raise\n\n        if _should_retry(response=response):\n            if retries < max_retries:\n                await asyncio.sleep(_retry_timeout(response=response, retries=retries))\n                return await self.request(\n                    path=path,\n                    method=method,\n                    base_url=base_url,\n                    params=params,\n                    json=json,\n                    data=data,\n                    content=content,\n                    files=files,\n                    headers=headers,\n                    request_options=request_options,\n                    retries=retries + 1,\n                    omit=omit,\n                    force_multipart=force_multipart,\n                )\n\n        if self.logger.is_debug():\n            if 200 <= response.status_code < 400:\n                self.logger.debug(\n                    \"HTTP request succeeded\",\n                    method=method,\n                    url=_request_url,\n                    status_code=response.status_code,\n                )\n\n        if self.logger.is_error():\n            if response.status_code >= 400:\n                self.logger.error(\n                    \"HTTP request failed with error status\",\n                    method=method,\n                    url=_request_url,\n                    status_code=response.status_code,\n                )\n\n        return response\n\n    @asynccontextmanager\n    async def stream(\n        self,\n        path: typing.Optional[str] = None,\n        *,\n        method: str,\n        base_url: typing.Optional[str] = None,\n        params: typing.Optional[typing.Dict[str, typing.Any]] = None,\n        json: typing.Optional[typing.Any] = None,\n        data: typing.Optional[typing.Any] = None,\n        content: typing.Optional[typing.Union[bytes, typing.Iterator[bytes], typing.AsyncIterator[bytes]]] = None,\n        files: 
typing.Optional[\n            typing.Union[\n                typing.Dict[str, typing.Optional[typing.Union[File, typing.List[File]]]],\n                typing.List[typing.Tuple[str, File]],\n            ]\n        ] = None,\n        headers: typing.Optional[typing.Dict[str, typing.Any]] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n        retries: int = 0,\n        omit: typing.Optional[typing.Any] = None,\n        force_multipart: typing.Optional[bool] = None,\n    ) -> typing.AsyncIterator[httpx.Response]:\n        base_url = self.get_base_url(base_url)\n        timeout = (\n            request_options.get(\"timeout_in_seconds\")\n            if request_options is not None and request_options.get(\"timeout_in_seconds\") is not None\n            else self.base_timeout()\n        )\n\n        request_files: typing.Optional[RequestFiles] = (\n            convert_file_dict_to_httpx_tuples(remove_omit_from_dict(remove_none_from_dict(files), omit))\n            if (files is not None and files is not omit and isinstance(files, dict))\n            else None\n        )\n\n        if (request_files is None or len(request_files) == 0) and force_multipart:\n            request_files = FORCE_MULTIPART\n\n        json_body, data_body = get_request_body(json=json, data=data, request_options=request_options, omit=omit)\n\n        data_body = _maybe_filter_none_from_multipart_data(data_body, request_files, force_multipart)\n\n        # Get headers (supports async token providers)\n        _headers = await self._get_headers()\n\n        # Compute encoded params separately to avoid passing empty list to httpx\n        # (httpx strips existing query params from URL when params=[] is passed)\n        _encoded_params = encode_query(\n            jsonable_encoder(\n                remove_none_from_dict(\n                    remove_omit_from_dict(\n                        {\n                            **(params if params is not None else {}),\n             
               **(\n                                request_options.get(\"additional_query_parameters\", {})\n                                if request_options is not None\n                                else {}\n                            ),\n                        },\n                        omit=omit,\n                    )\n                )\n            )\n        )\n\n        _request_url = _build_url(base_url, path)\n        _request_headers = jsonable_encoder(\n            remove_none_from_dict(\n                {\n                    **_headers,\n                    **(headers if headers is not None else {}),\n                    **(request_options.get(\"additional_headers\", {}) if request_options is not None else {}),\n                }\n            )\n        )\n\n        if self.logger.is_debug():\n            self.logger.debug(\n                \"Making streaming HTTP request\",\n                method=method,\n                url=_request_url,\n                headers=_redact_headers(_request_headers),\n            )\n\n        async with self.httpx_client.stream(\n            method=method,\n            url=_request_url,\n            headers=_request_headers,\n            params=_encoded_params if _encoded_params else None,\n            json=json_body,\n            data=data_body,\n            content=content,\n            files=request_files,\n            timeout=timeout,\n        ) as stream:\n            yield stream\n"
  },
  {
    "path": "src/cohere/core/http_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom typing import Dict, Generic, TypeVar\n\nimport httpx\n\n# Generic to represent the underlying type of the data wrapped by the HTTP response.\nT = TypeVar(\"T\")\n\n\nclass BaseHttpResponse:\n    \"\"\"Minimalist HTTP response wrapper that exposes response headers and status code.\"\"\"\n\n    _response: httpx.Response\n\n    def __init__(self, response: httpx.Response):\n        self._response = response\n\n    @property\n    def headers(self) -> Dict[str, str]:\n        return dict(self._response.headers)\n\n    @property\n    def status_code(self) -> int:\n        return self._response.status_code\n\n\nclass HttpResponse(Generic[T], BaseHttpResponse):\n    \"\"\"HTTP response wrapper that exposes response headers and data.\"\"\"\n\n    _data: T\n\n    def __init__(self, response: httpx.Response, data: T):\n        super().__init__(response)\n        self._data = data\n\n    @property\n    def data(self) -> T:\n        return self._data\n\n    def close(self) -> None:\n        self._response.close()\n\n\nclass AsyncHttpResponse(Generic[T], BaseHttpResponse):\n    \"\"\"HTTP response wrapper that exposes response headers and data.\"\"\"\n\n    _data: T\n\n    def __init__(self, response: httpx.Response, data: T):\n        super().__init__(response)\n        self._data = data\n\n    @property\n    def data(self) -> T:\n        return self._data\n\n    async def close(self) -> None:\n        await self._response.aclose()\n"
  },
  {
    "path": "src/cohere/core/http_sse/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from ._api import EventSource, aconnect_sse, connect_sse\n    from ._exceptions import SSEError\n    from ._models import ServerSentEvent\n_dynamic_imports: typing.Dict[str, str] = {\n    \"EventSource\": \"._api\",\n    \"SSEError\": \"._exceptions\",\n    \"ServerSentEvent\": \"._models\",\n    \"aconnect_sse\": \"._api\",\n    \"connect_sse\": \"._api\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\"EventSource\", \"SSEError\", \"ServerSentEvent\", \"aconnect_sse\", \"connect_sse\"]\n"
  },
  {
    "path": "src/cohere/core/http_sse/_api.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport re\nfrom contextlib import asynccontextmanager, contextmanager\nfrom typing import Any, AsyncGenerator, AsyncIterator, Iterator, cast\n\nimport httpx\nfrom ._decoders import SSEDecoder\nfrom ._exceptions import SSEError\nfrom ._models import ServerSentEvent\n\n\nclass EventSource:\n    def __init__(self, response: httpx.Response) -> None:\n        self._response = response\n\n    def _check_content_type(self) -> None:\n        content_type = self._response.headers.get(\"content-type\", \"\").partition(\";\")[0]\n        if \"text/event-stream\" not in content_type:\n            raise SSEError(\n                f\"Expected response header Content-Type to contain 'text/event-stream', got {content_type!r}\"\n            )\n\n    def _get_charset(self) -> str:\n        \"\"\"Extract charset from Content-Type header, fallback to UTF-8.\"\"\"\n        content_type = self._response.headers.get(\"content-type\", \"\")\n\n        # Parse charset parameter using regex\n        charset_match = re.search(r\"charset=([^;\\s]+)\", content_type, re.IGNORECASE)\n        if charset_match:\n            charset = charset_match.group(1).strip(\"\\\"'\")\n            # Validate that it's a known encoding\n            try:\n                # Test if the charset is valid by trying to encode/decode\n                \"test\".encode(charset).decode(charset)\n                return charset\n            except (LookupError, UnicodeError):\n                # If charset is invalid, fall back to UTF-8\n                pass\n\n        # Default to UTF-8 if no charset specified or invalid charset\n        return \"utf-8\"\n\n    @property\n    def response(self) -> httpx.Response:\n        return self._response\n\n    def iter_sse(self) -> Iterator[ServerSentEvent]:\n        self._check_content_type()\n        decoder = SSEDecoder()\n        charset = self._get_charset()\n\n        buffer = \"\"\n        for 
chunk in self._response.iter_bytes():\n            # Decode chunk using detected charset\n            text_chunk = chunk.decode(charset, errors=\"replace\")\n            buffer += text_chunk\n\n            # Process complete lines\n            while \"\\n\" in buffer:\n                line, buffer = buffer.split(\"\\n\", 1)\n                line = line.rstrip(\"\\r\")\n                sse = decoder.decode(line)\n                # when we reach a \"\\n\\n\" => line = ''\n                # => decoder will attempt to return an SSE Event\n                if sse is not None:\n                    yield sse\n\n        # Process any remaining data in buffer\n        if buffer.strip():\n            line = buffer.rstrip(\"\\r\")\n            sse = decoder.decode(line)\n            if sse is not None:\n                yield sse\n\n    async def aiter_sse(self) -> AsyncGenerator[ServerSentEvent, None]:\n        self._check_content_type()\n        decoder = SSEDecoder()\n        lines = cast(AsyncGenerator[str, None], self._response.aiter_lines())\n        try:\n            async for line in lines:\n                line = line.rstrip(\"\\n\")\n                sse = decoder.decode(line)\n                if sse is not None:\n                    yield sse\n        finally:\n            await lines.aclose()\n\n\n@contextmanager\ndef connect_sse(client: httpx.Client, method: str, url: str, **kwargs: Any) -> Iterator[EventSource]:\n    headers = kwargs.pop(\"headers\", {})\n    headers[\"Accept\"] = \"text/event-stream\"\n    headers[\"Cache-Control\"] = \"no-store\"\n\n    with client.stream(method, url, headers=headers, **kwargs) as response:\n        yield EventSource(response)\n\n\n@asynccontextmanager\nasync def aconnect_sse(\n    client: httpx.AsyncClient,\n    method: str,\n    url: str,\n    **kwargs: Any,\n) -> AsyncIterator[EventSource]:\n    headers = kwargs.pop(\"headers\", {})\n    headers[\"Accept\"] = \"text/event-stream\"\n    headers[\"Cache-Control\"] = 
\"no-store\"\n\n    async with client.stream(method, url, headers=headers, **kwargs) as response:\n        yield EventSource(response)\n"
  },
  {
    "path": "src/cohere/core/http_sse/_decoders.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom typing import List, Optional\n\nfrom ._models import ServerSentEvent\n\n\nclass SSEDecoder:\n    def __init__(self) -> None:\n        self._event = \"\"\n        self._data: List[str] = []\n        self._last_event_id = \"\"\n        self._retry: Optional[int] = None\n\n    def decode(self, line: str) -> Optional[ServerSentEvent]:\n        # See: https://html.spec.whatwg.org/multipage/server-sent-events.html#event-stream-interpretation  # noqa: E501\n\n        if not line:\n            if not self._event and not self._data and not self._last_event_id and self._retry is None:\n                return None\n\n            sse = ServerSentEvent(\n                event=self._event,\n                data=\"\\n\".join(self._data),\n                id=self._last_event_id,\n                retry=self._retry,\n            )\n\n            # NOTE: as per the SSE spec, do not reset last_event_id.\n            self._event = \"\"\n            self._data = []\n            self._retry = None\n\n            return sse\n\n        if line.startswith(\":\"):\n            return None\n\n        fieldname, _, value = line.partition(\":\")\n\n        if value.startswith(\" \"):\n            value = value[1:]\n\n        if fieldname == \"event\":\n            self._event = value\n        elif fieldname == \"data\":\n            self._data.append(value)\n        elif fieldname == \"id\":\n            if \"\\0\" in value:\n                pass\n            else:\n                self._last_event_id = value\n        elif fieldname == \"retry\":\n            try:\n                self._retry = int(value)\n            except (TypeError, ValueError):\n                pass\n        else:\n            pass  # Field is ignored.\n\n        return None\n"
  },
  {
    "path": "src/cohere/core/http_sse/_exceptions.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport httpx\n\n\nclass SSEError(httpx.TransportError):\n    pass\n"
  },
  {
    "path": "src/cohere/core/http_sse/_models.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport json\nfrom dataclasses import dataclass\nfrom typing import Any, Optional\n\n\n@dataclass(frozen=True)\nclass ServerSentEvent:\n    event: str = \"message\"\n    data: str = \"\"\n    id: str = \"\"\n    retry: Optional[int] = None\n\n    def json(self) -> Any:\n        \"\"\"Parse the data field as JSON.\"\"\"\n        return json.loads(self.data)\n"
  },
  {
    "path": "src/cohere/core/jsonable_encoder.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n\"\"\"\njsonable_encoder converts a Python object to a JSON-friendly dict\n(e.g. datetimes to strings, Pydantic models to dicts).\n\nTaken from FastAPI, and made a bit simpler\nhttps://github.com/tiangolo/fastapi/blob/master/fastapi/encoders.py\n\"\"\"\n\nimport base64\nimport dataclasses\nimport datetime as dt\nfrom enum import Enum\nfrom pathlib import PurePath\nfrom types import GeneratorType\nfrom typing import Any, Callable, Dict, List, Optional, Set, Union\n\nimport pydantic\nfrom .datetime_utils import serialize_datetime\nfrom .pydantic_utilities import (\n    IS_PYDANTIC_V2,\n    encode_by_type,\n    to_jsonable_with_fallback,\n)\n\nSetIntStr = Set[Union[int, str]]\nDictIntStrAny = Dict[Union[int, str], Any]\n\n\ndef jsonable_encoder(obj: Any, custom_encoder: Optional[Dict[Any, Callable[[Any], Any]]] = None) -> Any:\n    custom_encoder = custom_encoder or {}\n    # Generated SDKs use Ellipsis (`...`) as the sentinel value for \"OMIT\".\n    # OMIT values should be excluded from serialized payloads.\n    if obj is Ellipsis:\n        return None\n    if custom_encoder:\n        if type(obj) in custom_encoder:\n            return custom_encoder[type(obj)](obj)\n        else:\n            for encoder_type, encoder_instance in custom_encoder.items():\n                if isinstance(obj, encoder_type):\n                    return encoder_instance(obj)\n    if isinstance(obj, pydantic.BaseModel):\n        if IS_PYDANTIC_V2:\n            encoder = getattr(obj.model_config, \"json_encoders\", {})  # type: ignore # Pydantic v2\n        else:\n            encoder = getattr(obj.__config__, \"json_encoders\", {})  # type: ignore # Pydantic v1\n        if custom_encoder:\n            encoder.update(custom_encoder)\n        obj_dict = obj.dict(by_alias=True)\n        if \"__root__\" in obj_dict:\n            obj_dict = obj_dict[\"__root__\"]\n        if \"root\" in obj_dict:\n            
obj_dict = obj_dict[\"root\"]\n        return jsonable_encoder(obj_dict, custom_encoder=encoder)\n    if dataclasses.is_dataclass(obj):\n        obj_dict = dataclasses.asdict(obj)  # type: ignore\n        return jsonable_encoder(obj_dict, custom_encoder=custom_encoder)\n    if isinstance(obj, bytes):\n        return base64.b64encode(obj).decode(\"utf-8\")\n    if isinstance(obj, Enum):\n        return obj.value\n    if isinstance(obj, PurePath):\n        return str(obj)\n    if isinstance(obj, (str, int, float, type(None))):\n        return obj\n    if isinstance(obj, dt.datetime):\n        return serialize_datetime(obj)\n    if isinstance(obj, dt.date):\n        return str(obj)\n    if isinstance(obj, dict):\n        encoded_dict = {}\n        allowed_keys = set(obj.keys())\n        for key, value in obj.items():\n            if key in allowed_keys:\n                if value is Ellipsis:\n                    continue\n                encoded_key = jsonable_encoder(key, custom_encoder=custom_encoder)\n                encoded_value = jsonable_encoder(value, custom_encoder=custom_encoder)\n                encoded_dict[encoded_key] = encoded_value\n        return encoded_dict\n    if isinstance(obj, (list, set, frozenset, GeneratorType, tuple)):\n        encoded_list = []\n        for item in obj:\n            if item is Ellipsis:\n                continue\n            encoded_list.append(jsonable_encoder(item, custom_encoder=custom_encoder))\n        return encoded_list\n\n    def fallback_serializer(o: Any) -> Any:\n        attempt_encode = encode_by_type(o)\n        if attempt_encode is not None:\n            return attempt_encode\n\n        try:\n            data = dict(o)\n        except Exception as e:\n            errors: List[Exception] = []\n            errors.append(e)\n            try:\n                data = vars(o)\n            except Exception as e:\n                errors.append(e)\n                raise ValueError(errors) from e\n        return 
jsonable_encoder(data, custom_encoder=custom_encoder)\n\n    return to_jsonable_with_fallback(obj, fallback_serializer)\n"
  },
  {
    "path": "src/cohere/core/logging.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport logging\nimport typing\n\nLogLevel = typing.Literal[\"debug\", \"info\", \"warn\", \"error\"]\n\n_LOG_LEVEL_MAP: typing.Dict[LogLevel, int] = {\n    \"debug\": 1,\n    \"info\": 2,\n    \"warn\": 3,\n    \"error\": 4,\n}\n\n\nclass ILogger(typing.Protocol):\n    def debug(self, message: str, **kwargs: typing.Any) -> None: ...\n    def info(self, message: str, **kwargs: typing.Any) -> None: ...\n    def warn(self, message: str, **kwargs: typing.Any) -> None: ...\n    def error(self, message: str, **kwargs: typing.Any) -> None: ...\n\n\nclass ConsoleLogger:\n    _logger: logging.Logger\n\n    def __init__(self) -> None:\n        self._logger = logging.getLogger(\"fern\")\n        if not self._logger.handlers:\n            handler = logging.StreamHandler()\n            handler.setFormatter(logging.Formatter(\"%(levelname)s - %(message)s\"))\n            self._logger.addHandler(handler)\n            self._logger.setLevel(logging.DEBUG)\n\n    def debug(self, message: str, **kwargs: typing.Any) -> None:\n        self._logger.debug(message, extra=kwargs)\n\n    def info(self, message: str, **kwargs: typing.Any) -> None:\n        self._logger.info(message, extra=kwargs)\n\n    def warn(self, message: str, **kwargs: typing.Any) -> None:\n        self._logger.warning(message, extra=kwargs)\n\n    def error(self, message: str, **kwargs: typing.Any) -> None:\n        self._logger.error(message, extra=kwargs)\n\n\nclass LogConfig(typing.TypedDict, total=False):\n    level: LogLevel\n    logger: ILogger\n    silent: bool\n\n\nclass Logger:\n    _level: int\n    _logger: ILogger\n    _silent: bool\n\n    def __init__(self, *, level: LogLevel, logger: ILogger, silent: bool) -> None:\n        self._level = _LOG_LEVEL_MAP[level]\n        self._logger = logger\n        self._silent = silent\n\n    def _should_log(self, level: LogLevel) -> bool:\n        return not self._silent and self._level 
<= _LOG_LEVEL_MAP[level]\n\n    def is_debug(self) -> bool:\n        return self._should_log(\"debug\")\n\n    def is_info(self) -> bool:\n        return self._should_log(\"info\")\n\n    def is_warn(self) -> bool:\n        return self._should_log(\"warn\")\n\n    def is_error(self) -> bool:\n        return self._should_log(\"error\")\n\n    def debug(self, message: str, **kwargs: typing.Any) -> None:\n        if self.is_debug():\n            self._logger.debug(message, **kwargs)\n\n    def info(self, message: str, **kwargs: typing.Any) -> None:\n        if self.is_info():\n            self._logger.info(message, **kwargs)\n\n    def warn(self, message: str, **kwargs: typing.Any) -> None:\n        if self.is_warn():\n            self._logger.warn(message, **kwargs)\n\n    def error(self, message: str, **kwargs: typing.Any) -> None:\n        if self.is_error():\n            self._logger.error(message, **kwargs)\n\n\n_default_logger: Logger = Logger(level=\"info\", logger=ConsoleLogger(), silent=True)\n\n\ndef create_logger(config: typing.Optional[typing.Union[LogConfig, Logger]] = None) -> Logger:\n    if config is None:\n        return _default_logger\n    if isinstance(config, Logger):\n        return config\n    return Logger(\n        level=config.get(\"level\", \"info\"),\n        logger=config.get(\"logger\", ConsoleLogger()),\n        silent=config.get(\"silent\", True),\n    )\n"
  },
  {
    "path": "src/cohere/core/parse_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom typing import Any, Dict, Optional\n\n\nclass ParsingError(Exception):\n    \"\"\"\n    Raised when the SDK fails to parse/validate a response from the server.\n    This typically indicates that the server returned a response whose shape\n    does not match the expected schema.\n    \"\"\"\n\n    headers: Optional[Dict[str, str]]\n    status_code: Optional[int]\n    body: Any\n    cause: Optional[Exception]\n\n    def __init__(\n        self,\n        *,\n        headers: Optional[Dict[str, str]] = None,\n        status_code: Optional[int] = None,\n        body: Any = None,\n        cause: Optional[Exception] = None,\n    ) -> None:\n        self.headers = headers\n        self.status_code = status_code\n        self.body = body\n        self.cause = cause\n        super().__init__()\n        if cause is not None:\n            self.__cause__ = cause\n\n    def __str__(self) -> str:\n        cause_str = f\", cause: {self.cause}\" if self.cause is not None else \"\"\n        return f\"headers: {self.headers}, status_code: {self.status_code}, body: {self.body}{cause_str}\"\n"
  },
  {
    "path": "src/cohere/core/pydantic_utilities.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# nopycln: file\nimport datetime as dt\nimport inspect\nimport json\nimport logging\nfrom collections import defaultdict\nfrom dataclasses import asdict\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    Callable,\n    ClassVar,\n    Dict,\n    List,\n    Mapping,\n    Optional,\n    Set,\n    Tuple,\n    Type,\n    TypeVar,\n    Union,\n    cast,\n)\n\nimport pydantic\nimport typing_extensions\nfrom pydantic.fields import FieldInfo as _FieldInfo\n\n_logger = logging.getLogger(__name__)\n\nif TYPE_CHECKING:\n    from .http_sse._models import ServerSentEvent\n\nIS_PYDANTIC_V2 = pydantic.VERSION.startswith(\"2.\")\n\nif IS_PYDANTIC_V2:\n    _datetime_adapter = pydantic.TypeAdapter(dt.datetime)  # type: ignore[attr-defined]\n    _date_adapter = pydantic.TypeAdapter(dt.date)  # type: ignore[attr-defined]\n\n    def parse_datetime(value: Any) -> dt.datetime:  # type: ignore[misc]\n        if isinstance(value, dt.datetime):\n            return value\n        return _datetime_adapter.validate_python(value)\n\n    def parse_date(value: Any) -> dt.date:  # type: ignore[misc]\n        if isinstance(value, dt.datetime):\n            return value.date()\n        if isinstance(value, dt.date):\n            return value\n        return _date_adapter.validate_python(value)\n\n    # Avoid importing from pydantic.v1 to maintain Python 3.14 compatibility.\n    from typing import get_args as get_args  # type: ignore[assignment]\n    from typing import get_origin as get_origin  # type: ignore[assignment]\n\n    def is_literal_type(tp: Optional[Type[Any]]) -> bool:  # type: ignore[misc]\n        return typing_extensions.get_origin(tp) is typing_extensions.Literal\n\n    def is_union(tp: Optional[Type[Any]]) -> bool:  # type: ignore[misc]\n        return tp is Union or typing_extensions.get_origin(tp) is Union  # type: ignore[comparison-overlap]\n\n    # Inline encoders_by_type to avoid importing 
from pydantic.v1.json\n    import re as _re\n    from collections import deque as _deque\n    from decimal import Decimal as _Decimal\n    from enum import Enum as _Enum\n    from ipaddress import (\n        IPv4Address as _IPv4Address,\n    )\n    from ipaddress import (\n        IPv4Interface as _IPv4Interface,\n    )\n    from ipaddress import (\n        IPv4Network as _IPv4Network,\n    )\n    from ipaddress import (\n        IPv6Address as _IPv6Address,\n    )\n    from ipaddress import (\n        IPv6Interface as _IPv6Interface,\n    )\n    from ipaddress import (\n        IPv6Network as _IPv6Network,\n    )\n    from pathlib import Path as _Path\n    from types import GeneratorType as _GeneratorType\n    from uuid import UUID as _UUID\n\n    from pydantic.fields import FieldInfo as ModelField  # type: ignore[no-redef, assignment]\n\n    def _decimal_encoder(dec_value: Any) -> Any:\n        if dec_value.as_tuple().exponent >= 0:\n            return int(dec_value)\n        return float(dec_value)\n\n    encoders_by_type: Dict[Type[Any], Callable[[Any], Any]] = {  # type: ignore[no-redef]\n        bytes: lambda o: o.decode(),\n        dt.date: lambda o: o.isoformat(),\n        dt.datetime: lambda o: o.isoformat(),\n        dt.time: lambda o: o.isoformat(),\n        dt.timedelta: lambda td: td.total_seconds(),\n        _Decimal: _decimal_encoder,\n        _Enum: lambda o: o.value,\n        frozenset: list,\n        _deque: list,\n        _GeneratorType: list,\n        _IPv4Address: str,\n        _IPv4Interface: str,\n        _IPv4Network: str,\n        _IPv6Address: str,\n        _IPv6Interface: str,\n        _IPv6Network: str,\n        _Path: str,\n        _re.Pattern: lambda o: o.pattern,\n        set: list,\n        _UUID: str,\n    }\nelse:\n    from pydantic.datetime_parse import parse_date as parse_date  # type: ignore[no-redef]\n    from pydantic.datetime_parse import parse_datetime as parse_datetime  # type: ignore[no-redef]\n    from pydantic.fields 
import ModelField as ModelField  # type: ignore[attr-defined, no-redef, assignment]\n    from pydantic.json import ENCODERS_BY_TYPE as encoders_by_type  # type: ignore[no-redef]\n    from pydantic.typing import get_args as get_args  # type: ignore[no-redef]\n    from pydantic.typing import get_origin as get_origin  # type: ignore[no-redef]\n    from pydantic.typing import is_literal_type as is_literal_type  # type: ignore[no-redef, assignment]\n    from pydantic.typing import is_union as is_union  # type: ignore[no-redef]\n\nfrom .datetime_utils import serialize_datetime\nfrom .serialization import convert_and_respect_annotation_metadata\nfrom typing_extensions import TypeAlias\n\nT = TypeVar(\"T\")\nModel = TypeVar(\"Model\", bound=pydantic.BaseModel)\n\n\ndef _get_discriminator_and_variants(type_: Type[Any]) -> Tuple[Optional[str], Optional[List[Type[Any]]]]:\n    \"\"\"\n    Extract the discriminator field name and union variants from a discriminated union type.\n    Supports Annotated[Union[...], Field(discriminator=...)] patterns.\n    Returns (discriminator, variants) or (None, None) if not a discriminated union.\n    \"\"\"\n    origin = typing_extensions.get_origin(type_)\n\n    if origin is typing_extensions.Annotated:\n        args = typing_extensions.get_args(type_)\n        if len(args) >= 2:\n            inner_type = args[0]\n            # Check annotations for discriminator\n            discriminator = None\n            for annotation in args[1:]:\n                if hasattr(annotation, \"discriminator\"):\n                    discriminator = getattr(annotation, \"discriminator\", None)\n                    break\n\n            if discriminator:\n                inner_origin = typing_extensions.get_origin(inner_type)\n                if inner_origin is Union:\n                    variants = list(typing_extensions.get_args(inner_type))\n                    return discriminator, variants\n    return None, None\n\n\ndef _get_field_annotation(model: 
Type[Any], field_name: str) -> Optional[Type[Any]]:\n    \"\"\"Get the type annotation of a field from a Pydantic model.\"\"\"\n    if IS_PYDANTIC_V2:\n        fields = getattr(model, \"model_fields\", {})\n        field_info = fields.get(field_name)\n        if field_info:\n            return cast(Optional[Type[Any]], field_info.annotation)\n    else:\n        fields = getattr(model, \"__fields__\", {})\n        field_info = fields.get(field_name)\n        if field_info:\n            return cast(Optional[Type[Any]], field_info.outer_type_)\n    return None\n\n\ndef _find_variant_by_discriminator(\n    variants: List[Type[Any]],\n    discriminator: str,\n    discriminator_value: Any,\n) -> Optional[Type[Any]]:\n    \"\"\"Find the union variant that matches the discriminator value.\"\"\"\n    for variant in variants:\n        if not (inspect.isclass(variant) and issubclass(variant, pydantic.BaseModel)):\n            continue\n\n        disc_annotation = _get_field_annotation(variant, discriminator)\n        if disc_annotation and is_literal_type(disc_annotation):\n            literal_args = get_args(disc_annotation)\n            if literal_args and literal_args[0] == discriminator_value:\n                return variant\n    return None\n\n\ndef _is_string_type(type_: Type[Any]) -> bool:\n    \"\"\"Check if a type is str or Optional[str].\"\"\"\n    if type_ is str:\n        return True\n\n    origin = typing_extensions.get_origin(type_)\n    if origin is Union:\n        args = typing_extensions.get_args(type_)\n        # Optional[str] = Union[str, None]\n        non_none_args = [a for a in args if a is not type(None)]\n        if len(non_none_args) == 1 and non_none_args[0] is str:\n            return True\n\n    return False\n\n\ndef parse_sse_obj(sse: \"ServerSentEvent\", type_: Type[T]) -> T:\n    \"\"\"\n    Parse a ServerSentEvent into the appropriate type.\n\n    Handles two scenarios based on where the discriminator field is located:\n\n    1. 
Data-level discrimination: The discriminator (e.g., 'type') is inside the 'data' payload.\n       The union describes the data content, not the SSE envelope.\n       -> Returns: json.loads(data) parsed into the type\n\n       Example: ChatStreamResponse with discriminator='type'\n       Input:  ServerSentEvent(event=\"message\", data='{\"type\": \"content-delta\", ...}', id=\"\")\n       Output: ContentDeltaEvent (parsed from data, SSE envelope stripped)\n\n    2. Event-level discrimination: The discriminator (e.g., 'event') is at the SSE event level.\n       The union describes the full SSE event structure.\n       -> Returns: SSE envelope with 'data' field JSON-parsed only if the variant expects non-string\n\n       Example: JobStreamResponse with discriminator='event'\n       Input:  ServerSentEvent(event=\"ERROR\", data='{\"code\": \"FAILED\", ...}', id=\"123\")\n       Output: JobStreamResponse_Error with data as ErrorData object\n\n       But for variants where data is str (like STATUS_UPDATE):\n       Input:  ServerSentEvent(event=\"STATUS_UPDATE\", data='{\"status\": \"processing\"}', id=\"1\")\n       Output: JobStreamResponse_StatusUpdate with data as string (not parsed)\n\n    Args:\n        sse: The ServerSentEvent object to parse\n        type_: The target discriminated union type\n\n    Returns:\n        The parsed object of type T\n\n    Note:\n        This function is only available in SDK contexts where http_sse module exists.\n    \"\"\"\n    sse_event = asdict(sse)\n    discriminator, variants = _get_discriminator_and_variants(type_)\n\n    if discriminator is None or variants is None:\n        # Not a discriminated union - parse the data field as JSON\n        data_value = sse_event.get(\"data\")\n        if isinstance(data_value, str) and data_value:\n            try:\n                parsed_data = json.loads(data_value)\n                return parse_obj_as(type_, parsed_data)\n            except json.JSONDecodeError as e:\n                
_logger.warning(\n                    \"Failed to parse SSE data field as JSON: %s, data: %s\",\n                    e,\n                    data_value[:100] if len(data_value) > 100 else data_value,\n                )\n        return parse_obj_as(type_, sse_event)\n\n    data_value = sse_event.get(\"data\")\n\n    # Check if discriminator is at the top level (event-level discrimination)\n    if discriminator in sse_event:\n        # Case 2: Event-level discrimination\n        # Find the matching variant to check if 'data' field needs JSON parsing\n        disc_value = sse_event.get(discriminator)\n        matching_variant = _find_variant_by_discriminator(variants, discriminator, disc_value)\n\n        if matching_variant is not None:\n            # Check what type the variant expects for 'data'\n            data_type = _get_field_annotation(matching_variant, \"data\")\n            if data_type is not None and not _is_string_type(data_type):\n                # Variant expects non-string data - parse JSON\n                if isinstance(data_value, str) and data_value:\n                    try:\n                        parsed_data = json.loads(data_value)\n                        new_object = dict(sse_event)\n                        new_object[\"data\"] = parsed_data\n                        return parse_obj_as(type_, new_object)\n                    except json.JSONDecodeError as e:\n                        _logger.warning(\n                            \"Failed to parse SSE data field as JSON for event-level discrimination: %s, data: %s\",\n                            e,\n                            data_value[:100] if len(data_value) > 100 else data_value,\n                        )\n        # Either no matching variant, data is string type, or JSON parse failed\n        return parse_obj_as(type_, sse_event)\n\n    else:\n        # Case 1: Data-level discrimination\n        # The discriminator is inside the data payload - extract and parse data only\n        if 
isinstance(data_value, str) and data_value:\n            try:\n                parsed_data = json.loads(data_value)\n                return parse_obj_as(type_, parsed_data)\n            except json.JSONDecodeError as e:\n                _logger.warning(\n                    \"Failed to parse SSE data field as JSON for data-level discrimination: %s, data: %s\",\n                    e,\n                    data_value[:100] if len(data_value) > 100 else data_value,\n                )\n        return parse_obj_as(type_, sse_event)\n\n\ndef parse_obj_as(type_: Type[T], object_: Any) -> T:\n    # convert_and_respect_annotation_metadata is required for TypedDict aliasing.\n    #\n    # For Pydantic models, whether we should pre-dealias depends on how the model encodes aliasing:\n    # - If the model uses real Pydantic aliases (pydantic.Field(alias=...)), then we must pass wire keys through\n    #   unchanged so Pydantic can validate them.\n    # - If the model encodes aliasing only via FieldMetadata annotations, then we MUST pre-dealias because Pydantic\n    #   will not recognize those aliases during validation.\n    if inspect.isclass(type_) and issubclass(type_, pydantic.BaseModel):\n        has_pydantic_aliases = False\n        if IS_PYDANTIC_V2:\n            for field_name, field_info in getattr(type_, \"model_fields\", {}).items():  # type: ignore[attr-defined]\n                alias = getattr(field_info, \"alias\", None)\n                if alias is not None and alias != field_name:\n                    has_pydantic_aliases = True\n                    break\n        else:\n            for field in getattr(type_, \"__fields__\", {}).values():\n                alias = getattr(field, \"alias\", None)\n                name = getattr(field, \"name\", None)\n                if alias is not None and name is not None and alias != name:\n                    has_pydantic_aliases = True\n                    break\n\n        dealiased_object = (\n            object_\n          
  if has_pydantic_aliases\n            else convert_and_respect_annotation_metadata(object_=object_, annotation=type_, direction=\"read\")\n        )\n    else:\n        dealiased_object = convert_and_respect_annotation_metadata(object_=object_, annotation=type_, direction=\"read\")\n    if IS_PYDANTIC_V2:\n        adapter = pydantic.TypeAdapter(type_)  # type: ignore[attr-defined]\n        return adapter.validate_python(dealiased_object)\n    return pydantic.parse_obj_as(type_, dealiased_object)\n\n\ndef to_jsonable_with_fallback(obj: Any, fallback_serializer: Callable[[Any], Any]) -> Any:\n    if IS_PYDANTIC_V2:\n        from pydantic_core import to_jsonable_python\n\n        return to_jsonable_python(obj, fallback=fallback_serializer)\n    return fallback_serializer(obj)\n\n\nclass UniversalBaseModel(pydantic.BaseModel):\n    if IS_PYDANTIC_V2:\n        model_config: ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(  # type: ignore[typeddict-unknown-key]\n            # Allow fields beginning with `model_` to be used in the model\n            protected_namespaces=(),\n        )\n\n        @pydantic.model_validator(mode=\"before\")  # type: ignore[attr-defined]\n        @classmethod\n        def _coerce_field_names_to_aliases(cls, data: Any) -> Any:\n            \"\"\"\n            Accept Python field names in input by rewriting them to their Pydantic aliases,\n            while avoiding silent collisions when a key could refer to multiple fields.\n            \"\"\"\n            if not isinstance(data, Mapping):\n                return data\n\n            fields = getattr(cls, \"model_fields\", {})  # type: ignore[attr-defined]\n            name_to_alias: Dict[str, str] = {}\n            alias_to_name: Dict[str, str] = {}\n\n            for name, field_info in fields.items():\n                alias = getattr(field_info, \"alias\", None) or name\n                name_to_alias[name] = alias\n                if alias != name:\n                    
alias_to_name[alias] = name\n\n            # Detect ambiguous keys: a key that is an alias for one field and a name for another.\n            ambiguous_keys = set(alias_to_name.keys()).intersection(set(name_to_alias.keys()))\n            for key in ambiguous_keys:\n                if key in data and name_to_alias[key] not in data:\n                    raise ValueError(\n                        f\"Ambiguous input key '{key}': it is both a field name and an alias. \"\n                        \"Provide the explicit alias key to disambiguate.\"\n                    )\n\n            original_keys = set(data.keys())\n            rewritten: Dict[str, Any] = dict(data)\n            for name, alias in name_to_alias.items():\n                if alias != name and name in original_keys and alias not in rewritten:\n                    rewritten[alias] = rewritten.pop(name)\n\n            return rewritten\n\n        @pydantic.model_serializer(mode=\"plain\", when_used=\"json\")  # type: ignore[attr-defined]\n        def serialize_model(self) -> Any:  # type: ignore[name-defined]\n            serialized = self.dict()  # type: ignore[attr-defined]\n            data = {k: serialize_datetime(v) if isinstance(v, dt.datetime) else v for k, v in serialized.items()}\n            return data\n\n    else:\n\n        class Config:\n            smart_union = True\n            json_encoders = {dt.datetime: serialize_datetime}\n\n        @pydantic.root_validator(pre=True)\n        def _coerce_field_names_to_aliases(cls, values: Any) -> Any:\n            \"\"\"\n            Pydantic v1 equivalent of _coerce_field_names_to_aliases.\n            \"\"\"\n            if not isinstance(values, Mapping):\n                return values\n\n            fields = getattr(cls, \"__fields__\", {})\n            name_to_alias: Dict[str, str] = {}\n            alias_to_name: Dict[str, str] = {}\n\n            for name, field in fields.items():\n                alias = getattr(field, \"alias\", None) or name\n 
               name_to_alias[name] = alias\n                if alias != name:\n                    alias_to_name[alias] = name\n\n            ambiguous_keys = set(alias_to_name.keys()).intersection(set(name_to_alias.keys()))\n            for key in ambiguous_keys:\n                if key in values and name_to_alias[key] not in values:\n                    raise ValueError(\n                        f\"Ambiguous input key '{key}': it is both a field name and an alias. \"\n                        \"Provide the explicit alias key to disambiguate.\"\n                    )\n\n            original_keys = set(values.keys())\n            rewritten: Dict[str, Any] = dict(values)\n            for name, alias in name_to_alias.items():\n                if alias != name and name in original_keys and alias not in rewritten:\n                    rewritten[alias] = rewritten.pop(name)\n\n            return rewritten\n\n    @classmethod\n    def model_construct(cls: Type[\"Model\"], _fields_set: Optional[Set[str]] = None, **values: Any) -> \"Model\":\n        dealiased_object = convert_and_respect_annotation_metadata(object_=values, annotation=cls, direction=\"read\")\n        return cls.construct(_fields_set, **dealiased_object)\n\n    @classmethod\n    def construct(cls: Type[\"Model\"], _fields_set: Optional[Set[str]] = None, **values: Any) -> \"Model\":\n        dealiased_object = convert_and_respect_annotation_metadata(object_=values, annotation=cls, direction=\"read\")\n        if IS_PYDANTIC_V2:\n            return super().model_construct(_fields_set, **dealiased_object)  # type: ignore[misc]\n        return super().construct(_fields_set, **dealiased_object)\n\n    def json(self, **kwargs: Any) -> str:\n        kwargs_with_defaults = {\n            \"by_alias\": True,\n            \"exclude_unset\": True,\n            **kwargs,\n        }\n        if IS_PYDANTIC_V2:\n            return super().model_dump_json(**kwargs_with_defaults)  # type: ignore[misc]\n        return 
super().json(**kwargs_with_defaults)\n\n    def dict(self, **kwargs: Any) -> Dict[str, Any]:\n        \"\"\"\n        Override the default dict method to `exclude_unset` by default. This function patches\n        `exclude_unset` to work include fields within non-None default values.\n        \"\"\"\n        # Note: the logic here is multiplexed given the levers exposed in Pydantic V1 vs V2\n        # Pydantic V1's .dict can be extremely slow, so we do not want to call it twice.\n        #\n        # We'd ideally do the same for Pydantic V2, but it shells out to a library to serialize models\n        # that we have less control over, and this is less intrusive than custom serializers for now.\n        if IS_PYDANTIC_V2:\n            kwargs_with_defaults_exclude_unset = {\n                **kwargs,\n                \"by_alias\": True,\n                \"exclude_unset\": True,\n                \"exclude_none\": False,\n            }\n            kwargs_with_defaults_exclude_none = {\n                **kwargs,\n                \"by_alias\": True,\n                \"exclude_none\": True,\n                \"exclude_unset\": False,\n            }\n            dict_dump = deep_union_pydantic_dicts(\n                super().model_dump(**kwargs_with_defaults_exclude_unset),  # type: ignore[misc]\n                super().model_dump(**kwargs_with_defaults_exclude_none),  # type: ignore[misc]\n            )\n\n        else:\n            _fields_set = self.__fields_set__.copy()\n\n            fields = _get_model_fields(self.__class__)\n            for name, field in fields.items():\n                if name not in _fields_set:\n                    default = _get_field_default(field)\n\n                    # If the default values are non-null act like they've been set\n                    # This effectively allows exclude_unset to work like exclude_none where\n                    # the latter passes through intentionally set none values.\n                    if default is not None 
or (\"exclude_unset\" in kwargs and not kwargs[\"exclude_unset\"]):\n                        _fields_set.add(name)\n\n                        if default is not None:\n                            self.__fields_set__.add(name)\n\n            kwargs_with_defaults_exclude_unset_include_fields = {\n                \"by_alias\": True,\n                \"exclude_unset\": True,\n                \"include\": _fields_set,\n                **kwargs,\n            }\n\n            dict_dump = super().dict(**kwargs_with_defaults_exclude_unset_include_fields)\n\n        return cast(\n            Dict[str, Any],\n            convert_and_respect_annotation_metadata(object_=dict_dump, annotation=self.__class__, direction=\"write\"),\n        )\n\n\ndef _union_list_of_pydantic_dicts(source: List[Any], destination: List[Any]) -> List[Any]:\n    converted_list: List[Any] = []\n    for i, item in enumerate(source):\n        destination_value = destination[i]\n        if isinstance(item, dict):\n            converted_list.append(deep_union_pydantic_dicts(item, destination_value))\n        elif isinstance(item, list):\n            converted_list.append(_union_list_of_pydantic_dicts(item, destination_value))\n        else:\n            converted_list.append(item)\n    return converted_list\n\n\ndef deep_union_pydantic_dicts(source: Dict[str, Any], destination: Dict[str, Any]) -> Dict[str, Any]:\n    for key, value in source.items():\n        node = destination.setdefault(key, {})\n        if isinstance(value, dict):\n            deep_union_pydantic_dicts(value, node)\n        # Note: we do not do this same processing for sets given we do not have sets of models\n        # and given the sets are unordered, the processing of the set and matching objects would\n        # be non-trivial.\n        elif isinstance(value, list):\n            destination[key] = _union_list_of_pydantic_dicts(value, node)\n        else:\n            destination[key] = value\n\n    return destination\n\n\nif 
IS_PYDANTIC_V2:\n\n    class V2RootModel(UniversalBaseModel, pydantic.RootModel):  # type: ignore[misc, name-defined, type-arg]\n        pass\n\n    UniversalRootModel: TypeAlias = V2RootModel  # type: ignore[misc]\nelse:\n    UniversalRootModel: TypeAlias = UniversalBaseModel  # type: ignore[misc, no-redef]\n\n\ndef encode_by_type(o: Any) -> Any:\n    encoders_by_class_tuples: Dict[Callable[[Any], Any], Tuple[Any, ...]] = defaultdict(tuple)\n    for type_, encoder in encoders_by_type.items():\n        encoders_by_class_tuples[encoder] += (type_,)\n\n    if type(o) in encoders_by_type:\n        return encoders_by_type[type(o)](o)\n    for encoder, classes_tuple in encoders_by_class_tuples.items():\n        if isinstance(o, classes_tuple):\n            return encoder(o)\n\n\ndef update_forward_refs(model: Type[\"Model\"], **localns: Any) -> None:\n    if IS_PYDANTIC_V2:\n        model.model_rebuild(raise_errors=False)  # type: ignore[attr-defined]\n    else:\n        model.update_forward_refs(**localns)\n\n\n# Mirrors Pydantic's internal typing\nAnyCallable = Callable[..., Any]\n\n\ndef universal_root_validator(\n    pre: bool = False,\n) -> Callable[[AnyCallable], AnyCallable]:\n    def decorator(func: AnyCallable) -> AnyCallable:\n        if IS_PYDANTIC_V2:\n            # In Pydantic v2, for RootModel we always use \"before\" mode\n            # The custom validators transform the input value before the model is created\n            return cast(AnyCallable, pydantic.model_validator(mode=\"before\")(func))  # type: ignore[attr-defined]\n        return cast(AnyCallable, pydantic.root_validator(pre=pre)(func))  # type: ignore[call-overload]\n\n    return decorator\n\n\ndef universal_field_validator(field_name: str, pre: bool = False) -> Callable[[AnyCallable], AnyCallable]:\n    def decorator(func: AnyCallable) -> AnyCallable:\n        if IS_PYDANTIC_V2:\n            return cast(AnyCallable, pydantic.field_validator(field_name, mode=\"before\" if pre else 
\"after\")(func))  # type: ignore[attr-defined]\n        return cast(AnyCallable, pydantic.validator(field_name, pre=pre)(func))\n\n    return decorator\n\n\nPydanticField = Union[ModelField, _FieldInfo]\n\n\ndef _get_model_fields(model: Type[\"Model\"]) -> Mapping[str, PydanticField]:\n    if IS_PYDANTIC_V2:\n        return cast(Mapping[str, PydanticField], model.model_fields)  # type: ignore[attr-defined]\n    return cast(Mapping[str, PydanticField], model.__fields__)\n\n\ndef _get_field_default(field: PydanticField) -> Any:\n    try:\n        value = field.get_default()  # type: ignore[union-attr]\n    except:\n        value = field.default\n    if IS_PYDANTIC_V2:\n        from pydantic_core import PydanticUndefined\n\n        if value == PydanticUndefined:\n            return None\n        return value\n    return value\n"
  },
  {
    "path": "src/cohere/core/query_encoder.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom typing import Any, Dict, List, Optional, Tuple\n\nimport pydantic\n\n\n# Flattens dicts to be of the form {\"key[subkey][subkey2]\": value} where value is not a dict\ndef traverse_query_dict(dict_flat: Dict[str, Any], key_prefix: Optional[str] = None) -> List[Tuple[str, Any]]:\n    result = []\n    for k, v in dict_flat.items():\n        key = f\"{key_prefix}[{k}]\" if key_prefix is not None else k\n        if isinstance(v, dict):\n            result.extend(traverse_query_dict(v, key))\n        elif isinstance(v, list):\n            for arr_v in v:\n                if isinstance(arr_v, dict):\n                    result.extend(traverse_query_dict(arr_v, key))\n                else:\n                    result.append((key, arr_v))\n        else:\n            result.append((key, v))\n    return result\n\n\ndef single_query_encoder(query_key: str, query_value: Any) -> List[Tuple[str, Any]]:\n    if isinstance(query_value, pydantic.BaseModel) or isinstance(query_value, dict):\n        if isinstance(query_value, pydantic.BaseModel):\n            obj_dict = query_value.dict(by_alias=True)\n        else:\n            obj_dict = query_value\n        return traverse_query_dict(obj_dict, query_key)\n    elif isinstance(query_value, list):\n        encoded_values: List[Tuple[str, Any]] = []\n        for value in query_value:\n            if isinstance(value, pydantic.BaseModel) or isinstance(value, dict):\n                if isinstance(value, pydantic.BaseModel):\n                    obj_dict = value.dict(by_alias=True)\n                elif isinstance(value, dict):\n                    obj_dict = value\n\n                encoded_values.extend(single_query_encoder(query_key, obj_dict))\n            else:\n                encoded_values.append((query_key, value))\n\n        return encoded_values\n\n    return [(query_key, query_value)]\n\n\ndef encode_query(query: Optional[Dict[str, Any]]) 
-> Optional[List[Tuple[str, Any]]]:\n    if query is None:\n        return None\n\n    encoded_query = []\n    for k, v in query.items():\n        encoded_query.extend(single_query_encoder(k, v))\n    return encoded_query\n"
  },
  {
    "path": "src/cohere/core/remove_none_from_dict.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom typing import Any, Dict, Mapping, Optional\n\n\ndef remove_none_from_dict(original: Mapping[str, Optional[Any]]) -> Dict[str, Any]:\n    new: Dict[str, Any] = {}\n    for key, value in original.items():\n        if value is not None:\n            new[key] = value\n    return new\n"
  },
  {
    "path": "src/cohere/core/request_options.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\ntry:\n    from typing import NotRequired  # type: ignore\nexcept ImportError:\n    from typing_extensions import NotRequired\n\n\nclass RequestOptions(typing.TypedDict, total=False):\n    \"\"\"\n    Additional options for request-specific configuration when calling APIs via the SDK.\n    This is used primarily as an optional final parameter for service functions.\n\n    Attributes:\n        - timeout_in_seconds: int. The number of seconds to await an API call before timing out.\n\n        - max_retries: int. The max number of retries to attempt if the API call fails.\n\n        - additional_headers: typing.Dict[str, typing.Any]. A dictionary containing additional parameters to spread into the request's header dict\n\n        - additional_query_parameters: typing.Dict[str, typing.Any]. A dictionary containing additional parameters to spread into the request's query parameters dict\n\n        - additional_body_parameters: typing.Dict[str, typing.Any]. A dictionary containing additional parameters to spread into the request's body parameters dict\n\n        - chunk_size: int. The size, in bytes, to process each chunk of data being streamed back within the response. This equates to leveraging `chunk_size` within `requests` or `httpx`, and is only leveraged for file downloads.\n    \"\"\"\n\n    timeout_in_seconds: NotRequired[int]\n    max_retries: NotRequired[int]\n    additional_headers: NotRequired[typing.Dict[str, typing.Any]]\n    additional_query_parameters: NotRequired[typing.Dict[str, typing.Any]]\n    additional_body_parameters: NotRequired[typing.Dict[str, typing.Any]]\n    chunk_size: NotRequired[int]\n"
  },
  {
    "path": "src/cohere/core/serialization.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport collections\nimport inspect\nimport typing\n\nimport pydantic\nimport typing_extensions\n\n\nclass FieldMetadata:\n    \"\"\"\n    Metadata class used to annotate fields to provide additional information.\n\n    Example:\n    class MyDict(TypedDict):\n        field: typing.Annotated[str, FieldMetadata(alias=\"field_name\")]\n\n    Will serialize: `{\"field\": \"value\"}`\n    To: `{\"field_name\": \"value\"}`\n    \"\"\"\n\n    alias: str\n\n    def __init__(self, *, alias: str) -> None:\n        self.alias = alias\n\n\ndef convert_and_respect_annotation_metadata(\n    *,\n    object_: typing.Any,\n    annotation: typing.Any,\n    inner_type: typing.Optional[typing.Any] = None,\n    direction: typing.Literal[\"read\", \"write\"],\n) -> typing.Any:\n    \"\"\"\n    Respect the metadata annotations on a field, such as aliasing. This function effectively\n    manipulates the dict-form of an object to respect the metadata annotations. 
This is primarily used for\n    TypedDicts, which cannot support aliasing out of the box, and can be extended for additional\n    utilities, such as defaults.\n\n    Parameters\n    ----------\n    object_ : typing.Any\n\n    annotation : type\n        The type we're looking to apply typing annotations from\n\n    inner_type : typing.Optional[type]\n\n    Returns\n    -------\n    typing.Any\n    \"\"\"\n\n    if object_ is None:\n        return None\n    if inner_type is None:\n        inner_type = annotation\n\n    clean_type = _remove_annotations(inner_type)\n    # Pydantic models\n    if (\n        inspect.isclass(clean_type)\n        and issubclass(clean_type, pydantic.BaseModel)\n        and isinstance(object_, typing.Mapping)\n    ):\n        return _convert_mapping(object_, clean_type, direction)\n    # TypedDicts\n    if typing_extensions.is_typeddict(clean_type) and isinstance(object_, typing.Mapping):\n        return _convert_mapping(object_, clean_type, direction)\n\n    if (\n        typing_extensions.get_origin(clean_type) == typing.Dict\n        or typing_extensions.get_origin(clean_type) == dict\n        or clean_type == typing.Dict\n    ) and isinstance(object_, typing.Dict):\n        key_type = typing_extensions.get_args(clean_type)[0]\n        value_type = typing_extensions.get_args(clean_type)[1]\n\n        return {\n            key: convert_and_respect_annotation_metadata(\n                object_=value,\n                annotation=annotation,\n                inner_type=value_type,\n                direction=direction,\n            )\n            for key, value in object_.items()\n        }\n\n    # If you're iterating on a string, do not bother to coerce it to a sequence.\n    if not isinstance(object_, str):\n        if (\n            typing_extensions.get_origin(clean_type) == typing.Set\n            or typing_extensions.get_origin(clean_type) == set\n            or clean_type == typing.Set\n        ) and isinstance(object_, typing.Set):\n  
          inner_type = typing_extensions.get_args(clean_type)[0]\n            return {\n                convert_and_respect_annotation_metadata(\n                    object_=item,\n                    annotation=annotation,\n                    inner_type=inner_type,\n                    direction=direction,\n                )\n                for item in object_\n            }\n        elif (\n            (\n                typing_extensions.get_origin(clean_type) == typing.List\n                or typing_extensions.get_origin(clean_type) == list\n                or clean_type == typing.List\n            )\n            and isinstance(object_, typing.List)\n        ) or (\n            (\n                typing_extensions.get_origin(clean_type) == typing.Sequence\n                or typing_extensions.get_origin(clean_type) == collections.abc.Sequence\n                or clean_type == typing.Sequence\n            )\n            and isinstance(object_, typing.Sequence)\n        ):\n            inner_type = typing_extensions.get_args(clean_type)[0]\n            return [\n                convert_and_respect_annotation_metadata(\n                    object_=item,\n                    annotation=annotation,\n                    inner_type=inner_type,\n                    direction=direction,\n                )\n                for item in object_\n            ]\n\n    if typing_extensions.get_origin(clean_type) == typing.Union:\n        # We should be able to ~relatively~ safely try to convert keys against all\n        # member types in the union, the edge case here is if one member aliases a field\n        # of the same name to a different name from another member\n        # Or if another member aliases a field of the same name that another member does not.\n        for member in typing_extensions.get_args(clean_type):\n            object_ = convert_and_respect_annotation_metadata(\n                object_=object_,\n                annotation=annotation,\n                
inner_type=member,\n                direction=direction,\n            )\n        return object_\n\n    annotated_type = _get_annotation(annotation)\n    if annotated_type is None:\n        return object_\n\n    # If the object is not a TypedDict, a Union, or other container (list, set, sequence, etc.)\n    # Then we can safely call it on the recursive conversion.\n    return object_\n\n\ndef _convert_mapping(\n    object_: typing.Mapping[str, object],\n    expected_type: typing.Any,\n    direction: typing.Literal[\"read\", \"write\"],\n) -> typing.Mapping[str, object]:\n    converted_object: typing.Dict[str, object] = {}\n    try:\n        annotations = typing_extensions.get_type_hints(expected_type, include_extras=True)\n    except NameError:\n        # The TypedDict contains a circular reference, so\n        # we use the __annotations__ attribute directly.\n        annotations = getattr(expected_type, \"__annotations__\", {})\n    aliases_to_field_names = _get_alias_to_field_name(annotations)\n    for key, value in object_.items():\n        if direction == \"read\" and key in aliases_to_field_names:\n            dealiased_key = aliases_to_field_names.get(key)\n            if dealiased_key is not None:\n                type_ = annotations.get(dealiased_key)\n        else:\n            type_ = annotations.get(key)\n        # Note you can't get the annotation by the field name if you're in read mode, so you must check the aliases map\n        #\n        # So this is effectively saying if we're in write mode, and we don't have a type, or if we're in read mode and we don't have an alias\n        # then we can just pass the value through as is\n        if type_ is None:\n            converted_object[key] = value\n        elif direction == \"read\" and key not in aliases_to_field_names:\n            converted_object[key] = convert_and_respect_annotation_metadata(\n                object_=value, annotation=type_, direction=direction\n            )\n        else:\n        
    converted_object[_alias_key(key, type_, direction, aliases_to_field_names)] = (\n                convert_and_respect_annotation_metadata(object_=value, annotation=type_, direction=direction)\n            )\n    return converted_object\n\n\ndef _get_annotation(type_: typing.Any) -> typing.Optional[typing.Any]:\n    maybe_annotated_type = typing_extensions.get_origin(type_)\n    if maybe_annotated_type is None:\n        return None\n\n    if maybe_annotated_type == typing_extensions.NotRequired:\n        type_ = typing_extensions.get_args(type_)[0]\n        maybe_annotated_type = typing_extensions.get_origin(type_)\n\n    if maybe_annotated_type == typing_extensions.Annotated:\n        return type_\n\n    return None\n\n\ndef _remove_annotations(type_: typing.Any) -> typing.Any:\n    maybe_annotated_type = typing_extensions.get_origin(type_)\n    if maybe_annotated_type is None:\n        return type_\n\n    if maybe_annotated_type == typing_extensions.NotRequired:\n        return _remove_annotations(typing_extensions.get_args(type_)[0])\n\n    if maybe_annotated_type == typing_extensions.Annotated:\n        return _remove_annotations(typing_extensions.get_args(type_)[0])\n\n    return type_\n\n\ndef get_alias_to_field_mapping(type_: typing.Any) -> typing.Dict[str, str]:\n    annotations = typing_extensions.get_type_hints(type_, include_extras=True)\n    return _get_alias_to_field_name(annotations)\n\n\ndef get_field_to_alias_mapping(type_: typing.Any) -> typing.Dict[str, str]:\n    annotations = typing_extensions.get_type_hints(type_, include_extras=True)\n    return _get_field_to_alias_name(annotations)\n\n\ndef _get_alias_to_field_name(\n    field_to_hint: typing.Dict[str, typing.Any],\n) -> typing.Dict[str, str]:\n    aliases = {}\n    for field, hint in field_to_hint.items():\n        maybe_alias = _get_alias_from_type(hint)\n        if maybe_alias is not None:\n            aliases[maybe_alias] = field\n    return aliases\n\n\ndef _get_field_to_alias_name(\n  
  field_to_hint: typing.Dict[str, typing.Any],\n) -> typing.Dict[str, str]:\n    aliases = {}\n    for field, hint in field_to_hint.items():\n        maybe_alias = _get_alias_from_type(hint)\n        if maybe_alias is not None:\n            aliases[field] = maybe_alias\n    return aliases\n\n\ndef _get_alias_from_type(type_: typing.Any) -> typing.Optional[str]:\n    maybe_annotated_type = _get_annotation(type_)\n\n    if maybe_annotated_type is not None:\n        # The actual annotations are 1 onward, the first is the annotated type\n        annotations = typing_extensions.get_args(maybe_annotated_type)[1:]\n\n        for annotation in annotations:\n            if isinstance(annotation, FieldMetadata) and annotation.alias is not None:\n                return annotation.alias\n    return None\n\n\ndef _alias_key(\n    key: str,\n    type_: typing.Any,\n    direction: typing.Literal[\"read\", \"write\"],\n    aliases_to_field_names: typing.Dict[str, str],\n) -> str:\n    if direction == \"read\":\n        return aliases_to_field_names.get(key, key)\n    return _get_alias_from_type(type_=type_) or key\n"
  },
  {
    "path": "src/cohere/core/unchecked_base_model.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport datetime as dt\nimport enum\nimport inspect\nimport sys\nimport typing\nimport uuid\n\nimport pydantic\nimport typing_extensions\nfrom .pydantic_utilities import (  # type: ignore[attr-defined]\n    IS_PYDANTIC_V2,\n    ModelField,\n    UniversalBaseModel,\n    get_args,\n    get_origin,\n    is_literal_type,\n    is_union,\n    parse_date,\n    parse_datetime,\n    parse_obj_as,\n)\nfrom .serialization import get_field_to_alias_mapping\nfrom pydantic_core import PydanticUndefined\n\n\nclass UnionMetadata:\n    discriminant: str\n\n    def __init__(self, *, discriminant: str) -> None:\n        self.discriminant = discriminant\n\n\nModel = typing.TypeVar(\"Model\", bound=pydantic.BaseModel)\n\n\ndef _maybe_resolve_forward_ref(\n    type_: typing.Any,\n    host: typing.Optional[typing.Type[typing.Any]],\n) -> typing.Any:\n    \"\"\"Resolve a ForwardRef using the module where *host* is defined.\n\n    Pydantic v2 + ``from __future__ import annotations`` can leave field\n    annotations as ``list[ForwardRef('Block')]`` even after ``model_rebuild``.\n    Without resolution, ``construct_type`` sees a ForwardRef (not a class) and\n    skips recursive model construction, leaving nested data as raw dicts.\n    \"\"\"\n    if host is None or not isinstance(type_, typing.ForwardRef):\n        return type_\n    mod = sys.modules.get(host.__module__)\n    if mod is None:\n        return type_\n    try:\n        return eval(type_.__forward_arg__, vars(mod))\n    except Exception:\n        return type_\n\n\nclass UncheckedBaseModel(UniversalBaseModel):\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            extra = pydantic.Extra.allow\n\n    @classmethod\n    def model_construct(\n        cls: typing.Type[\"Model\"],\n        _fields_set: 
typing.Optional[typing.Set[str]] = None,\n        **values: typing.Any,\n    ) -> \"Model\":\n        # Fallback construct function to the specified override below.\n        return cls.construct(_fields_set=_fields_set, **values)\n\n    # Allow construct to not validate model\n    # Implementation taken from: https://github.com/pydantic/pydantic/issues/1168#issuecomment-817742836\n    @classmethod\n    def construct(\n        cls: typing.Type[\"Model\"],\n        _fields_set: typing.Optional[typing.Set[str]] = None,\n        **values: typing.Any,\n    ) -> \"Model\":\n        m = cls.__new__(cls)\n        fields_values = {}\n\n        if _fields_set is None:\n            _fields_set = set(values.keys())\n\n        fields = _get_model_fields(cls)\n        populate_by_name = _get_is_populate_by_name(cls)\n        field_aliases = get_field_to_alias_mapping(cls)\n\n        for name, field in fields.items():\n            # Key here is only used to pull data from the values dict\n            # you should always use the NAME of the field to for field_values, etc.\n            # because that's how the object is constructed from a pydantic perspective\n            key = field.alias\n            if (key is None or field.alias == name) and name in field_aliases:\n                key = field_aliases[name]\n\n            if key is None or (key not in values and populate_by_name):  # Added this to allow population by field name\n                key = name\n\n            if key in values:\n                if IS_PYDANTIC_V2:\n                    type_ = field.annotation  # type: ignore # Pydantic v2\n                else:\n                    type_ = typing.cast(typing.Type, field.outer_type_)  # type: ignore # Pydantic < v1.10.15\n\n                fields_values[name] = (\n                    construct_type(object_=values[key], type_=type_, host=cls) if type_ is not None else values[key]\n                )\n                _fields_set.add(name)\n            else:\n                
default = _get_field_default(field)\n                fields_values[name] = default\n\n                # If the default values are non-null act like they've been set\n                # This effectively allows exclude_unset to work like exclude_none where\n                # the latter passes through intentionally set none values.\n                if default != None and default != PydanticUndefined:\n                    _fields_set.add(name)\n\n        # Add extras back in\n        extras = {}\n        pydantic_alias_fields = [field.alias for field in fields.values()]\n        internal_alias_fields = list(field_aliases.values())\n        for key, value in values.items():\n            # If the key is not a field by name, nor an alias to a field, then it's extra\n            if (key not in pydantic_alias_fields and key not in internal_alias_fields) and key not in fields:\n                if IS_PYDANTIC_V2:\n                    extras[key] = value\n                else:\n                    _fields_set.add(key)\n                    fields_values[key] = value\n\n        object.__setattr__(m, \"__dict__\", fields_values)\n\n        if IS_PYDANTIC_V2:\n            object.__setattr__(m, \"__pydantic_private__\", None)\n            object.__setattr__(m, \"__pydantic_extra__\", extras)\n            object.__setattr__(m, \"__pydantic_fields_set__\", _fields_set)\n        else:\n            object.__setattr__(m, \"__fields_set__\", _fields_set)\n            m._init_private_attributes()  # type: ignore # Pydantic v1\n        return m\n\n\ndef _validate_collection_items_compatible(collection: typing.Any, target_type: typing.Type[typing.Any]) -> bool:\n    \"\"\"\n    Validate that all items in a collection are compatible with the target type.\n\n    Args:\n        collection: The collection to validate (list, set, or dict values)\n        target_type: The target type to validate against\n\n    Returns:\n        True if all items are compatible, False otherwise\n    \"\"\"\n    if 
inspect.isclass(target_type) and issubclass(target_type, pydantic.BaseModel):\n        for item in collection:\n            try:\n                # Try to validate the item against the target type\n                if isinstance(item, dict):\n                    parse_obj_as(target_type, item)\n                else:\n                    # If it's not a dict, it might already be the right type\n                    if not isinstance(item, target_type):\n                        return False\n            except Exception:\n                return False\n    return True\n\n\ndef _get_literal_field_value(\n    inner_type: typing.Type[typing.Any], field_name: str, field: typing.Any, object_: typing.Any\n) -> typing.Any:\n    \"\"\"Get the value of a Literal field from *object_*, checking both alias and field name.\"\"\"\n    name_or_alias = get_field_to_alias_mapping(inner_type).get(field_name, field_name)\n    pydantic_alias = getattr(field, \"alias\", None)\n    if isinstance(object_, dict):\n        if name_or_alias in object_:\n            return object_[name_or_alias]\n        if pydantic_alias and pydantic_alias != name_or_alias and pydantic_alias in object_:\n            return object_[pydantic_alias]\n        return None\n    return getattr(object_, name_or_alias, getattr(object_, pydantic_alias, None) if pydantic_alias else None)\n\n\ndef _literal_fields_match_strict(inner_type: typing.Type[typing.Any], object_: typing.Any) -> bool:\n    \"\"\"Return True iff every Literal-typed field in *inner_type* is **present** in\n    *object_* and its value equals the field's declared default.\n\n    This prevents models whose fields are all optional (e.g. ``FigureDetails``)\n    from vacuously matching inputs that don't carry the discriminant key at all\n    (e.g. ``{}`` for text blocks).  
For types with no Literal fields this\n    returns True unconditionally.\n    \"\"\"\n    fields = _get_model_fields(inner_type)\n    for field_name, field in fields.items():\n        if IS_PYDANTIC_V2:\n            field_type = field.annotation  # type: ignore # Pydantic v2\n        else:\n            field_type = field.outer_type_  # type: ignore # Pydantic v1\n\n        if is_literal_type(field_type):  # type: ignore[arg-type]\n            field_default = _get_field_default(field)\n            object_value = _get_literal_field_value(inner_type, field_name, field, object_)\n            if field_default != object_value:\n                return False\n    return True\n\n\ndef _convert_undiscriminated_union_type(\n    union_type: typing.Type[typing.Any],\n    object_: typing.Any,\n    host: typing.Optional[typing.Type[typing.Any]] = None,\n) -> typing.Any:\n    inner_types = get_args(union_type)\n    if typing.Any in inner_types:\n        return object_\n\n    # When any union member carries a Literal discriminant field, require the\n    # discriminant key to be present AND matching before accepting a candidate.\n    # This prevents models with all-optional fields (e.g. FigureDetails) from\n    # greedily matching inputs that belong to a different variant or to a\n    # plain-dict fallback (e.g. 
EmptyBlockDetails = Dict[str, Any]).\n    has_literal_discriminant = any(\n        inspect.isclass(t)\n        and issubclass(t, pydantic.BaseModel)\n        and any(\n            is_literal_type(\n                f.annotation if IS_PYDANTIC_V2 else f.outer_type_  # type: ignore\n            )\n            for f in _get_model_fields(t).values()\n        )\n        for t in inner_types\n    )\n\n    for inner_type in inner_types:\n        # Handle lists of objects that need parsing\n        if get_origin(inner_type) is list and isinstance(object_, list):\n            list_inner_type = _maybe_resolve_forward_ref(get_args(inner_type)[0], host)\n            try:\n                if inspect.isclass(list_inner_type) and issubclass(list_inner_type, pydantic.BaseModel):\n                    # Validate that all items in the list are compatible with the target type\n                    if _validate_collection_items_compatible(object_, list_inner_type):\n                        parsed_list = [parse_obj_as(object_=item, type_=list_inner_type) for item in object_]\n                        return parsed_list\n            except Exception:\n                pass\n\n        try:\n            if inspect.isclass(inner_type) and issubclass(inner_type, pydantic.BaseModel):\n                if has_literal_discriminant and not _literal_fields_match_strict(inner_type, object_):\n                    continue\n                # Attempt a validated parse until one works\n                return parse_obj_as(inner_type, object_)\n        except Exception:\n            continue\n\n    # First pass: try types where all literal fields match the object's values.\n    for inner_type in inner_types:\n        if inspect.isclass(inner_type) and issubclass(inner_type, pydantic.BaseModel):\n            if has_literal_discriminant:\n                if not _literal_fields_match_strict(inner_type, object_):\n                    continue\n            else:\n                # Legacy lenient check: skip only 
when a Literal value is\n                # present but doesn't match (allows absent-discriminant inputs).\n                fields = _get_model_fields(inner_type)\n                literal_fields_match = True\n                for field_name, field in fields.items():\n                    if IS_PYDANTIC_V2:\n                        field_type = field.annotation  # type: ignore # Pydantic v2\n                    else:\n                        field_type = field.outer_type_  # type: ignore # Pydantic v1\n\n                    if is_literal_type(field_type):  # type: ignore[arg-type]\n                        field_default = _get_field_default(field)\n                        object_value = _get_literal_field_value(inner_type, field_name, field, object_)\n                        if object_value is not None and field_default != object_value:\n                            literal_fields_match = False\n                            break\n\n                if not literal_fields_match:\n                    continue\n\n            try:\n                return construct_type(object_=object_, type_=inner_type, host=host)\n            except Exception:\n                continue\n\n    # Second pass: if no literal matches, return the first successful cast.\n    # When a Literal discriminant is present, skip Pydantic models whose\n    # discriminant doesn't match so that plain-dict fallback types are reached.\n    for inner_type in inner_types:\n        try:\n            if has_literal_discriminant and inspect.isclass(inner_type) and issubclass(inner_type, pydantic.BaseModel):\n                if not _literal_fields_match_strict(inner_type, object_):\n                    continue\n            return construct_type(object_=object_, type_=inner_type, host=host)\n        except Exception:\n            continue\n\n\ndef _convert_union_type(\n    type_: typing.Type[typing.Any],\n    object_: typing.Any,\n    host: typing.Optional[typing.Type[typing.Any]] = None,\n) -> typing.Any:\n    
base_type = get_origin(type_) or type_\n    union_type = type_\n    if base_type == typing_extensions.Annotated:  # type: ignore[comparison-overlap]\n        union_type = get_args(type_)[0]\n        annotated_metadata = get_args(type_)[1:]\n        for metadata in annotated_metadata:\n            if isinstance(metadata, UnionMetadata):\n                try:\n                    # Cast to the correct type, based on the discriminant\n                    for inner_type in get_args(union_type):\n                        try:\n                            objects_discriminant = getattr(object_, metadata.discriminant)\n                        except:\n                            objects_discriminant = object_[metadata.discriminant]\n                        if inner_type.__fields__[metadata.discriminant].default == objects_discriminant:\n                            return construct_type(object_=object_, type_=inner_type, host=host)\n                except Exception:\n                    # Allow to fall through to our regular union handling\n                    pass\n    return _convert_undiscriminated_union_type(union_type, object_, host)\n\n\ndef construct_type(\n    *,\n    type_: typing.Type[typing.Any],\n    object_: typing.Any,\n    host: typing.Optional[typing.Type[typing.Any]] = None,\n) -> typing.Any:\n    \"\"\"\n    Here we are essentially creating the same `construct` method in spirit as the above, but for all types, not just\n    Pydantic models.\n    The idea is to essentially attempt to coerce object_ to type_ (recursively)\n    \"\"\"\n    # Short circuit when dealing with optionals, don't try to coerces None to a type\n    if object_ is None:\n        return None\n\n    base_type = get_origin(type_) or type_\n    is_annotated = base_type == typing_extensions.Annotated  # type: ignore[comparison-overlap]\n    maybe_annotation_members = get_args(type_)\n    is_annotated_union = is_annotated and is_union(get_origin(maybe_annotation_members[0]))\n\n    if 
base_type == typing.Any:  # type: ignore[comparison-overlap]\n        return object_\n\n    if base_type == dict:\n        if not isinstance(object_, typing.Mapping):\n            return object_\n\n        key_type, items_type = get_args(type_)\n        key_type = _maybe_resolve_forward_ref(key_type, host)\n        items_type = _maybe_resolve_forward_ref(items_type, host)\n        d = {\n            construct_type(object_=key, type_=key_type, host=host): construct_type(\n                object_=item, type_=items_type, host=host\n            )\n            for key, item in object_.items()\n        }\n        return d\n\n    if base_type == list:\n        if not isinstance(object_, list):\n            return object_\n\n        inner_type = _maybe_resolve_forward_ref(get_args(type_)[0], host)\n        return [construct_type(object_=entry, type_=inner_type, host=host) for entry in object_]\n\n    if base_type == set:\n        if not isinstance(object_, set) and not isinstance(object_, list):\n            return object_\n\n        inner_type = _maybe_resolve_forward_ref(get_args(type_)[0], host)\n        return {construct_type(object_=entry, type_=inner_type, host=host) for entry in object_}\n\n    if is_union(base_type) or is_annotated_union:\n        return _convert_union_type(type_, object_, host)\n\n    # Cannot do an `issubclass` with a literal type, let's also just confirm we have a class before this call\n    if (\n        object_ is not None\n        and not is_literal_type(type_)\n        and (\n            (inspect.isclass(base_type) and issubclass(base_type, pydantic.BaseModel))\n            or (\n                is_annotated\n                and inspect.isclass(maybe_annotation_members[0])\n                and issubclass(maybe_annotation_members[0], pydantic.BaseModel)\n            )\n        )\n    ):\n        if IS_PYDANTIC_V2:\n            return type_.model_construct(**object_)\n        else:\n            return type_.construct(**object_)\n\n    if 
base_type == dt.datetime:\n        try:\n            return parse_datetime(object_)\n        except Exception:\n            return object_\n\n    if base_type == dt.date:\n        try:\n            return parse_date(object_)\n        except Exception:\n            return object_\n\n    if base_type == uuid.UUID:\n        try:\n            return uuid.UUID(object_)\n        except Exception:\n            return object_\n\n    if base_type == int:\n        try:\n            return int(object_)\n        except Exception:\n            return object_\n\n    if base_type == bool:\n        try:\n            if isinstance(object_, str):\n                stringified_object = object_.lower()\n                return stringified_object == \"true\" or stringified_object == \"1\"\n\n            return bool(object_)\n        except Exception:\n            return object_\n\n    if inspect.isclass(base_type) and issubclass(base_type, enum.Enum):\n        try:\n            return base_type(object_)\n        except (ValueError, KeyError):\n            return object_\n\n    return object_\n\n\ndef _get_is_populate_by_name(model: typing.Type[\"Model\"]) -> bool:\n    if IS_PYDANTIC_V2:\n        return model.model_config.get(\"populate_by_name\", False)  # type: ignore # Pydantic v2\n    return model.__config__.allow_population_by_field_name  # type: ignore # Pydantic v1\n\n\nfrom pydantic.fields import FieldInfo as _FieldInfo\n\nPydanticField = typing.Union[ModelField, _FieldInfo]\n\n\n# Pydantic V1 swapped the typing of __fields__'s values from ModelField to FieldInfo\n# And so we try to handle both V1 cases, as well as V2 (FieldInfo from model.model_fields)\ndef _get_model_fields(\n    model: typing.Type[\"Model\"],\n) -> typing.Mapping[str, PydanticField]:\n    if IS_PYDANTIC_V2:\n        return model.model_fields  # type: ignore # Pydantic v2\n    else:\n        return model.__fields__  # type: ignore # Pydantic v1\n\n\ndef _get_field_default(field: PydanticField) -> typing.Any:\n  
  try:\n        value = field.get_default()  # type: ignore # Pydantic < v1.10.15\n    except:\n        value = field.default\n    if IS_PYDANTIC_V2:\n        from pydantic_core import PydanticUndefined\n\n        if value == PydanticUndefined:\n            return None\n        return value\n    return value\n"
  },
  {
    "path": "src/cohere/datasets/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .types import DatasetsCreateResponse, DatasetsGetResponse, DatasetsGetUsageResponse, DatasetsListResponse\n_dynamic_imports: typing.Dict[str, str] = {\n    \"DatasetsCreateResponse\": \".types\",\n    \"DatasetsGetResponse\": \".types\",\n    \"DatasetsGetUsageResponse\": \".types\",\n    \"DatasetsListResponse\": \".types\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\"DatasetsCreateResponse\", \"DatasetsGetResponse\", \"DatasetsGetUsageResponse\", \"DatasetsListResponse\"]\n"
  },
  {
    "path": "src/cohere/datasets/client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport datetime as dt\nimport typing\n\nfrom .. import core\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.request_options import RequestOptions\nfrom ..types.dataset_type import DatasetType\nfrom ..types.dataset_validation_status import DatasetValidationStatus\nfrom .raw_client import AsyncRawDatasetsClient, RawDatasetsClient\nfrom .types.datasets_create_response import DatasetsCreateResponse\nfrom .types.datasets_get_response import DatasetsGetResponse\nfrom .types.datasets_get_usage_response import DatasetsGetUsageResponse\nfrom .types.datasets_list_response import DatasetsListResponse\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass DatasetsClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._raw_client = RawDatasetsClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> RawDatasetsClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        RawDatasetsClient\n        \"\"\"\n        return self._raw_client\n\n    def list(\n        self,\n        *,\n        dataset_type: typing.Optional[str] = None,\n        before: typing.Optional[dt.datetime] = None,\n        after: typing.Optional[dt.datetime] = None,\n        limit: typing.Optional[float] = None,\n        offset: typing.Optional[float] = None,\n        validation_status: typing.Optional[DatasetValidationStatus] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> DatasetsListResponse:\n        \"\"\"\n        List datasets that have been created.\n\n        Parameters\n        ----------\n        dataset_type : typing.Optional[str]\n            optional filter by dataset type\n\n        before : typing.Optional[dt.datetime]\n            
optional filter before a date\n\n        after : typing.Optional[dt.datetime]\n            optional filter after a date\n\n        limit : typing.Optional[float]\n            optional limit to number of results\n\n        offset : typing.Optional[float]\n            optional offset to start of results\n\n        validation_status : typing.Optional[DatasetValidationStatus]\n            optional filter by validation status\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DatasetsListResponse\n            A successful response.\n\n        Examples\n        --------\n        import datetime\n\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.datasets.list(\n            dataset_type=\"datasetType\",\n            before=datetime.datetime.fromisoformat(\n                \"2024-01-15 09:30:00+00:00\",\n            ),\n            after=datetime.datetime.fromisoformat(\n                \"2024-01-15 09:30:00+00:00\",\n            ),\n            limit=1.1,\n            offset=1.1,\n            validation_status=\"unknown\",\n        )\n        \"\"\"\n        _response = self._raw_client.list(\n            dataset_type=dataset_type,\n            before=before,\n            after=after,\n            limit=limit,\n            offset=offset,\n            validation_status=validation_status,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def create(\n        self,\n        *,\n        name: str,\n        type: DatasetType,\n        data: core.File,\n        keep_original_file: typing.Optional[bool] = None,\n        skip_malformed_input: typing.Optional[bool] = None,\n        keep_fields: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None,\n        optional_fields: typing.Optional[typing.Union[str, 
typing.Sequence[str]]] = None,\n        text_separator: typing.Optional[str] = None,\n        csv_delimiter: typing.Optional[str] = None,\n        eval_data: typing.Optional[core.File] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> DatasetsCreateResponse:\n        \"\"\"\n        Create a dataset by uploading a file. See ['Dataset Creation'](https://docs.cohere.com/docs/datasets#dataset-creation) for more information.\n\n        Parameters\n        ----------\n        name : str\n            The name of the uploaded dataset.\n\n        type : DatasetType\n            The dataset type, which is used to validate the data. The only valid type is `embed-input` used in conjunction with the Embed Jobs API.\n\n        data : core.File\n            See core.File for more documentation\n\n        keep_original_file : typing.Optional[bool]\n            Indicates if the original file should be stored.\n\n        skip_malformed_input : typing.Optional[bool]\n            Indicates whether rows with malformed input should be dropped (instead of failing the validation check). Dropped rows will be returned in the warnings field.\n\n        keep_fields : typing.Optional[typing.Union[str, typing.Sequence[str]]]\n            List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `keep_fields` are missing from the uploaded file, Dataset validation will fail.\n\n        optional_fields : typing.Optional[typing.Union[str, typing.Sequence[str]]]\n            List of names of fields that will be persisted in the Dataset. 
By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, Datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `optional_fields` are missing from the uploaded file, Dataset validation will pass.\n\n        text_separator : typing.Optional[str]\n            Raw .txt uploads will be split into entries using the text_separator value.\n\n        csv_delimiter : typing.Optional[str]\n            The delimiter used for .csv uploads.\n\n        eval_data : typing.Optional[core.File]\n            See core.File for more documentation\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DatasetsCreateResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.datasets.create(\n            name=\"name\",\n            type=\"embed-input\",\n            keep_original_file=True,\n            skip_malformed_input=True,\n            text_separator=\"text_separator\",\n            csv_delimiter=\"csv_delimiter\",\n        )\n        \"\"\"\n        _response = self._raw_client.create(\n            name=name,\n            type=type,\n            data=data,\n            keep_original_file=keep_original_file,\n            skip_malformed_input=skip_malformed_input,\n            keep_fields=keep_fields,\n            optional_fields=optional_fields,\n            text_separator=text_separator,\n            csv_delimiter=csv_delimiter,\n            eval_data=eval_data,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def get_usage(self, *, request_options: 
typing.Optional[RequestOptions] = None) -> DatasetsGetUsageResponse:\n        \"\"\"\n        View the dataset storage usage for your Organization. Each Organization can have up to 10GB of storage across all their users.\n\n        Parameters\n        ----------\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DatasetsGetUsageResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.datasets.get_usage()\n        \"\"\"\n        _response = self._raw_client.get_usage(request_options=request_options)\n        return _response.data\n\n    def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> DatasetsGetResponse:\n        \"\"\"\n        Retrieve a dataset by ID. See ['Datasets'](https://docs.cohere.com/docs/datasets) for more information.\n\n        Parameters\n        ----------\n        id : str\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DatasetsGetResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.datasets.get(\n            id=\"id\",\n        )\n        \"\"\"\n        _response = self._raw_client.get(id, request_options=request_options)\n        return _response.data\n\n    def delete(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> typing.Dict[str, typing.Any]:\n        \"\"\"\n        Delete a dataset by ID. 
Datasets are automatically deleted after 30 days, but they can also be deleted manually.\n\n        Parameters\n        ----------\n        id : str\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        typing.Dict[str, typing.Any]\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.datasets.delete(\n            id=\"id\",\n        )\n        \"\"\"\n        _response = self._raw_client.delete(id, request_options=request_options)\n        return _response.data\n\n\nclass AsyncDatasetsClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._raw_client = AsyncRawDatasetsClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> AsyncRawDatasetsClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        AsyncRawDatasetsClient\n        \"\"\"\n        return self._raw_client\n\n    async def list(\n        self,\n        *,\n        dataset_type: typing.Optional[str] = None,\n        before: typing.Optional[dt.datetime] = None,\n        after: typing.Optional[dt.datetime] = None,\n        limit: typing.Optional[float] = None,\n        offset: typing.Optional[float] = None,\n        validation_status: typing.Optional[DatasetValidationStatus] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> DatasetsListResponse:\n        \"\"\"\n        List datasets that have been created.\n\n        Parameters\n        ----------\n        dataset_type : typing.Optional[str]\n            optional filter by dataset type\n\n        before : typing.Optional[dt.datetime]\n            optional filter before a date\n\n    
    after : typing.Optional[dt.datetime]\n            optional filter after a date\n\n        limit : typing.Optional[float]\n            optional limit to number of results\n\n        offset : typing.Optional[float]\n            optional offset to start of results\n\n        validation_status : typing.Optional[DatasetValidationStatus]\n            optional filter by validation status\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DatasetsListResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n        import datetime\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.datasets.list(\n                dataset_type=\"datasetType\",\n                before=datetime.datetime.fromisoformat(\n                    \"2024-01-15 09:30:00+00:00\",\n                ),\n                after=datetime.datetime.fromisoformat(\n                    \"2024-01-15 09:30:00+00:00\",\n                ),\n                limit=1.1,\n                offset=1.1,\n                validation_status=\"unknown\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.list(\n            dataset_type=dataset_type,\n            before=before,\n            after=after,\n            limit=limit,\n            offset=offset,\n            validation_status=validation_status,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def create(\n        self,\n        *,\n        name: str,\n        type: DatasetType,\n        data: core.File,\n        keep_original_file: typing.Optional[bool] = None,\n        skip_malformed_input: typing.Optional[bool] = None,\n        
keep_fields: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None,\n        optional_fields: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None,\n        text_separator: typing.Optional[str] = None,\n        csv_delimiter: typing.Optional[str] = None,\n        eval_data: typing.Optional[core.File] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> DatasetsCreateResponse:\n        \"\"\"\n        Create a dataset by uploading a file. See ['Dataset Creation'](https://docs.cohere.com/docs/datasets#dataset-creation) for more information.\n\n        Parameters\n        ----------\n        name : str\n            The name of the uploaded dataset.\n\n        type : DatasetType\n            The dataset type, which is used to validate the data. The only valid type is `embed-input` used in conjunction with the Embed Jobs API.\n\n        data : core.File\n            See core.File for more documentation\n\n        keep_original_file : typing.Optional[bool]\n            Indicates if the original file should be stored.\n\n        skip_malformed_input : typing.Optional[bool]\n            Indicates whether rows with malformed input should be dropped (instead of failing the validation check). Dropped rows will be returned in the warnings field.\n\n        keep_fields : typing.Optional[typing.Union[str, typing.Sequence[str]]]\n            List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, datasets of type `embed-input` will drop all fields other than the required `text` field. 
If any of the fields in `keep_fields` are missing from the uploaded file, Dataset validation will fail.\n\n        optional_fields : typing.Optional[typing.Union[str, typing.Sequence[str]]]\n            List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, Datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `optional_fields` are missing from the uploaded file, Dataset validation will pass.\n\n        text_separator : typing.Optional[str]\n            Raw .txt uploads will be split into entries using the text_separator value.\n\n        csv_delimiter : typing.Optional[str]\n            The delimiter used for .csv uploads.\n\n        eval_data : typing.Optional[core.File]\n            See core.File for more documentation\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DatasetsCreateResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.datasets.create(\n                name=\"name\",\n                type=\"embed-input\",\n                keep_original_file=True,\n                skip_malformed_input=True,\n                text_separator=\"text_separator\",\n                csv_delimiter=\"csv_delimiter\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.create(\n            name=name,\n            type=type,\n            data=data,\n            
keep_original_file=keep_original_file,\n            skip_malformed_input=skip_malformed_input,\n            keep_fields=keep_fields,\n            optional_fields=optional_fields,\n            text_separator=text_separator,\n            csv_delimiter=csv_delimiter,\n            eval_data=eval_data,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def get_usage(self, *, request_options: typing.Optional[RequestOptions] = None) -> DatasetsGetUsageResponse:\n        \"\"\"\n        View the dataset storage usage for your Organization. Each Organization can have up to 10GB of storage across all their users.\n\n        Parameters\n        ----------\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DatasetsGetUsageResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.datasets.get_usage()\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.get_usage(request_options=request_options)\n        return _response.data\n\n    async def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> DatasetsGetResponse:\n        \"\"\"\n        Retrieve a dataset by ID. 
See ['Datasets'](https://docs.cohere.com/docs/datasets) for more information.\n\n        Parameters\n        ----------\n        id : str\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DatasetsGetResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.datasets.get(\n                id=\"id\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.get(id, request_options=request_options)\n        return _response.data\n\n    async def delete(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> typing.Dict[str, typing.Any]:\n        \"\"\"\n        Delete a dataset by ID. Datasets are automatically deleted after 30 days, but they can also be deleted manually.\n\n        Parameters\n        ----------\n        id : str\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        typing.Dict[str, typing.Any]\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.datasets.delete(\n                id=\"id\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.delete(id, request_options=request_options)\n        return _response.data\n"
  },
  {
    "path": "src/cohere/datasets/raw_client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport datetime as dt\nimport typing\nfrom json.decoder import JSONDecodeError\n\nfrom .. import core\nfrom ..core.api_error import ApiError\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.datetime_utils import serialize_datetime\nfrom ..core.http_response import AsyncHttpResponse, HttpResponse\nfrom ..core.jsonable_encoder import jsonable_encoder\nfrom ..core.parse_error import ParsingError\nfrom ..core.request_options import RequestOptions\nfrom ..core.unchecked_base_model import construct_type\nfrom ..errors.bad_request_error import BadRequestError\nfrom ..errors.client_closed_request_error import ClientClosedRequestError\nfrom ..errors.forbidden_error import ForbiddenError\nfrom ..errors.gateway_timeout_error import GatewayTimeoutError\nfrom ..errors.internal_server_error import InternalServerError\nfrom ..errors.invalid_token_error import InvalidTokenError\nfrom ..errors.not_found_error import NotFoundError\nfrom ..errors.not_implemented_error import NotImplementedError\nfrom ..errors.service_unavailable_error import ServiceUnavailableError\nfrom ..errors.too_many_requests_error import TooManyRequestsError\nfrom ..errors.unauthorized_error import UnauthorizedError\nfrom ..errors.unprocessable_entity_error import UnprocessableEntityError\nfrom ..types.dataset_type import DatasetType\nfrom ..types.dataset_validation_status import DatasetValidationStatus\nfrom .types.datasets_create_response import DatasetsCreateResponse\nfrom .types.datasets_get_response import DatasetsGetResponse\nfrom .types.datasets_get_usage_response import DatasetsGetUsageResponse\nfrom .types.datasets_list_response import DatasetsListResponse\nfrom pydantic import ValidationError\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass RawDatasetsClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n 
       self._client_wrapper = client_wrapper\n\n    def list(\n        self,\n        *,\n        dataset_type: typing.Optional[str] = None,\n        before: typing.Optional[dt.datetime] = None,\n        after: typing.Optional[dt.datetime] = None,\n        limit: typing.Optional[float] = None,\n        offset: typing.Optional[float] = None,\n        validation_status: typing.Optional[DatasetValidationStatus] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[DatasetsListResponse]:\n        \"\"\"\n        List datasets that have been created.\n\n        Parameters\n        ----------\n        dataset_type : typing.Optional[str]\n            optional filter by dataset type\n\n        before : typing.Optional[dt.datetime]\n            optional filter before a date\n\n        after : typing.Optional[dt.datetime]\n            optional filter after a date\n\n        limit : typing.Optional[float]\n            optional limit to number of results\n\n        offset : typing.Optional[float]\n            optional offset to start of results\n\n        validation_status : typing.Optional[DatasetValidationStatus]\n            optional filter by validation status\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[DatasetsListResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/datasets\",\n            method=\"GET\",\n            params={\n                \"datasetType\": dataset_type,\n                \"before\": serialize_datetime(before) if before is not None else None,\n                \"after\": serialize_datetime(after) if after is not None else None,\n                \"limit\": limit,\n                \"offset\": offset,\n                \"validationStatus\": validation_status,\n            },\n            
request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DatasetsListResponse,\n                    construct_type(\n                        type_=DatasetsListResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                       
 typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                  
      ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, 
headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def create(\n        self,\n        *,\n        name: str,\n        type: DatasetType,\n        data: core.File,\n        keep_original_file: typing.Optional[bool] = None,\n        skip_malformed_input: typing.Optional[bool] = None,\n        keep_fields: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None,\n        optional_fields: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None,\n        text_separator: typing.Optional[str] = None,\n        csv_delimiter: typing.Optional[str] = None,\n        eval_data: typing.Optional[core.File] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[DatasetsCreateResponse]:\n        \"\"\"\n        Create a dataset by uploading a file. See ['Dataset Creation'](https://docs.cohere.com/docs/datasets#dataset-creation) for more information.\n\n        Parameters\n        ----------\n        name : str\n            The name of the uploaded dataset.\n\n        type : DatasetType\n            The dataset type, which is used to validate the data. The only valid type is `embed-input` used in conjunction with the Embed Jobs API.\n\n        data : core.File\n            See core.File for more documentation\n\n        keep_original_file : typing.Optional[bool]\n            Indicates if the original file should be stored.\n\n        skip_malformed_input : typing.Optional[bool]\n            Indicates whether rows with malformed input should be dropped (instead of failing the validation check). 
Dropped rows will be returned in the warnings field.\n\n        keep_fields : typing.Optional[typing.Union[str, typing.Sequence[str]]]\n            List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `keep_fields` are missing from the uploaded file, Dataset validation will fail.\n\n        optional_fields : typing.Optional[typing.Union[str, typing.Sequence[str]]]\n            List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, Datasets of type `embed-input` will drop all fields other than the required `text` field. 
If any of the fields in `optional_fields` are missing from the uploaded file, Dataset validation will pass.\n\n        text_separator : typing.Optional[str]\n            Raw .txt uploads will be split into entries using the text_separator value.\n\n        csv_delimiter : typing.Optional[str]\n            The delimiter used for .csv uploads.\n\n        eval_data : typing.Optional[core.File]\n            See core.File for more documentation\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[DatasetsCreateResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/datasets\",\n            method=\"POST\",\n            params={\n                \"name\": name,\n                \"type\": type,\n                \"keep_original_file\": keep_original_file,\n                \"skip_malformed_input\": skip_malformed_input,\n                \"keep_fields\": keep_fields,\n                \"optional_fields\": optional_fields,\n                \"text_separator\": text_separator,\n                \"csv_delimiter\": csv_delimiter,\n            },\n            data={},\n            files={\n                \"data\": data,\n                **({\"eval_data\": eval_data} if eval_data is not None else {}),\n            },\n            request_options=request_options,\n            omit=OMIT,\n            force_multipart=True,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DatasetsCreateResponse,\n                    construct_type(\n                        type_=DatasetsCreateResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 
400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        
construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n         
       )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def get_usage(\n        self, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> 
HttpResponse[DatasetsGetUsageResponse]:\n        \"\"\"\n        View the dataset storage usage for your Organization. Each Organization can have up to 10GB of storage across all their users.\n\n        Parameters\n        ----------\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[DatasetsGetUsageResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/datasets/usage\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DatasetsGetUsageResponse,\n                    construct_type(\n                        type_=DatasetsGetUsageResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n  
                      construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n      
              ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def get(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[DatasetsGetResponse]:\n        \"\"\"\n        Retrieve a dataset by ID. 
See ['Datasets'](https://docs.cohere.com/docs/datasets) for more information.\n\n        Parameters\n        ----------\n        id : str\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[DatasetsGetResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/datasets/{jsonable_encoder(id)}\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DatasetsGetResponse,\n                    construct_type(\n                        type_=DatasetsGetResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    
headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # 
type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n     
           raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def delete(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[typing.Dict[str, typing.Any]]:\n        \"\"\"\n        Delete a dataset by ID. 
Datasets are automatically deleted after 30 days, but they can also be deleted manually.\n\n        Parameters\n        ----------\n        id : str\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[typing.Dict[str, typing.Any]]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/datasets/{jsonable_encoder(id)}\",\n            method=\"DELETE\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    typing.Dict[str, typing.Any],\n                    construct_type(\n                        type_=typing.Dict[str, typing.Any],  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise 
ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n              
              type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n\nclass AsyncRawDatasetsClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    async def list(\n        self,\n        *,\n        dataset_type: typing.Optional[str] = None,\n        before: typing.Optional[dt.datetime] = None,\n        after: typing.Optional[dt.datetime] = None,\n        limit: typing.Optional[float] = None,\n        offset: typing.Optional[float] = None,\n        validation_status: typing.Optional[DatasetValidationStatus] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[DatasetsListResponse]:\n        \"\"\"\n        List datasets that have been created.\n\n        Parameters\n        ----------\n        dataset_type : typing.Optional[str]\n            optional filter by dataset type\n\n        before : typing.Optional[dt.datetime]\n            optional filter before a date\n\n        after : typing.Optional[dt.datetime]\n            optional filter after a date\n\n        limit : 
typing.Optional[float]\n            optional limit to number of results\n\n        offset : typing.Optional[float]\n            optional offset to start of results\n\n        validation_status : typing.Optional[DatasetValidationStatus]\n            optional filter by validation status\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[DatasetsListResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/datasets\",\n            method=\"GET\",\n            params={\n                \"datasetType\": dataset_type,\n                \"before\": serialize_datetime(before) if before is not None else None,\n                \"after\": serialize_datetime(after) if after is not None else None,\n                \"limit\": limit,\n                \"offset\": offset,\n                \"validationStatus\": validation_status,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DatasetsListResponse,\n                    construct_type(\n                        type_=DatasetsListResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n  
                      construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n            
        ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def create(\n        self,\n        *,\n        name: str,\n        type: DatasetType,\n        data: core.File,\n        keep_original_file: typing.Optional[bool] = None,\n        skip_malformed_input: typing.Optional[bool] = None,\n        keep_fields: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None,\n        optional_fields: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None,\n        text_separator: typing.Optional[str] = None,\n        csv_delimiter: typing.Optional[str] = None,\n        eval_data: 
typing.Optional[core.File] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[DatasetsCreateResponse]:\n        \"\"\"\n        Create a dataset by uploading a file. See ['Dataset Creation'](https://docs.cohere.com/docs/datasets#dataset-creation) for more information.\n\n        Parameters\n        ----------\n        name : str\n            The name of the uploaded dataset.\n\n        type : DatasetType\n            The dataset type, which is used to validate the data. The only valid type is `embed-input` used in conjunction with the Embed Jobs API.\n\n        data : core.File\n            See core.File for more documentation\n\n        keep_original_file : typing.Optional[bool]\n            Indicates if the original file should be stored.\n\n        skip_malformed_input : typing.Optional[bool]\n            Indicates whether rows with malformed input should be dropped (instead of failing the validation check). Dropped rows will be returned in the warnings field.\n\n        keep_fields : typing.Optional[typing.Union[str, typing.Sequence[str]]]\n            List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). For example, datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `keep_fields` are missing from the uploaded file, Dataset validation will fail.\n\n        optional_fields : typing.Optional[typing.Union[str, typing.Sequence[str]]]\n            List of names of fields that will be persisted in the Dataset. By default the Dataset will retain only the required fields indicated in the [schema for the corresponding Dataset type](https://docs.cohere.com/docs/datasets#dataset-types). 
For example, Datasets of type `embed-input` will drop all fields other than the required `text` field. If any of the fields in `optional_fields` are missing from the uploaded file, Dataset validation will pass.\n\n        text_separator : typing.Optional[str]\n            Raw .txt uploads will be split into entries using the text_separator value.\n\n        csv_delimiter : typing.Optional[str]\n            The delimiter used for .csv uploads.\n\n        eval_data : typing.Optional[core.File]\n            See core.File for more documentation\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[DatasetsCreateResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/datasets\",\n            method=\"POST\",\n            params={\n                \"name\": name,\n                \"type\": type,\n                \"keep_original_file\": keep_original_file,\n                \"skip_malformed_input\": skip_malformed_input,\n                \"keep_fields\": keep_fields,\n                \"optional_fields\": optional_fields,\n                \"text_separator\": text_separator,\n                \"csv_delimiter\": csv_delimiter,\n            },\n            data={},\n            files={\n                \"data\": data,\n                **({\"eval_data\": eval_data} if eval_data is not None else {}),\n            },\n            request_options=request_options,\n            omit=OMIT,\n            force_multipart=True,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DatasetsCreateResponse,\n                    construct_type(\n                        type_=DatasetsCreateResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                
)\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    
headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            
type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, 
headers=dict(_response.headers), body=_response_json)\n\n    async def get_usage(\n        self, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[DatasetsGetUsageResponse]:\n        \"\"\"\n        View the dataset storage usage for your Organization. Each Organization can have up to 10GB of storage across all their users.\n\n        Parameters\n        ----------\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[DatasetsGetUsageResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/datasets/usage\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DatasetsGetUsageResponse,\n                    construct_type(\n                        type_=DatasetsGetUsageResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                       
     type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code 
== 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n          
              construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def get(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[DatasetsGetResponse]:\n        \"\"\"\n        Retrieve a dataset by ID. 
See ['Datasets'](https://docs.cohere.com/docs/datasets) for more information.\n\n        Parameters\n        ----------\n        id : str\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[DatasetsGetResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/datasets/{jsonable_encoder(id)}\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DatasetsGetResponse,\n                    construct_type(\n                        type_=DatasetsGetResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n           
         headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            
type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def delete(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[typing.Dict[str, typing.Any]]:\n        \"\"\"\n        Delete a dataset by ID. 
Datasets are automatically deleted after 30 days, but they can also be deleted manually.\n\n        Parameters\n        ----------\n        id : str\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[typing.Dict[str, typing.Any]]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/datasets/{jsonable_encoder(id)}\",\n            method=\"DELETE\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    typing.Dict[str, typing.Any],\n                    construct_type(\n                        type_=typing.Dict[str, typing.Any],  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n         
       raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n 
                           type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n   
         if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n"
  },
  {
    "path": "src/cohere/datasets/types/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .datasets_create_response import DatasetsCreateResponse\n    from .datasets_get_response import DatasetsGetResponse\n    from .datasets_get_usage_response import DatasetsGetUsageResponse\n    from .datasets_list_response import DatasetsListResponse\n_dynamic_imports: typing.Dict[str, str] = {\n    \"DatasetsCreateResponse\": \".datasets_create_response\",\n    \"DatasetsGetResponse\": \".datasets_get_response\",\n    \"DatasetsGetUsageResponse\": \".datasets_get_usage_response\",\n    \"DatasetsListResponse\": \".datasets_list_response\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\"DatasetsCreateResponse\", \"DatasetsGetResponse\", \"DatasetsGetUsageResponse\", \"DatasetsListResponse\"]\n"
  },
  {
    "path": "src/cohere/datasets/types/datasets_create_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ...core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ...core.unchecked_base_model import UncheckedBaseModel\n\n\nclass DatasetsCreateResponse(UncheckedBaseModel):\n    id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The dataset ID\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/datasets/types/datasets_get_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ...core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ...core.unchecked_base_model import UncheckedBaseModel\nfrom ...types.dataset import Dataset\n\n\nclass DatasetsGetResponse(UncheckedBaseModel):\n    dataset: Dataset\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/datasets/types/datasets_get_usage_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ...core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ...core.unchecked_base_model import UncheckedBaseModel\n\n\nclass DatasetsGetUsageResponse(UncheckedBaseModel):\n    organization_usage: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The total number of bytes used by the organization.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/datasets/types/datasets_list_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ...core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ...core.unchecked_base_model import UncheckedBaseModel\nfrom ...types.dataset import Dataset\n\n\nclass DatasetsListResponse(UncheckedBaseModel):\n    datasets: typing.Optional[typing.List[Dataset]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/embed_jobs/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .types import CreateEmbedJobRequestTruncate\n_dynamic_imports: typing.Dict[str, str] = {\"CreateEmbedJobRequestTruncate\": \".types\"}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\"CreateEmbedJobRequestTruncate\"]\n"
  },
  {
    "path": "src/cohere/embed_jobs/client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.request_options import RequestOptions\nfrom ..types.create_embed_job_response import CreateEmbedJobResponse\nfrom ..types.embed_input_type import EmbedInputType\nfrom ..types.embed_job import EmbedJob\nfrom ..types.embedding_type import EmbeddingType\nfrom ..types.list_embed_job_response import ListEmbedJobResponse\nfrom .raw_client import AsyncRawEmbedJobsClient, RawEmbedJobsClient\nfrom .types.create_embed_job_request_truncate import CreateEmbedJobRequestTruncate\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass EmbedJobsClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._raw_client = RawEmbedJobsClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> RawEmbedJobsClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        RawEmbedJobsClient\n        \"\"\"\n        return self._raw_client\n\n    def list(self, *, request_options: typing.Optional[RequestOptions] = None) -> ListEmbedJobResponse:\n        \"\"\"\n        The list embed job endpoint allows users to view all embed jobs history for that specific user.\n\n        Parameters\n        ----------\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListEmbedJobResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.embed_jobs.list()\n        \"\"\"\n        _response = self._raw_client.list(request_options=request_options)\n        
return _response.data\n\n    def create(\n        self,\n        *,\n        model: str,\n        dataset_id: str,\n        input_type: EmbedInputType,\n        name: typing.Optional[str] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[CreateEmbedJobRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> CreateEmbedJobResponse:\n        \"\"\"\n        This API launches an async Embed job for a [Dataset](https://docs.cohere.com/docs/datasets) of type `embed-input`. The result of a completed embed job is new Dataset of type `embed-output`, which contains the original text entries and the corresponding embeddings.\n\n        Parameters\n        ----------\n        model : str\n            ID of the embedding model.\n\n            Available models and corresponding embedding dimensions:\n\n            - `embed-english-v3.0` : 1024\n            - `embed-multilingual-v3.0` : 1024\n            - `embed-english-light-v3.0` : 384\n            - `embed-multilingual-light-v3.0` : 384\n\n        dataset_id : str\n            ID of a [Dataset](https://docs.cohere.com/docs/datasets). The Dataset must be of type `embed-input` and must have a validation status `Validated`\n\n        input_type : EmbedInputType\n\n        name : typing.Optional[str]\n            The name of the embed job.\n\n        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]\n            Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.\n\n            * `\"float\"`: Use this when you want to get back the default float embeddings. Valid for all models.\n            * `\"int8\"`: Use this when you want to get back signed int8 embeddings. 
Valid for v3 and newer model versions.\n            * `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Valid for v3 and newer model versions.\n            * `\"binary\"`: Use this when you want to get back signed binary embeddings. Valid for v3 and newer model versions.\n            * `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Valid for v3 and newer model versions.\n\n        truncate : typing.Optional[CreateEmbedJobRequestTruncate]\n            One of `START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        CreateEmbedJobResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.embed_jobs.create(\n            model=\"model\",\n            dataset_id=\"dataset_id\",\n            input_type=\"search_document\",\n        )\n        \"\"\"\n        _response = self._raw_client.create(\n            model=model,\n            dataset_id=dataset_id,\n            input_type=input_type,\n            name=name,\n            embedding_types=embedding_types,\n            truncate=truncate,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> EmbedJob:\n        \"\"\"\n        This API retrieves the details about an embed job started by the same user.\n\n        Parameters\n        ----------\n        id : str\n     
       The ID of the embed job to retrieve.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        EmbedJob\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.embed_jobs.get(\n            id=\"id\",\n        )\n        \"\"\"\n        _response = self._raw_client.get(id, request_options=request_options)\n        return _response.data\n\n    def cancel(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> None:\n        \"\"\"\n        This API allows users to cancel an active embed job. Once invoked, the embedding process will be terminated, and users will be charged for the embeddings processed up to the cancellation point. It's important to note that partial results will not be available to users after cancellation.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the embed job to cancel.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        None\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.embed_jobs.cancel(\n            id=\"id\",\n        )\n        \"\"\"\n        _response = self._raw_client.cancel(id, request_options=request_options)\n        return _response.data\n\n\nclass AsyncEmbedJobsClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._raw_client = AsyncRawEmbedJobsClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> AsyncRawEmbedJobsClient:\n        \"\"\"\n        Retrieves a raw implementation of 
this client that returns raw responses.\n\n        Returns\n        -------\n        AsyncRawEmbedJobsClient\n        \"\"\"\n        return self._raw_client\n\n    async def list(self, *, request_options: typing.Optional[RequestOptions] = None) -> ListEmbedJobResponse:\n        \"\"\"\n        The list embed job endpoint allows users to view all embed jobs history for that specific user.\n\n        Parameters\n        ----------\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListEmbedJobResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.embed_jobs.list()\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.list(request_options=request_options)\n        return _response.data\n\n    async def create(\n        self,\n        *,\n        model: str,\n        dataset_id: str,\n        input_type: EmbedInputType,\n        name: typing.Optional[str] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[CreateEmbedJobRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> CreateEmbedJobResponse:\n        \"\"\"\n        This API launches an async Embed job for a [Dataset](https://docs.cohere.com/docs/datasets) of type `embed-input`. 
The result of a completed embed job is new Dataset of type `embed-output`, which contains the original text entries and the corresponding embeddings.\n\n        Parameters\n        ----------\n        model : str\n            ID of the embedding model.\n\n            Available models and corresponding embedding dimensions:\n\n            - `embed-english-v3.0` : 1024\n            - `embed-multilingual-v3.0` : 1024\n            - `embed-english-light-v3.0` : 384\n            - `embed-multilingual-light-v3.0` : 384\n\n        dataset_id : str\n            ID of a [Dataset](https://docs.cohere.com/docs/datasets). The Dataset must be of type `embed-input` and must have a validation status `Validated`\n\n        input_type : EmbedInputType\n\n        name : typing.Optional[str]\n            The name of the embed job.\n\n        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]\n            Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.\n\n            * `\"float\"`: Use this when you want to get back the default float embeddings. Valid for all models.\n            * `\"int8\"`: Use this when you want to get back signed int8 embeddings. Valid for v3 and newer model versions.\n            * `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Valid for v3 and newer model versions.\n            * `\"binary\"`: Use this when you want to get back signed binary embeddings. Valid for v3 and newer model versions.\n            * `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Valid for v3 and newer model versions.\n\n        truncate : typing.Optional[CreateEmbedJobRequestTruncate]\n            One of `START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. 
`END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        CreateEmbedJobResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.embed_jobs.create(\n                model=\"model\",\n                dataset_id=\"dataset_id\",\n                input_type=\"search_document\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.create(\n            model=model,\n            dataset_id=dataset_id,\n            input_type=input_type,\n            name=name,\n            embedding_types=embedding_types,\n            truncate=truncate,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> EmbedJob:\n        \"\"\"\n        This API retrieves the details about an embed job started by the same user.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the embed job to retrieve.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        EmbedJob\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await 
client.embed_jobs.get(\n                id=\"id\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.get(id, request_options=request_options)\n        return _response.data\n\n    async def cancel(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> None:\n        \"\"\"\n        This API allows users to cancel an active embed job. Once invoked, the embedding process will be terminated, and users will be charged for the embeddings processed up to the cancellation point. It's important to note that partial results will not be available to users after cancellation.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the embed job to cancel.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        None\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.embed_jobs.cancel(\n                id=\"id\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.cancel(id, request_options=request_options)\n        return _response.data\n"
  },
  {
    "path": "src/cohere/embed_jobs/raw_client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\nfrom json.decoder import JSONDecodeError\n\nfrom ..core.api_error import ApiError\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.http_response import AsyncHttpResponse, HttpResponse\nfrom ..core.jsonable_encoder import jsonable_encoder\nfrom ..core.parse_error import ParsingError\nfrom ..core.request_options import RequestOptions\nfrom ..core.unchecked_base_model import construct_type\nfrom ..errors.bad_request_error import BadRequestError\nfrom ..errors.client_closed_request_error import ClientClosedRequestError\nfrom ..errors.forbidden_error import ForbiddenError\nfrom ..errors.gateway_timeout_error import GatewayTimeoutError\nfrom ..errors.internal_server_error import InternalServerError\nfrom ..errors.invalid_token_error import InvalidTokenError\nfrom ..errors.not_found_error import NotFoundError\nfrom ..errors.not_implemented_error import NotImplementedError\nfrom ..errors.service_unavailable_error import ServiceUnavailableError\nfrom ..errors.too_many_requests_error import TooManyRequestsError\nfrom ..errors.unauthorized_error import UnauthorizedError\nfrom ..errors.unprocessable_entity_error import UnprocessableEntityError\nfrom ..types.create_embed_job_response import CreateEmbedJobResponse\nfrom ..types.embed_input_type import EmbedInputType\nfrom ..types.embed_job import EmbedJob\nfrom ..types.embedding_type import EmbeddingType\nfrom ..types.list_embed_job_response import ListEmbedJobResponse\nfrom .types.create_embed_job_request_truncate import CreateEmbedJobRequestTruncate\nfrom pydantic import ValidationError\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass RawEmbedJobsClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    def list(self, *, request_options: 
typing.Optional[RequestOptions] = None) -> HttpResponse[ListEmbedJobResponse]:\n        \"\"\"\n        The list embed job endpoint allows users to view all embed jobs history for that specific user.\n\n        Parameters\n        ----------\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[ListEmbedJobResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/embed-jobs\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListEmbedJobResponse,\n                    construct_type(\n                        type_=ListEmbedJobResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n   
             raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        
construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n    
            )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def create(\n        self,\n        *,\n        model: str,\n        dataset_id: str,\n        input_type: EmbedInputType,\n        name: typing.Optional[str] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[CreateEmbedJobRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[CreateEmbedJobResponse]:\n        \"\"\"\n        This API launches an async Embed job for a [Dataset](https://docs.cohere.com/docs/datasets) of type `embed-input`. 
The result of a completed embed job is new Dataset of type `embed-output`, which contains the original text entries and the corresponding embeddings.\n\n        Parameters\n        ----------\n        model : str\n            ID of the embedding model.\n\n            Available models and corresponding embedding dimensions:\n\n            - `embed-english-v3.0` : 1024\n            - `embed-multilingual-v3.0` : 1024\n            - `embed-english-light-v3.0` : 384\n            - `embed-multilingual-light-v3.0` : 384\n\n        dataset_id : str\n            ID of a [Dataset](https://docs.cohere.com/docs/datasets). The Dataset must be of type `embed-input` and must have a validation status `Validated`\n\n        input_type : EmbedInputType\n\n        name : typing.Optional[str]\n            The name of the embed job.\n\n        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]\n            Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.\n\n            * `\"float\"`: Use this when you want to get back the default float embeddings. Valid for all models.\n            * `\"int8\"`: Use this when you want to get back signed int8 embeddings. Valid for v3 and newer model versions.\n            * `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Valid for v3 and newer model versions.\n            * `\"binary\"`: Use this when you want to get back signed binary embeddings. Valid for v3 and newer model versions.\n            * `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Valid for v3 and newer model versions.\n\n        truncate : typing.Optional[CreateEmbedJobRequestTruncate]\n            One of `START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. 
`END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[CreateEmbedJobResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/embed-jobs\",\n            method=\"POST\",\n            json={\n                \"model\": model,\n                \"dataset_id\": dataset_id,\n                \"input_type\": input_type,\n                \"name\": name,\n                \"embedding_types\": embedding_types,\n                \"truncate\": truncate,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    CreateEmbedJobResponse,\n                    construct_type(\n                        type_=CreateEmbedJobResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    
body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            
object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n     
               headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def get(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> HttpResponse[EmbedJob]:\n        \"\"\"\n        This API retrieves the details about an embed job started by the same user.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the embed job to retrieve.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[EmbedJob]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/embed-jobs/{jsonable_encoder(id)}\",\n            
method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    EmbedJob,\n                    construct_type(\n                        type_=EmbedJob,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                  
      typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n             
           ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise 
ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def cancel(self, id: str, *, request_options: typing.Optional[RequestOptions] = None) -> HttpResponse[None]:\n        \"\"\"\n        This API allows users to cancel an active embed job. Once invoked, the embedding process will be terminated, and users will be charged for the embeddings processed up to the cancellation point. It's important to note that partial results will not be available to users after cancellation.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the embed job to cancel.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[None]\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/embed-jobs/{jsonable_encoder(id)}/cancel\",\n            method=\"POST\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                return HttpResponse(response=_response, data=None)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise 
UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n              
              type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n\nclass AsyncRawEmbedJobsClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    async def list(\n        self, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[ListEmbedJobResponse]:\n        \"\"\"\n        The list embed job endpoint allows users to view all embed jobs history for that specific user.\n\n        Parameters\n        ----------\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n 
       AsyncHttpResponse[ListEmbedJobResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/embed-jobs\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListEmbedJobResponse,\n                    construct_type(\n                        type_=ListEmbedJobResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n               
     ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    
body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            
object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def create(\n        self,\n        *,\n        model: str,\n        dataset_id: str,\n        input_type: EmbedInputType,\n        name: typing.Optional[str] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[CreateEmbedJobRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[CreateEmbedJobResponse]:\n        \"\"\"\n        This API launches an async Embed job for a [Dataset](https://docs.cohere.com/docs/datasets) of type `embed-input`. The result of a completed embed job is new Dataset of type `embed-output`, which contains the original text entries and the corresponding embeddings.\n\n        Parameters\n        ----------\n        model : str\n            ID of the embedding model.\n\n            Available models and corresponding embedding dimensions:\n\n            - `embed-english-v3.0` : 1024\n            - `embed-multilingual-v3.0` : 1024\n            - `embed-english-light-v3.0` : 384\n            - `embed-multilingual-light-v3.0` : 384\n\n        dataset_id : str\n            ID of a [Dataset](https://docs.cohere.com/docs/datasets). 
The Dataset must be of type `embed-input` and must have a validation status `Validated`\n\n        input_type : EmbedInputType\n\n        name : typing.Optional[str]\n            The name of the embed job.\n\n        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]\n            Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.\n\n            * `\"float\"`: Use this when you want to get back the default float embeddings. Valid for all models.\n            * `\"int8\"`: Use this when you want to get back signed int8 embeddings. Valid for v3 and newer model versions.\n            * `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Valid for v3 and newer model versions.\n            * `\"binary\"`: Use this when you want to get back signed binary embeddings. Valid for v3 and newer model versions.\n            * `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Valid for v3 and newer model versions.\n\n        truncate : typing.Optional[CreateEmbedJobRequestTruncate]\n            One of `START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. 
In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[CreateEmbedJobResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/embed-jobs\",\n            method=\"POST\",\n            json={\n                \"model\": model,\n                \"dataset_id\": dataset_id,\n                \"input_type\": input_type,\n                \"name\": name,\n                \"embedding_types\": embedding_types,\n                \"truncate\": truncate,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    CreateEmbedJobResponse,\n                    construct_type(\n                        type_=CreateEmbedJobResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n      
                  typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n               
         ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    
headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def get(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[EmbedJob]:\n        \"\"\"\n        This API retrieves the details about an embed job started by the same user.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the embed job to retrieve.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[EmbedJob]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/embed-jobs/{jsonable_encoder(id)}\",\n      
      method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    EmbedJob,\n                    construct_type(\n                        type_=EmbedJob,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n       
                 typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n  
                      ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise 
ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def cancel(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[None]:\n        \"\"\"\n        This API allows users to cancel an active embed job. Once invoked, the embedding process will be terminated, and users will be charged for the embeddings processed up to the cancellation point. It's important to note that partial results will not be available to users after cancellation.\n\n        Parameters\n        ----------\n        id : str\n            The ID of the embed job to cancel.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[None]\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/embed-jobs/{jsonable_encoder(id)}/cancel\",\n            method=\"POST\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                return AsyncHttpResponse(response=_response, data=None)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n  
                      construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n            
        ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n"
  },
  {
    "path": "src/cohere/embed_jobs/types/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .create_embed_job_request_truncate import CreateEmbedJobRequestTruncate\n_dynamic_imports: typing.Dict[str, str] = {\"CreateEmbedJobRequestTruncate\": \".create_embed_job_request_truncate\"}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\"CreateEmbedJobRequestTruncate\"]\n"
  },
  {
    "path": "src/cohere/embed_jobs/types/create_embed_job_request_truncate.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nCreateEmbedJobRequestTruncate = typing.Union[typing.Literal[\"START\", \"END\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/environment.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport enum\n\n\nclass ClientEnvironment(enum.Enum):\n    PRODUCTION = \"https://api.cohere.com\"\n"
  },
  {
    "path": "src/cohere/errors/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .bad_request_error import BadRequestError\n    from .client_closed_request_error import ClientClosedRequestError\n    from .forbidden_error import ForbiddenError\n    from .gateway_timeout_error import GatewayTimeoutError\n    from .internal_server_error import InternalServerError\n    from .invalid_token_error import InvalidTokenError\n    from .not_found_error import NotFoundError\n    from .not_implemented_error import NotImplementedError\n    from .service_unavailable_error import ServiceUnavailableError\n    from .too_many_requests_error import TooManyRequestsError\n    from .unauthorized_error import UnauthorizedError\n    from .unprocessable_entity_error import UnprocessableEntityError\n_dynamic_imports: typing.Dict[str, str] = {\n    \"BadRequestError\": \".bad_request_error\",\n    \"ClientClosedRequestError\": \".client_closed_request_error\",\n    \"ForbiddenError\": \".forbidden_error\",\n    \"GatewayTimeoutError\": \".gateway_timeout_error\",\n    \"InternalServerError\": \".internal_server_error\",\n    \"InvalidTokenError\": \".invalid_token_error\",\n    \"NotFoundError\": \".not_found_error\",\n    \"NotImplementedError\": \".not_implemented_error\",\n    \"ServiceUnavailableError\": \".service_unavailable_error\",\n    \"TooManyRequestsError\": \".too_many_requests_error\",\n    \"UnauthorizedError\": \".unauthorized_error\",\n    \"UnprocessableEntityError\": \".unprocessable_entity_error\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            
return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\n    \"BadRequestError\",\n    \"ClientClosedRequestError\",\n    \"ForbiddenError\",\n    \"GatewayTimeoutError\",\n    \"InternalServerError\",\n    \"InvalidTokenError\",\n    \"NotFoundError\",\n    \"NotImplementedError\",\n    \"ServiceUnavailableError\",\n    \"TooManyRequestsError\",\n    \"UnauthorizedError\",\n    \"UnprocessableEntityError\",\n]\n"
  },
  {
    "path": "src/cohere/errors/bad_request_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.api_error import ApiError\n\n\nclass BadRequestError(ApiError):\n    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):\n        super().__init__(status_code=400, headers=headers, body=body)\n"
  },
  {
    "path": "src/cohere/errors/client_closed_request_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.api_error import ApiError\n\n\nclass ClientClosedRequestError(ApiError):\n    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):\n        super().__init__(status_code=499, headers=headers, body=body)\n"
  },
  {
    "path": "src/cohere/errors/forbidden_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.api_error import ApiError\n\n\nclass ForbiddenError(ApiError):\n    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):\n        super().__init__(status_code=403, headers=headers, body=body)\n"
  },
  {
    "path": "src/cohere/errors/gateway_timeout_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.api_error import ApiError\n\n\nclass GatewayTimeoutError(ApiError):\n    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):\n        super().__init__(status_code=504, headers=headers, body=body)\n"
  },
  {
    "path": "src/cohere/errors/internal_server_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.api_error import ApiError\n\n\nclass InternalServerError(ApiError):\n    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):\n        super().__init__(status_code=500, headers=headers, body=body)\n"
  },
  {
    "path": "src/cohere/errors/invalid_token_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.api_error import ApiError\n\n\nclass InvalidTokenError(ApiError):\n    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):\n        super().__init__(status_code=498, headers=headers, body=body)\n"
  },
  {
    "path": "src/cohere/errors/not_found_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.api_error import ApiError\n\n\nclass NotFoundError(ApiError):\n    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):\n        super().__init__(status_code=404, headers=headers, body=body)\n"
  },
  {
    "path": "src/cohere/errors/not_implemented_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.api_error import ApiError\n\n\nclass NotImplementedError(ApiError):\n    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):\n        super().__init__(status_code=501, headers=headers, body=body)\n"
  },
  {
    "path": "src/cohere/errors/service_unavailable_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.api_error import ApiError\n\n\nclass ServiceUnavailableError(ApiError):\n    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):\n        super().__init__(status_code=503, headers=headers, body=body)\n"
  },
  {
    "path": "src/cohere/errors/too_many_requests_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.api_error import ApiError\n\n\nclass TooManyRequestsError(ApiError):\n    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):\n        super().__init__(status_code=429, headers=headers, body=body)\n"
  },
  {
    "path": "src/cohere/errors/unauthorized_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.api_error import ApiError\n\n\nclass UnauthorizedError(ApiError):\n    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):\n        super().__init__(status_code=401, headers=headers, body=body)\n"
  },
  {
    "path": "src/cohere/errors/unprocessable_entity_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.api_error import ApiError\n\n\nclass UnprocessableEntityError(ApiError):\n    def __init__(self, body: typing.Any, headers: typing.Optional[typing.Dict[str, str]] = None):\n        super().__init__(status_code=422, headers=headers, body=body)\n"
  },
  {
    "path": "src/cohere/finetuning/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from . import finetuning\n    from .finetuning import (\n        BaseModel,\n        BaseType,\n        CreateFinetunedModelResponse,\n        DeleteFinetunedModelResponse,\n        Event,\n        FinetunedModel,\n        GetFinetunedModelResponse,\n        Hyperparameters,\n        ListEventsResponse,\n        ListFinetunedModelsResponse,\n        ListTrainingStepMetricsResponse,\n        LoraTargetModules,\n        Settings,\n        Status,\n        Strategy,\n        TrainingStepMetrics,\n        UpdateFinetunedModelResponse,\n        WandbConfig,\n    )\n_dynamic_imports: typing.Dict[str, str] = {\n    \"BaseModel\": \".finetuning\",\n    \"BaseType\": \".finetuning\",\n    \"CreateFinetunedModelResponse\": \".finetuning\",\n    \"DeleteFinetunedModelResponse\": \".finetuning\",\n    \"Event\": \".finetuning\",\n    \"FinetunedModel\": \".finetuning\",\n    \"GetFinetunedModelResponse\": \".finetuning\",\n    \"Hyperparameters\": \".finetuning\",\n    \"ListEventsResponse\": \".finetuning\",\n    \"ListFinetunedModelsResponse\": \".finetuning\",\n    \"ListTrainingStepMetricsResponse\": \".finetuning\",\n    \"LoraTargetModules\": \".finetuning\",\n    \"Settings\": \".finetuning\",\n    \"Status\": \".finetuning\",\n    \"Strategy\": \".finetuning\",\n    \"TrainingStepMetrics\": \".finetuning\",\n    \"UpdateFinetunedModelResponse\": \".finetuning\",\n    \"WandbConfig\": \".finetuning\",\n    \"finetuning\": \".finetuning\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == 
f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\n    \"BaseModel\",\n    \"BaseType\",\n    \"CreateFinetunedModelResponse\",\n    \"DeleteFinetunedModelResponse\",\n    \"Event\",\n    \"FinetunedModel\",\n    \"GetFinetunedModelResponse\",\n    \"Hyperparameters\",\n    \"ListEventsResponse\",\n    \"ListFinetunedModelsResponse\",\n    \"ListTrainingStepMetricsResponse\",\n    \"LoraTargetModules\",\n    \"Settings\",\n    \"Status\",\n    \"Strategy\",\n    \"TrainingStepMetrics\",\n    \"UpdateFinetunedModelResponse\",\n    \"WandbConfig\",\n    \"finetuning\",\n]\n"
  },
  {
    "path": "src/cohere/finetuning/client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.request_options import RequestOptions\nfrom .finetuning.types.create_finetuned_model_response import CreateFinetunedModelResponse\nfrom .finetuning.types.delete_finetuned_model_response import DeleteFinetunedModelResponse\nfrom .finetuning.types.finetuned_model import FinetunedModel\nfrom .finetuning.types.get_finetuned_model_response import GetFinetunedModelResponse\nfrom .finetuning.types.list_events_response import ListEventsResponse\nfrom .finetuning.types.list_finetuned_models_response import ListFinetunedModelsResponse\nfrom .finetuning.types.list_training_step_metrics_response import ListTrainingStepMetricsResponse\nfrom .finetuning.types.settings import Settings\nfrom .finetuning.types.update_finetuned_model_response import UpdateFinetunedModelResponse\nfrom .raw_client import AsyncRawFinetuningClient, RawFinetuningClient\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass FinetuningClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._raw_client = RawFinetuningClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> RawFinetuningClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        RawFinetuningClient\n        \"\"\"\n        return self._raw_client\n\n    def list_finetuned_models(\n        self,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        order_by: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ListFinetunedModelsResponse:\n        \"\"\"\n        Returns a list of fine-tuned models that the user has access to.\n\n        
Parameters\n        ----------\n        page_size : typing.Optional[int]\n            Maximum number of results to be returned by the server. If 0, defaults to\n            50.\n\n        page_token : typing.Optional[str]\n            Request a specific page of the list results.\n\n        order_by : typing.Optional[str]\n            Comma separated list of fields. For example: \"created_at,name\". The default\n            sorting order is ascending. To specify descending order for a field, append\n            \" desc\" to the field name. For example: \"created_at desc,name\".\n\n            Supported sorting fields:\n              - created_at (default)\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListFinetunedModelsResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.finetuning.list_finetuned_models(\n            page_size=1,\n            page_token=\"page_token\",\n            order_by=\"order_by\",\n        )\n        \"\"\"\n        _response = self._raw_client.list_finetuned_models(\n            page_size=page_size, page_token=page_token, order_by=order_by, request_options=request_options\n        )\n        return _response.data\n\n    def create_finetuned_model(\n        self, *, request: FinetunedModel, request_options: typing.Optional[RequestOptions] = None\n    ) -> CreateFinetunedModelResponse:\n        \"\"\"\n        Creates a new fine-tuned model. The model will be trained on the dataset specified in the request body. 
The training process may take some time, and the model will be available once the training is complete.\n\n        Parameters\n        ----------\n        request : FinetunedModel\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        CreateFinetunedModelResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n        from cohere.finetuning.finetuning import BaseModel, FinetunedModel, Settings\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.finetuning.create_finetuned_model(\n            request=FinetunedModel(\n                name=\"name\",\n                settings=Settings(\n                    base_model=BaseModel(\n                        base_type=\"BASE_TYPE_UNSPECIFIED\",\n                    ),\n                    dataset_id=\"dataset_id\",\n                ),\n            ),\n        )\n        \"\"\"\n        _response = self._raw_client.create_finetuned_model(request=request, request_options=request_options)\n        return _response.data\n\n    def get_finetuned_model(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> GetFinetunedModelResponse:\n        \"\"\"\n        Retrieve a fine-tuned model by its ID.\n\n        Parameters\n        ----------\n        id : str\n            The fine-tuned model ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        GetFinetunedModelResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.finetuning.get_finetuned_model(\n            
id=\"id\",\n        )\n        \"\"\"\n        _response = self._raw_client.get_finetuned_model(id, request_options=request_options)\n        return _response.data\n\n    def delete_finetuned_model(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> DeleteFinetunedModelResponse:\n        \"\"\"\n        Deletes a fine-tuned model. The model will be removed from the system and will no longer be available for use.\n        This operation is irreversible.\n\n        Parameters\n        ----------\n        id : str\n            The fine-tuned model ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DeleteFinetunedModelResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.finetuning.delete_finetuned_model(\n            id=\"id\",\n        )\n        \"\"\"\n        _response = self._raw_client.delete_finetuned_model(id, request_options=request_options)\n        return _response.data\n\n    def update_finetuned_model(\n        self, id: str, *, name: str, settings: Settings, request_options: typing.Optional[RequestOptions] = None\n    ) -> UpdateFinetunedModelResponse:\n        \"\"\"\n        Updates the fine-tuned model with the given ID. The model will be updated with the new settings and name provided in the request body.\n\n        Parameters\n        ----------\n        id : str\n            FinetunedModel ID.\n\n        name : str\n            FinetunedModel name (e.g. 
`foobar`).\n\n        settings : Settings\n            FinetunedModel settings such as dataset, hyperparameters...\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        UpdateFinetunedModelResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n        from cohere.finetuning.finetuning import BaseModel, Settings\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.finetuning.update_finetuned_model(\n            id=\"id\",\n            name=\"name\",\n            settings=Settings(\n                base_model=BaseModel(\n                    base_type=\"BASE_TYPE_UNSPECIFIED\",\n                ),\n                dataset_id=\"dataset_id\",\n            ),\n        )\n        \"\"\"\n        _response = self._raw_client.update_finetuned_model(\n            id, name=name, settings=settings, request_options=request_options\n        )\n        return _response.data\n\n    def list_events(\n        self,\n        finetuned_model_id: str,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        order_by: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ListEventsResponse:\n        \"\"\"\n        Returns a list of events that occurred during the life-cycle of the fine-tuned model.\n        The events are ordered by creation time, with the most recent event first.\n        The list can be paginated using `page_size` and `page_token` parameters.\n\n        Parameters\n        ----------\n        finetuned_model_id : str\n            The parent fine-tuned model ID.\n\n        page_size : typing.Optional[int]\n            Maximum number of results to be returned by the server. 
If 0, defaults to\n            50.\n\n        page_token : typing.Optional[str]\n            Request a specific page of the list results.\n\n        order_by : typing.Optional[str]\n            Comma separated list of fields. For example: \"created_at,name\". The default\n            sorting order is ascending. To specify descending order for a field, append\n            \" desc\" to the field name. For example: \"created_at desc,name\".\n\n            Supported sorting fields:\n              - created_at (default)\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListEventsResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.finetuning.list_events(\n            finetuned_model_id=\"finetuned_model_id\",\n            page_size=1,\n            page_token=\"page_token\",\n            order_by=\"order_by\",\n        )\n        \"\"\"\n        _response = self._raw_client.list_events(\n            finetuned_model_id,\n            page_size=page_size,\n            page_token=page_token,\n            order_by=order_by,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def list_training_step_metrics(\n        self,\n        finetuned_model_id: str,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ListTrainingStepMetricsResponse:\n        \"\"\"\n        Returns a list of metrics measured during the training of a fine-tuned model.\n        The metrics are ordered by step number, with the most recent step first.\n        The list can be paginated using `page_size` and `page_token` parameters.\n\n  
      Parameters\n        ----------\n        finetuned_model_id : str\n            The parent fine-tuned model ID.\n\n        page_size : typing.Optional[int]\n            Maximum number of results to be returned by the server. If 0, defaults to\n            50.\n\n        page_token : typing.Optional[str]\n            Request a specific page of the list results.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListTrainingStepMetricsResponse\n            A successful response.\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.finetuning.list_training_step_metrics(\n            finetuned_model_id=\"finetuned_model_id\",\n            page_size=1,\n            page_token=\"page_token\",\n        )\n        \"\"\"\n        _response = self._raw_client.list_training_step_metrics(\n            finetuned_model_id, page_size=page_size, page_token=page_token, request_options=request_options\n        )\n        return _response.data\n\n\nclass AsyncFinetuningClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._raw_client = AsyncRawFinetuningClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> AsyncRawFinetuningClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        AsyncRawFinetuningClient\n        \"\"\"\n        return self._raw_client\n\n    async def list_finetuned_models(\n        self,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        order_by: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> 
ListFinetunedModelsResponse:\n        \"\"\"\n        Returns a list of fine-tuned models that the user has access to.\n\n        Parameters\n        ----------\n        page_size : typing.Optional[int]\n            Maximum number of results to be returned by the server. If 0, defaults to\n            50.\n\n        page_token : typing.Optional[str]\n            Request a specific page of the list results.\n\n        order_by : typing.Optional[str]\n            Comma separated list of fields. For example: \"created_at,name\". The default\n            sorting order is ascending. To specify descending order for a field, append\n            \" desc\" to the field name. For example: \"created_at desc,name\".\n\n            Supported sorting fields:\n              - created_at (default)\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListFinetunedModelsResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.finetuning.list_finetuned_models(\n                page_size=1,\n                page_token=\"page_token\",\n                order_by=\"order_by\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.list_finetuned_models(\n            page_size=page_size, page_token=page_token, order_by=order_by, request_options=request_options\n        )\n        return _response.data\n\n    async def create_finetuned_model(\n        self, *, request: FinetunedModel, request_options: typing.Optional[RequestOptions] = None\n    ) -> CreateFinetunedModelResponse:\n        \"\"\"\n        Creates a new fine-tuned model. 
The model will be trained on the dataset specified in the request body. The training process may take some time, and the model will be available once the training is complete.\n\n        Parameters\n        ----------\n        request : FinetunedModel\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        CreateFinetunedModelResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n        from cohere.finetuning.finetuning import BaseModel, FinetunedModel, Settings\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.finetuning.create_finetuned_model(\n                request=FinetunedModel(\n                    name=\"name\",\n                    settings=Settings(\n                        base_model=BaseModel(\n                            base_type=\"BASE_TYPE_UNSPECIFIED\",\n                        ),\n                        dataset_id=\"dataset_id\",\n                    ),\n                ),\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.create_finetuned_model(request=request, request_options=request_options)\n        return _response.data\n\n    async def get_finetuned_model(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> GetFinetunedModelResponse:\n        \"\"\"\n        Retrieve a fine-tuned model by its ID.\n\n        Parameters\n        ----------\n        id : str\n            The fine-tuned model ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        GetFinetunedModelResponse\n            A successful response.\n\n        
Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.finetuning.get_finetuned_model(\n                id=\"id\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.get_finetuned_model(id, request_options=request_options)\n        return _response.data\n\n    async def delete_finetuned_model(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> DeleteFinetunedModelResponse:\n        \"\"\"\n        Deletes a fine-tuned model. The model will be removed from the system and will no longer be available for use.\n        This operation is irreversible.\n\n        Parameters\n        ----------\n        id : str\n            The fine-tuned model ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        DeleteFinetunedModelResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.finetuning.delete_finetuned_model(\n                id=\"id\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.delete_finetuned_model(id, request_options=request_options)\n        return _response.data\n\n    async def update_finetuned_model(\n        self, id: str, *, name: str, settings: Settings, request_options: typing.Optional[RequestOptions] = None\n    ) -> UpdateFinetunedModelResponse:\n        \"\"\"\n        Updates the fine-tuned 
model with the given ID. The model will be updated with the new settings and name provided in the request body.\n\n        Parameters\n        ----------\n        id : str\n            FinetunedModel ID.\n\n        name : str\n            FinetunedModel name (e.g. `foobar`).\n\n        settings : Settings\n            FinetunedModel settings such as dataset, hyperparameters...\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        UpdateFinetunedModelResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n        from cohere.finetuning.finetuning import BaseModel, Settings\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.finetuning.update_finetuned_model(\n                id=\"id\",\n                name=\"name\",\n                settings=Settings(\n                    base_model=BaseModel(\n                        base_type=\"BASE_TYPE_UNSPECIFIED\",\n                    ),\n                    dataset_id=\"dataset_id\",\n                ),\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.update_finetuned_model(\n            id, name=name, settings=settings, request_options=request_options\n        )\n        return _response.data\n\n    async def list_events(\n        self,\n        finetuned_model_id: str,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        order_by: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ListEventsResponse:\n        \"\"\"\n        Returns a list of events that occurred during the life-cycle of the fine-tuned model.\n        The 
events are ordered by creation time, with the most recent event first.\n        The list can be paginated using `page_size` and `page_token` parameters.\n\n        Parameters\n        ----------\n        finetuned_model_id : str\n            The parent fine-tuned model ID.\n\n        page_size : typing.Optional[int]\n            Maximum number of results to be returned by the server. If 0, defaults to\n            50.\n\n        page_token : typing.Optional[str]\n            Request a specific page of the list results.\n\n        order_by : typing.Optional[str]\n            Comma separated list of fields. For example: \"created_at,name\". The default\n            sorting order is ascending. To specify descending order for a field, append\n            \" desc\" to the field name. For example: \"created_at desc,name\".\n\n            Supported sorting fields:\n              - created_at (default)\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListEventsResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.finetuning.list_events(\n                finetuned_model_id=\"finetuned_model_id\",\n                page_size=1,\n                page_token=\"page_token\",\n                order_by=\"order_by\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.list_events(\n            finetuned_model_id,\n            page_size=page_size,\n            page_token=page_token,\n            order_by=order_by,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def list_training_step_metrics(\n   
     self,\n        finetuned_model_id: str,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ListTrainingStepMetricsResponse:\n        \"\"\"\n        Returns a list of metrics measured during the training of a fine-tuned model.\n        The metrics are ordered by step number, with the most recent step first.\n        The list can be paginated using `page_size` and `page_token` parameters.\n\n        Parameters\n        ----------\n        finetuned_model_id : str\n            The parent fine-tuned model ID.\n\n        page_size : typing.Optional[int]\n            Maximum number of results to be returned by the server. If 0, defaults to\n            50.\n\n        page_token : typing.Optional[str]\n            Request a specific page of the list results.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListTrainingStepMetricsResponse\n            A successful response.\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.finetuning.list_training_step_metrics(\n                finetuned_model_id=\"finetuned_model_id\",\n                page_size=1,\n                page_token=\"page_token\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.list_training_step_metrics(\n            finetuned_model_id, page_size=page_size, page_token=page_token, request_options=request_options\n        )\n        return _response.data\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .types import (\n        BaseModel,\n        BaseType,\n        CreateFinetunedModelResponse,\n        DeleteFinetunedModelResponse,\n        Event,\n        FinetunedModel,\n        GetFinetunedModelResponse,\n        Hyperparameters,\n        ListEventsResponse,\n        ListFinetunedModelsResponse,\n        ListTrainingStepMetricsResponse,\n        LoraTargetModules,\n        Settings,\n        Status,\n        Strategy,\n        TrainingStepMetrics,\n        UpdateFinetunedModelResponse,\n        WandbConfig,\n    )\n_dynamic_imports: typing.Dict[str, str] = {\n    \"BaseModel\": \".types\",\n    \"BaseType\": \".types\",\n    \"CreateFinetunedModelResponse\": \".types\",\n    \"DeleteFinetunedModelResponse\": \".types\",\n    \"Event\": \".types\",\n    \"FinetunedModel\": \".types\",\n    \"GetFinetunedModelResponse\": \".types\",\n    \"Hyperparameters\": \".types\",\n    \"ListEventsResponse\": \".types\",\n    \"ListFinetunedModelsResponse\": \".types\",\n    \"ListTrainingStepMetricsResponse\": \".types\",\n    \"LoraTargetModules\": \".types\",\n    \"Settings\": \".types\",\n    \"Status\": \".types\",\n    \"Strategy\": \".types\",\n    \"TrainingStepMetrics\": \".types\",\n    \"UpdateFinetunedModelResponse\": \".types\",\n    \"WandbConfig\": \".types\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to 
import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\n    \"BaseModel\",\n    \"BaseType\",\n    \"CreateFinetunedModelResponse\",\n    \"DeleteFinetunedModelResponse\",\n    \"Event\",\n    \"FinetunedModel\",\n    \"GetFinetunedModelResponse\",\n    \"Hyperparameters\",\n    \"ListEventsResponse\",\n    \"ListFinetunedModelsResponse\",\n    \"ListTrainingStepMetricsResponse\",\n    \"LoraTargetModules\",\n    \"Settings\",\n    \"Status\",\n    \"Strategy\",\n    \"TrainingStepMetrics\",\n    \"UpdateFinetunedModelResponse\",\n    \"WandbConfig\",\n]\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .base_model import BaseModel\n    from .base_type import BaseType\n    from .create_finetuned_model_response import CreateFinetunedModelResponse\n    from .delete_finetuned_model_response import DeleteFinetunedModelResponse\n    from .event import Event\n    from .finetuned_model import FinetunedModel\n    from .get_finetuned_model_response import GetFinetunedModelResponse\n    from .hyperparameters import Hyperparameters\n    from .list_events_response import ListEventsResponse\n    from .list_finetuned_models_response import ListFinetunedModelsResponse\n    from .list_training_step_metrics_response import ListTrainingStepMetricsResponse\n    from .lora_target_modules import LoraTargetModules\n    from .settings import Settings\n    from .status import Status\n    from .strategy import Strategy\n    from .training_step_metrics import TrainingStepMetrics\n    from .update_finetuned_model_response import UpdateFinetunedModelResponse\n    from .wandb_config import WandbConfig\n_dynamic_imports: typing.Dict[str, str] = {\n    \"BaseModel\": \".base_model\",\n    \"BaseType\": \".base_type\",\n    \"CreateFinetunedModelResponse\": \".create_finetuned_model_response\",\n    \"DeleteFinetunedModelResponse\": \".delete_finetuned_model_response\",\n    \"Event\": \".event\",\n    \"FinetunedModel\": \".finetuned_model\",\n    \"GetFinetunedModelResponse\": \".get_finetuned_model_response\",\n    \"Hyperparameters\": \".hyperparameters\",\n    \"ListEventsResponse\": \".list_events_response\",\n    \"ListFinetunedModelsResponse\": \".list_finetuned_models_response\",\n    \"ListTrainingStepMetricsResponse\": \".list_training_step_metrics_response\",\n    \"LoraTargetModules\": \".lora_target_modules\",\n    \"Settings\": \".settings\",\n    \"Status\": \".status\",\n    \"Strategy\": 
\".strategy\",\n    \"TrainingStepMetrics\": \".training_step_metrics\",\n    \"UpdateFinetunedModelResponse\": \".update_finetuned_model_response\",\n    \"WandbConfig\": \".wandb_config\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\n    \"BaseModel\",\n    \"BaseType\",\n    \"CreateFinetunedModelResponse\",\n    \"DeleteFinetunedModelResponse\",\n    \"Event\",\n    \"FinetunedModel\",\n    \"GetFinetunedModelResponse\",\n    \"Hyperparameters\",\n    \"ListEventsResponse\",\n    \"ListFinetunedModelsResponse\",\n    \"ListTrainingStepMetricsResponse\",\n    \"LoraTargetModules\",\n    \"Settings\",\n    \"Status\",\n    \"Strategy\",\n    \"TrainingStepMetrics\",\n    \"UpdateFinetunedModelResponse\",\n    \"WandbConfig\",\n]\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/base_model.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\nfrom .base_type import BaseType\nfrom .strategy import Strategy\n\n\nclass BaseModel(UncheckedBaseModel):\n    \"\"\"\n    The base model used for fine-tuning.\n    \"\"\"\n\n    name: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The name of the base model.\n    \"\"\"\n\n    version: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. The version of the base model.\n    \"\"\"\n\n    base_type: BaseType = pydantic.Field()\n    \"\"\"\n    The type of the base model.\n    \"\"\"\n\n    strategy: typing.Optional[Strategy] = pydantic.Field(default=None)\n    \"\"\"\n    Deprecated: The fine-tuning strategy.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/base_type.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nBaseType = typing.Union[\n    typing.Literal[\n        \"BASE_TYPE_UNSPECIFIED\",\n        \"BASE_TYPE_GENERATIVE\",\n        \"BASE_TYPE_CLASSIFICATION\",\n        \"BASE_TYPE_RERANK\",\n        \"BASE_TYPE_CHAT\",\n    ],\n    typing.Any,\n]\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/create_finetuned_model_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\nfrom .finetuned_model import FinetunedModel\n\n\nclass CreateFinetunedModelResponse(UncheckedBaseModel):\n    \"\"\"\n    Response to request to create a fine-tuned model.\n    \"\"\"\n\n    finetuned_model: typing.Optional[FinetunedModel] = pydantic.Field(default=None)\n    \"\"\"\n    Information about the fine-tuned model.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/delete_finetuned_model_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nDeleteFinetunedModelResponse = typing.Dict[str, typing.Any]\n\"\"\"\nResponse to request to delete a fine-tuned model.\n\"\"\"\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport datetime as dt\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\nfrom .status import Status\n\n\nclass Event(UncheckedBaseModel):\n    \"\"\"\n    A change in status of a fine-tuned model.\n    \"\"\"\n\n    user_id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    ID of the user who initiated the event. Empty if initiated by the system.\n    \"\"\"\n\n    status: typing.Optional[Status] = pydantic.Field(default=None)\n    \"\"\"\n    Status of the fine-tuned model.\n    \"\"\"\n\n    created_at: typing.Optional[dt.datetime] = pydantic.Field(default=None)\n    \"\"\"\n    Timestamp when the event happened.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/finetuned_model.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport datetime as dt\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\nfrom .settings import Settings\nfrom .status import Status\n\n\nclass FinetunedModel(UncheckedBaseModel):\n    \"\"\"\n    This resource represents a fine-tuned model.\n    \"\"\"\n\n    id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. FinetunedModel ID.\n    \"\"\"\n\n    name: str = pydantic.Field()\n    \"\"\"\n    FinetunedModel name (e.g. `foobar`).\n    \"\"\"\n\n    creator_id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. User ID of the creator.\n    \"\"\"\n\n    organization_id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. Organization ID.\n    \"\"\"\n\n    settings: Settings = pydantic.Field()\n    \"\"\"\n    FinetunedModel settings such as dataset, hyperparameters...\n    \"\"\"\n\n    status: typing.Optional[Status] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. Current stage in the life-cycle of the fine-tuned model.\n    \"\"\"\n\n    created_at: typing.Optional[dt.datetime] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. Creation timestamp.\n    \"\"\"\n\n    updated_at: typing.Optional[dt.datetime] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. Latest update timestamp.\n    \"\"\"\n\n    completed_at: typing.Optional[dt.datetime] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. Timestamp for the completed fine-tuning.\n    \"\"\"\n\n    last_used: typing.Optional[dt.datetime] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. 
Deprecated: Timestamp for the latest request to this fine-tuned model.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/get_finetuned_model_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\nfrom .finetuned_model import FinetunedModel\n\n\nclass GetFinetunedModelResponse(UncheckedBaseModel):\n    \"\"\"\n    Response to a request to get a fine-tuned model.\n    \"\"\"\n\n    finetuned_model: typing.Optional[FinetunedModel] = pydantic.Field(default=None)\n    \"\"\"\n    Information about the fine-tuned model.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/hyperparameters.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\nfrom .lora_target_modules import LoraTargetModules\n\n\nclass Hyperparameters(UncheckedBaseModel):\n    \"\"\"\n    The fine-tuning hyperparameters.\n    \"\"\"\n\n    early_stopping_patience: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Stops training if the loss metric does not improve beyond the value of\n    `early_stopping_threshold` after this many times of evaluation.\n    \"\"\"\n\n    early_stopping_threshold: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    How much the loss must improve to prevent early stopping.\n    \"\"\"\n\n    train_batch_size: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The batch size is the number of training examples included in a single\n    training pass.\n    \"\"\"\n\n    train_epochs: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The number of epochs to train for.\n    \"\"\"\n\n    learning_rate: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The learning rate to be used during training.\n    \"\"\"\n\n    lora_alpha: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Controls the scaling factor for LoRA updates. Higher values make the\n    updates more impactful.\n    \"\"\"\n\n    lora_rank: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Specifies the rank for low-rank matrices. 
Lower ranks reduce parameters\n    but may limit model flexibility.\n    \"\"\"\n\n    lora_target_modules: typing.Optional[LoraTargetModules] = pydantic.Field(default=None)\n    \"\"\"\n    The combination of LoRA modules to target.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/list_events_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\nfrom .event import Event\n\n\nclass ListEventsResponse(UncheckedBaseModel):\n    \"\"\"\n    Response to a request to list events of a fine-tuned model.\n    \"\"\"\n\n    events: typing.Optional[typing.List[Event]] = pydantic.Field(default=None)\n    \"\"\"\n    List of events for the fine-tuned model.\n    \"\"\"\n\n    next_page_token: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Pagination token to retrieve the next page of results. If the value is \"\",\n    it means no further results for the request.\n    \"\"\"\n\n    total_size: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Total count of results.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/list_finetuned_models_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\nfrom .finetuned_model import FinetunedModel\n\n\nclass ListFinetunedModelsResponse(UncheckedBaseModel):\n    \"\"\"\n    Response to a request to list fine-tuned models.\n    \"\"\"\n\n    finetuned_models: typing.Optional[typing.List[FinetunedModel]] = pydantic.Field(default=None)\n    \"\"\"\n    List of fine-tuned models matching the request.\n    \"\"\"\n\n    next_page_token: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Pagination token to retrieve the next page of results. If the value is \"\",\n    it means no further results for the request.\n    \"\"\"\n\n    total_size: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Total count of results.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/list_training_step_metrics_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\nfrom .training_step_metrics import TrainingStepMetrics\n\n\nclass ListTrainingStepMetricsResponse(UncheckedBaseModel):\n    \"\"\"\n    Response to a request to list training-step metrics of a fine-tuned model.\n    \"\"\"\n\n    step_metrics: typing.Optional[typing.List[TrainingStepMetrics]] = pydantic.Field(default=None)\n    \"\"\"\n    The metrics for each step the evaluation was run on.\n    \"\"\"\n\n    next_page_token: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Pagination token to retrieve the next page of results. If the value is \"\",\n    it means no further results for the request.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/lora_target_modules.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nLoraTargetModules = typing.Union[\n    typing.Literal[\n        \"LORA_TARGET_MODULES_UNSPECIFIED\",\n        \"LORA_TARGET_MODULES_QV\",\n        \"LORA_TARGET_MODULES_QKVO\",\n        \"LORA_TARGET_MODULES_QKVO_FFN\",\n    ],\n    typing.Any,\n]\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/settings.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\nfrom .base_model import BaseModel\nfrom .hyperparameters import Hyperparameters\nfrom .wandb_config import WandbConfig\n\n\nclass Settings(UncheckedBaseModel):\n    \"\"\"\n    The configuration used for fine-tuning.\n    \"\"\"\n\n    base_model: BaseModel = pydantic.Field()\n    \"\"\"\n    The base model to fine-tune.\n    \"\"\"\n\n    dataset_id: str = pydantic.Field()\n    \"\"\"\n    The data used for training and evaluating the fine-tuned model.\n    \"\"\"\n\n    hyperparameters: typing.Optional[Hyperparameters] = pydantic.Field(default=None)\n    \"\"\"\n    Fine-tuning hyper-parameters.\n    \"\"\"\n\n    multi_label: typing.Optional[bool] = pydantic.Field(default=None)\n    \"\"\"\n    read-only. Whether the model is single-label or multi-label (only for classification).\n    \"\"\"\n\n    wandb: typing.Optional[WandbConfig] = pydantic.Field(default=None)\n    \"\"\"\n    The Weights & Biases configuration (Chat fine-tuning only).\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/status.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nStatus = typing.Union[\n    typing.Literal[\n        \"STATUS_UNSPECIFIED\",\n        \"STATUS_FINETUNING\",\n        \"STATUS_DEPLOYING_API\",\n        \"STATUS_READY\",\n        \"STATUS_FAILED\",\n        \"STATUS_DELETED\",\n        \"STATUS_TEMPORARILY_OFFLINE\",\n        \"STATUS_PAUSED\",\n        \"STATUS_QUEUED\",\n    ],\n    typing.Any,\n]\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/strategy.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nStrategy = typing.Union[typing.Literal[\"STRATEGY_UNSPECIFIED\", \"STRATEGY_VANILLA\", \"STRATEGY_TFEW\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/training_step_metrics.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport datetime as dt\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\n\n\nclass TrainingStepMetrics(UncheckedBaseModel):\n    \"\"\"\n    The evaluation metrics at a given step of the training of a fine-tuned model.\n    \"\"\"\n\n    created_at: typing.Optional[dt.datetime] = pydantic.Field(default=None)\n    \"\"\"\n    Creation timestamp.\n    \"\"\"\n\n    step_number: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Step number.\n    \"\"\"\n\n    metrics: typing.Optional[typing.Dict[str, float]] = pydantic.Field(default=None)\n    \"\"\"\n    Map of names and values for each evaluation metric.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/update_finetuned_model_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\nfrom .finetuned_model import FinetunedModel\n\n\nclass UpdateFinetunedModelResponse(UncheckedBaseModel):\n    \"\"\"\n    Response to a request to update a fine-tuned model.\n    \"\"\"\n\n    finetuned_model: typing.Optional[FinetunedModel] = pydantic.Field(default=None)\n    \"\"\"\n    Information about the fine-tuned model.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/finetuning/types/wandb_config.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ....core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ....core.unchecked_base_model import UncheckedBaseModel\n\n\nclass WandbConfig(UncheckedBaseModel):\n    \"\"\"\n    The Weights & Biases configuration.\n    \"\"\"\n\n    project: str = pydantic.Field()\n    \"\"\"\n    The WandB project name to be used during training.\n    \"\"\"\n\n    api_key: str = pydantic.Field()\n    \"\"\"\n    The WandB API key to be used during training.\n    \"\"\"\n\n    entity: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The WandB entity name to be used during training.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/finetuning/raw_client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\nfrom json.decoder import JSONDecodeError\n\nfrom ..core.api_error import ApiError\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.http_response import AsyncHttpResponse, HttpResponse\nfrom ..core.jsonable_encoder import jsonable_encoder\nfrom ..core.parse_error import ParsingError\nfrom ..core.request_options import RequestOptions\nfrom ..core.serialization import convert_and_respect_annotation_metadata\nfrom ..core.unchecked_base_model import construct_type\nfrom ..errors.bad_request_error import BadRequestError\nfrom ..errors.forbidden_error import ForbiddenError\nfrom ..errors.internal_server_error import InternalServerError\nfrom ..errors.not_found_error import NotFoundError\nfrom ..errors.service_unavailable_error import ServiceUnavailableError\nfrom ..errors.unauthorized_error import UnauthorizedError\nfrom .finetuning.types.create_finetuned_model_response import CreateFinetunedModelResponse\nfrom .finetuning.types.delete_finetuned_model_response import DeleteFinetunedModelResponse\nfrom .finetuning.types.finetuned_model import FinetunedModel\nfrom .finetuning.types.get_finetuned_model_response import GetFinetunedModelResponse\nfrom .finetuning.types.list_events_response import ListEventsResponse\nfrom .finetuning.types.list_finetuned_models_response import ListFinetunedModelsResponse\nfrom .finetuning.types.list_training_step_metrics_response import ListTrainingStepMetricsResponse\nfrom .finetuning.types.settings import Settings\nfrom .finetuning.types.update_finetuned_model_response import UpdateFinetunedModelResponse\nfrom pydantic import ValidationError\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass RawFinetuningClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    def 
list_finetuned_models(\n        self,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        order_by: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[ListFinetunedModelsResponse]:\n        \"\"\"\n        Returns a list of fine-tuned models that the user has access to.\n\n        Parameters\n        ----------\n        page_size : typing.Optional[int]\n            Maximum number of results to be returned by the server. If 0, defaults to\n            50.\n\n        page_token : typing.Optional[str]\n            Request a specific page of the list results.\n\n        order_by : typing.Optional[str]\n            Comma separated list of fields. For example: \"created_at,name\". The default\n            sorting order is ascending. To specify descending order for a field, append\n            \" desc\" to the field name. For example: \"created_at desc,name\".\n\n            Supported sorting fields:\n              - created_at (default)\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[ListFinetunedModelsResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/finetuning/finetuned-models\",\n            method=\"GET\",\n            params={\n                \"page_size\": page_size,\n                \"page_token\": page_token,\n                \"order_by\": order_by,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListFinetunedModelsResponse,\n                    construct_type(\n                        type_=ListFinetunedModelsResponse,  # type: ignore\n                        
object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise 
InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def create_finetuned_model(\n        self, *, request: FinetunedModel, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[CreateFinetunedModelResponse]:\n        \"\"\"\n        Creates a new fine-tuned model. The model will be trained on the dataset specified in the request body. 
The training process may take some time, and the model will be available once the training is complete.\n\n        Parameters\n        ----------\n        request : FinetunedModel\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[CreateFinetunedModelResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/finetuning/finetuned-models\",\n            method=\"POST\",\n            json=convert_and_respect_annotation_metadata(object_=request, annotation=FinetunedModel, direction=\"write\"),\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    CreateFinetunedModelResponse,\n                    construct_type(\n                        type_=CreateFinetunedModelResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        
construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n 
           _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def get_finetuned_model(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[GetFinetunedModelResponse]:\n        \"\"\"\n        Retrieve a fine-tuned model by its ID.\n\n        Parameters\n        ----------\n        id : str\n            The fine-tuned model ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[GetFinetunedModelResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/finetuning/finetuned-models/{jsonable_encoder(id)}\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    GetFinetunedModelResponse,\n                    construct_type(\n                        type_=GetFinetunedModelResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n            
                type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def delete_finetuned_model(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[DeleteFinetunedModelResponse]:\n        \"\"\"\n        Deletes a fine-tuned model. 
The model will be removed from the system and will no longer be available for use.\n        This operation is irreversible.\n\n        Parameters\n        ----------\n        id : str\n            The fine-tuned model ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[DeleteFinetunedModelResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/finetuning/finetuned-models/{jsonable_encoder(id)}\",\n            method=\"DELETE\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DeleteFinetunedModelResponse,\n                    construct_type(\n                        type_=DeleteFinetunedModelResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    
),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), 
body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def update_finetuned_model(\n        self, id: str, *, name: str, settings: Settings, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[UpdateFinetunedModelResponse]:\n        \"\"\"\n        Updates the fine-tuned model with the given ID. The model will be updated with the new settings and name provided in the request body.\n\n        Parameters\n        ----------\n        id : str\n            FinetunedModel ID.\n\n        name : str\n            FinetunedModel name (e.g. `foobar`).\n\n        settings : Settings\n            FinetunedModel settings such as dataset, hyperparameters...\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[UpdateFinetunedModelResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/finetuning/finetuned-models/{jsonable_encoder(id)}\",\n            method=\"PATCH\",\n            json={\n                \"name\": name,\n                \"settings\": convert_and_respect_annotation_metadata(\n                    object_=settings, annotation=Settings, direction=\"write\"\n                ),\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    UpdateFinetunedModelResponse,\n                    construct_type(\n        
                type_=UpdateFinetunedModelResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n    
            )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def list_events(\n        self,\n        finetuned_model_id: str,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        order_by: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[ListEventsResponse]:\n        \"\"\"\n        Returns a list of events that occurred during the life-cycle of the fine-tuned model.\n        The events are ordered by creation time, with the most recent event first.\n        The list can be 
paginated using `page_size` and `page_token` parameters.\n\n        Parameters\n        ----------\n        finetuned_model_id : str\n            The parent fine-tuned model ID.\n\n        page_size : typing.Optional[int]\n            Maximum number of results to be returned by the server. If 0, defaults to\n            50.\n\n        page_token : typing.Optional[str]\n            Request a specific page of the list results.\n\n        order_by : typing.Optional[str]\n            Comma separated list of fields. For example: \"created_at,name\". The default\n            sorting order is ascending. To specify descending order for a field, append\n            \" desc\" to the field name. For example: \"created_at desc,name\".\n\n            Supported sorting fields:\n              - created_at (default)\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[ListEventsResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/finetuning/finetuned-models/{jsonable_encoder(finetuned_model_id)}/events\",\n            method=\"GET\",\n            params={\n                \"page_size\": page_size,\n                \"page_token\": page_token,\n                \"order_by\": order_by,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListEventsResponse,\n                    construct_type(\n                        type_=ListEventsResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    
headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: 
ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def list_training_step_metrics(\n        self,\n        finetuned_model_id: str,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[ListTrainingStepMetricsResponse]:\n        \"\"\"\n        Returns a list of metrics measured during the training of a fine-tuned model.\n        The metrics are ordered by step number, with the most recent step first.\n        The list can be paginated using `page_size` and `page_token` parameters.\n\n        Parameters\n        ----------\n        finetuned_model_id : str\n            The parent fine-tuned model ID.\n\n        page_size : typing.Optional[int]\n            Maximum number of results to be returned by the server. 
If 0, defaults to\n            50.\n\n        page_token : typing.Optional[str]\n            Request a specific page of the list results.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[ListTrainingStepMetricsResponse]\n            A successful response.\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/finetuning/finetuned-models/{jsonable_encoder(finetuned_model_id)}/training-step-metrics\",\n            method=\"GET\",\n            params={\n                \"page_size\": page_size,\n                \"page_token\": page_token,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListTrainingStepMetricsResponse,\n                    construct_type(\n                        type_=ListTrainingStepMetricsResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                   
         object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise 
ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n\nclass AsyncRawFinetuningClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    async def list_finetuned_models(\n        self,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        order_by: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[ListFinetunedModelsResponse]:\n        \"\"\"\n        Returns a list of fine-tuned models that the user has access to.\n\n        Parameters\n        ----------\n        page_size : typing.Optional[int]\n            Maximum number of results to be returned by the server. If 0, defaults to\n            50.\n\n        page_token : typing.Optional[str]\n            Request a specific page of the list results.\n\n        order_by : typing.Optional[str]\n            Comma separated list of fields. For example: \"created_at,name\". The default\n            sorting order is ascending. To specify descending order for a field, append\n            \" desc\" to the field name. 
For example: \"created_at desc,name\".\n\n            Supported sorting fields:\n              - created_at (default)\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[ListFinetunedModelsResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/finetuning/finetuned-models\",\n            method=\"GET\",\n            params={\n                \"page_size\": page_size,\n                \"page_token\": page_token,\n                \"order_by\": order_by,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListFinetunedModelsResponse,\n                    construct_type(\n                        type_=ListFinetunedModelsResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n 
                       ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise 
ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def create_finetuned_model(\n        self, *, request: FinetunedModel, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[CreateFinetunedModelResponse]:\n        \"\"\"\n        Creates a new fine-tuned model. The model will be trained on the dataset specified in the request body. The training process may take some time, and the model will be available once the training is complete.\n\n        Parameters\n        ----------\n        request : FinetunedModel\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[CreateFinetunedModelResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/finetuning/finetuned-models\",\n            method=\"POST\",\n            json=convert_and_respect_annotation_metadata(object_=request, annotation=FinetunedModel, direction=\"write\"),\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    CreateFinetunedModelResponse,\n                    construct_type(\n                        type_=CreateFinetunedModelResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n              
  return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n             
       body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def get_finetuned_model(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[GetFinetunedModelResponse]:\n        \"\"\"\n        Retrieve a fine-tuned model by its ID.\n\n        Parameters\n        ----------\n        id : str\n            The fine-tuned model ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[GetFinetunedModelResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/finetuning/finetuned-models/{jsonable_encoder(id)}\",\n           
 method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    GetFinetunedModelResponse,\n                    construct_type(\n                        type_=GetFinetunedModelResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                 
   body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def delete_finetuned_model(\n        self, id: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[DeleteFinetunedModelResponse]:\n        \"\"\"\n        Deletes a fine-tuned model. 
The model will be removed from the system and will no longer be available for use.\n        This operation is irreversible.\n\n        Parameters\n        ----------\n        id : str\n            The fine-tuned model ID.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[DeleteFinetunedModelResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/finetuning/finetuned-models/{jsonable_encoder(id)}\",\n            method=\"DELETE\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DeleteFinetunedModelResponse,\n                    construct_type(\n                        type_=DeleteFinetunedModelResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n      
              ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, 
headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def update_finetuned_model(\n        self, id: str, *, name: str, settings: Settings, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[UpdateFinetunedModelResponse]:\n        \"\"\"\n        Updates the fine-tuned model with the given ID. The model will be updated with the new settings and name provided in the request body.\n\n        Parameters\n        ----------\n        id : str\n            FinetunedModel ID.\n\n        name : str\n            FinetunedModel name (e.g. `foobar`).\n\n        settings : Settings\n            FinetunedModel settings such as dataset, hyperparameters...\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[UpdateFinetunedModelResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/finetuning/finetuned-models/{jsonable_encoder(id)}\",\n            method=\"PATCH\",\n            json={\n                \"name\": name,\n                \"settings\": convert_and_respect_annotation_metadata(\n                    object_=settings, annotation=Settings, direction=\"write\"\n                ),\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    
UpdateFinetunedModelResponse,\n                    construct_type(\n                        type_=UpdateFinetunedModelResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            
object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def list_events(\n        self,\n        finetuned_model_id: str,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        order_by: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[ListEventsResponse]:\n        \"\"\"\n        Returns a list of events that occurred during the life-cycle of the fine-tuned model.\n        The events 
are ordered by creation time, with the most recent event first.\n        The list can be paginated using `page_size` and `page_token` parameters.\n\n        Parameters\n        ----------\n        finetuned_model_id : str\n            The parent fine-tuned model ID.\n\n        page_size : typing.Optional[int]\n            Maximum number of results to be returned by the server. If 0, defaults to\n            50.\n\n        page_token : typing.Optional[str]\n            Request a specific page of the list results.\n\n        order_by : typing.Optional[str]\n            Comma separated list of fields. For example: \"created_at,name\". The default\n            sorting order is ascending. To specify descending order for a field, append\n            \" desc\" to the field name. For example: \"created_at desc,name\".\n\n            Supported sorting fields:\n              - created_at (default)\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[ListEventsResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/finetuning/finetuned-models/{jsonable_encoder(finetuned_model_id)}/events\",\n            method=\"GET\",\n            params={\n                \"page_size\": page_size,\n                \"page_token\": page_token,\n                \"order_by\": order_by,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListEventsResponse,\n                    construct_type(\n                        type_=ListEventsResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if 
_response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n            
            construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def list_training_step_metrics(\n        self,\n        finetuned_model_id: str,\n        *,\n        page_size: typing.Optional[int] = None,\n        page_token: typing.Optional[str] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[ListTrainingStepMetricsResponse]:\n        \"\"\"\n        Returns a list of metrics measured during the training of a fine-tuned model.\n        The metrics are ordered by step number, with the most recent step first.\n        The list can be paginated using `page_size` and `page_token` parameters.\n\n        Parameters\n        ----------\n        finetuned_model_id : str\n            The parent fine-tuned model ID.\n\n        page_size : typing.Optional[int]\n            Maximum number of results to be 
returned by the server. If 0, defaults to\n            50.\n\n        page_token : typing.Optional[str]\n            Request a specific page of the list results.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[ListTrainingStepMetricsResponse]\n            A successful response.\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/finetuning/finetuned-models/{jsonable_encoder(finetuned_model_id)}/training-step-metrics\",\n            method=\"GET\",\n            params={\n                \"page_size\": page_size,\n                \"page_token\": page_token,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListTrainingStepMetricsResponse,\n                    construct_type(\n                        type_=ListTrainingStepMetricsResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            
type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = 
_response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n"
  },
  {
    "path": "src/cohere/manually_maintained/__init__.py",
    "content": "# This module ensures overrides are applied early in the import process\n# Import overrides to trigger backwards compatibility patches\nfrom .. import overrides  # noqa: F401\n"
  },
  {
    "path": "src/cohere/manually_maintained/cache.py",
    "content": "import typing\nimport time\n\n\nclass CacheMixin:\n    # A simple in-memory cache with TTL (thread safe). This is used to cache tokenizers at the moment.\n    _cache: typing.Dict[str, typing.Tuple[typing.Optional[float], typing.Any]] = dict()\n\n    def _cache_get(self, key: str) -> typing.Any:\n        val = self._cache.get(key)\n        if val is None:\n            return None\n        expiry_timestamp, value = val\n        if expiry_timestamp is None or expiry_timestamp > time.time():\n            return value\n\n        del self._cache[key]  # remove expired cache entry\n\n    def _cache_set(self, key: str, value: typing.Any, ttl: int = 60 * 60) -> None:\n        expiry_timestamp = None\n        if ttl is not None:\n            expiry_timestamp = time.time() + ttl\n        self._cache[key] = (expiry_timestamp, value)\n"
  },
  {
    "path": "src/cohere/manually_maintained/cohere_aws/__init__.py",
    "content": "from .client import Client\nfrom .error import CohereError\nfrom .mode import Mode\n"
  },
  {
    "path": "src/cohere/manually_maintained/cohere_aws/chat.py",
    "content": "from .response import CohereObject\nfrom .error import CohereError\nfrom .mode import Mode\nfrom typing import List, Optional, Generator, Dict, Any, Union\nfrom enum import Enum\nimport json\n\n# Tools\n\nclass ToolParameterDefinitionsValue(CohereObject, dict):\n    def __init__(\n        self,\n        type: str,\n        description: str,\n        required: Optional[bool] = None,\n        **kwargs,\n    ) -> None:\n        super().__init__(**kwargs)\n        self.__dict__ = self\n        self.type = type\n        self.description = description\n        if required is not None:\n            self.required = required\n\n\nclass Tool(CohereObject, dict):\n    def __init__(\n        self,\n        name: str,\n        description: str,\n        parameter_definitions: Optional[Dict[str, ToolParameterDefinitionsValue]] = None,\n        **kwargs,\n    ) -> None:\n        super().__init__(**kwargs)\n        self.__dict__ = self\n        self.name = name\n        self.description = description\n        if parameter_definitions is not None:\n            self.parameter_definitions = parameter_definitions\n\n\nclass ToolCall(CohereObject, dict):\n    def __init__(\n        self,\n        name: str,\n        parameters: Dict[str, Any],\n        generation_id: str,\n        **kwargs,\n    ) -> None:\n        super().__init__(**kwargs)\n        self.__dict__ = self\n        self.name = name\n        self.parameters = parameters\n        self.generation_id = generation_id\n\n    @classmethod\n    def from_dict(cls, tool_call_res: Dict[str, Any]) -> \"ToolCall\":\n        return cls(\n            name=tool_call_res.get(\"name\"),\n            parameters=tool_call_res.get(\"parameters\"),\n            generation_id=tool_call_res.get(\"generation_id\"),\n        )\n\n    @classmethod\n    def from_list(cls, tool_calls_res: Optional[List[Dict[str, Any]]]) -> Optional[List[\"ToolCall\"]]:\n        if tool_calls_res is None or not isinstance(tool_calls_res, list):\n      
      return None\n\n        return [ToolCall.from_dict(tc) for tc in tool_calls_res]\n\n# Chat\n\nclass Chat(CohereObject):\n    def __init__(\n        self,\n        response_id: str,\n        generation_id: str,\n        text: str,\n        chat_history: Optional[List[Dict[str, Any]]] = None,\n        preamble: Optional[str] = None,\n        finish_reason: Optional[str] = None,\n        token_count: Optional[Dict[str, int]] = None,\n        tool_calls: Optional[List[ToolCall]] = None,\n        citations: Optional[List[Dict[str, Any]]] = None,\n        documents: Optional[List[Dict[str, Any]]] = None,\n        search_results: Optional[List[Dict[str, Any]]] = None,\n        search_queries: Optional[List[Dict[str, Any]]] = None,\n        is_search_required: Optional[bool] = None,\n    ) -> None:\n        self.response_id = response_id\n        self.generation_id = generation_id\n        self.text = text\n        self.chat_history = chat_history\n        self.preamble = preamble\n        self.finish_reason = finish_reason\n        self.token_count = token_count\n        self.tool_calls = tool_calls\n        self.citations = citations\n        self.documents = documents\n        self.search_results = search_results\n        self.search_queries = search_queries\n        self.is_search_required = is_search_required\n\n    @classmethod\n    def from_dict(cls, response: Dict[str, Any]) -> \"Chat\":\n        return cls(\n            response_id=response[\"response_id\"],\n            generation_id=response.get(\"generation_id\"),  # optional\n            text=response.get(\"text\"),\n            chat_history=response.get(\"chat_history\"),  # optional\n            preamble=response.get(\"preamble\"),  # optional\n            token_count=response.get(\"token_count\"),\n            is_search_required=response.get(\"is_search_required\"),  # optional\n            citations=response.get(\"citations\"),  # optional\n            documents=response.get(\"documents\"),  # 
optional\n            search_results=response.get(\"search_results\"),  # optional\n            search_queries=response.get(\"search_queries\"),  # optional\n            finish_reason=response.get(\"finish_reason\"),\n            tool_calls=ToolCall.from_list(response.get(\"tool_calls\")),  # optional\n        )\n\n# ---------------|\n# Steaming event |\n# ---------------|\n\nclass StreamEvent(str, Enum):\n    STREAM_START = \"stream-start\"\n    SEARCH_QUERIES_GENERATION = \"search-queries-generation\"\n    SEARCH_RESULTS = \"search-results\"\n    TEXT_GENERATION = \"text-generation\"\n    TOOL_CALLS_GENERATION = \"tool-calls-generation\"\n    CITATION_GENERATION = \"citation-generation\"\n    STREAM_END = \"stream-end\"\n\nclass StreamResponse(CohereObject):\n    def __init__(\n        self,\n        is_finished: bool,\n        event_type: Union[StreamEvent, str],\n        index: Optional[int],\n        **kwargs,\n    ) -> None:\n        super().__init__(**kwargs)\n        self.is_finished = is_finished\n        self.index = index\n        self.event_type = event_type\n\n\nclass StreamStart(StreamResponse):\n    def __init__(\n        self,\n        generation_id: str,\n        conversation_id: Optional[str],\n        **kwargs,\n    ) -> None:\n        super().__init__(**kwargs)\n        self.generation_id = generation_id\n        self.conversation_id = conversation_id\n\n\nclass StreamTextGeneration(StreamResponse):\n    def __init__(\n        self,\n        text: str,\n        **kwargs,\n    ) -> None:\n        super().__init__(**kwargs)\n        self.text = text\n\n\nclass StreamCitationGeneration(StreamResponse):\n    def __init__(\n        self,\n        citations: Optional[List[Dict[str, Any]]],\n        **kwargs,\n    ) -> None:\n        super().__init__(**kwargs)\n        self.citations = citations\n\n\nclass StreamQueryGeneration(StreamResponse):\n    def __init__(\n        self,\n        search_queries: Optional[List[Dict[str, Any]]],\n        
**kwargs,\n    ) -> None:\n        super().__init__(**kwargs)\n        self.search_queries = search_queries\n\n\nclass StreamSearchResults(StreamResponse):\n    def __init__(\n        self,\n        search_results: Optional[List[Dict[str, Any]]],\n        documents: Optional[List[Dict[str, Any]]],\n        **kwargs,\n    ) -> None:\n        super().__init__(**kwargs)\n        self.search_results = search_results\n        self.documents = documents\n\n\nclass StreamEnd(StreamResponse):\n    def __init__(\n        self,\n        finish_reason: str,\n        **kwargs,\n    ) -> None:\n        super().__init__(**kwargs)\n        self.finish_reason = finish_reason\n\n\nclass ChatToolCallsGenerationEvent(StreamResponse):\n    def __init__(\n        self,\n        tool_calls: Optional[List[ToolCall]],\n        **kwargs,\n    ) -> None:\n        super().__init__(**kwargs)\n        self.tool_calls = tool_calls\n\nclass StreamingChat(CohereObject):\n    def __init__(self, stream_response, mode):\n        self.stream_response = stream_response\n        self.text = None\n        self.response_id = None\n        self.generation_id = None\n        self.preamble = None\n        self.prompt = None\n        self.chat_history = None\n        self.finish_reason = None\n        self.token_count = None\n        self.is_search_required = None\n        self.citations = None\n        self.documents = None\n        self.search_results = None\n        self.search_queries = None\n        self.tool_calls = None\n\n        self.bytes = bytearray()\n        if mode == Mode.SAGEMAKER:\n            self.payload_key = \"PayloadPart\"\n            self.bytes_key = \"Bytes\"\n        elif mode == Mode.BEDROCK:\n            self.payload_key = \"chunk\"\n            self.bytes_key = \"bytes\"\n\n    def _make_response_item(self, index, streaming_item) -> Any:\n        event_type = streaming_item.get(\"event_type\")\n\n        if event_type == StreamEvent.STREAM_START:\n            self.conversation_id 
= streaming_item.get(\"conversation_id\")\n            self.generation_id = streaming_item.get(\"generation_id\")\n            return StreamStart(\n                conversation_id=self.conversation_id,\n                generation_id=self.generation_id,\n                is_finished=False,\n                event_type=event_type,\n                index=index,\n            )\n        elif event_type == StreamEvent.SEARCH_QUERIES_GENERATION:\n            search_queries = streaming_item.get(\"search_queries\")\n            return StreamQueryGeneration(\n                search_queries=search_queries, is_finished=False, event_type=event_type, index=index\n            )\n        elif event_type == StreamEvent.SEARCH_RESULTS:\n            search_results = streaming_item.get(\"search_results\")\n            documents = streaming_item.get(\"documents\")\n            return StreamSearchResults(\n                search_results=search_results,\n                documents=documents,\n                is_finished=False,\n                event_type=event_type,\n                index=index,\n            )\n        elif event_type == StreamEvent.TEXT_GENERATION:\n            text = streaming_item.get(\"text\")\n            return StreamTextGeneration(text=text, is_finished=False, event_type=event_type, index=index)\n        elif event_type == StreamEvent.CITATION_GENERATION:\n            citations = streaming_item.get(\"citations\")\n            return StreamCitationGeneration(citations=citations, is_finished=False, event_type=event_type, index=index)\n        elif event_type == StreamEvent.TOOL_CALLS_GENERATION:\n            tool_calls = ToolCall.from_list(streaming_item.get(\"tool_calls\"))\n            return ChatToolCallsGenerationEvent(\n                tool_calls=tool_calls, is_finished=False, event_type=event_type, index=index\n            )\n        elif event_type == StreamEvent.STREAM_END:\n            response = streaming_item.get(\"response\")\n            finish_reason = 
streaming_item.get(\"finish_reason\")\n            self.finish_reason = finish_reason\n\n            if response is None:\n                return None\n\n            self.response_id = response.get(\"response_id\")\n            self.conversation_id = response.get(\"conversation_id\")\n            self.text = response.get(\"text\")\n            self.generation_id = response.get(\"generation_id\")\n            self.preamble = response.get(\"preamble\")\n            self.prompt = response.get(\"prompt\")\n            self.chat_history = response.get(\"chat_history\")\n            self.token_count = response.get(\"token_count\")\n            self.is_search_required = response.get(\"is_search_required\")  # optional\n            self.citations = response.get(\"citations\")  # optional\n            self.documents = response.get(\"documents\")  # optional\n            self.search_results = response.get(\"search_results\")  # optional\n            self.search_queries = response.get(\"search_queries\")  # optional\n            self.tool_calls = ToolCall.from_list(response.get(\"tool_calls\"))  # optional\n            return StreamEnd(finish_reason=finish_reason, is_finished=True, event_type=event_type, index=index)\n        return None\n\n    def __iter__(self) -> Generator[StreamResponse, None, None]:\n        index = 0\n        for payload in self.stream_response:\n            self.bytes.extend(payload[self.payload_key][self.bytes_key])\n            try:\n                item = self._make_response_item(index, json.loads(self.bytes))\n            except json.decoder.JSONDecodeError:\n                # payload contained only a partial JSON object\n                continue\n\n            self.bytes = bytearray()\n            if item is not None:\n                index += 1\n                yield item\n"
  },
  {
    "path": "src/cohere/manually_maintained/cohere_aws/classification.py",
    "content": "from .response import CohereObject\nfrom typing import Any, Dict, Iterator, List, Literal, Union\n\nPrediction = Union[str, int, List[str], List[int]]\nClassificationDict = Dict[Literal[\"prediction\", \"confidence\", \"text\"], Any]\n\n\nclass Classification(CohereObject):\n    def __init__(self, classification: Union[Prediction, ClassificationDict]) -> None:\n        # Prediction is the old format (version 1 of classification-finetuning)\n        # ClassificationDict is the new format (version 2 of classification-finetuning).\n        # It also contains the original text and the labels' confidence scores of the prediction\n        self.classification = classification\n\n    def is_multilabel(self) -> bool:\n        if isinstance(self.classification, list):\n            return True\n        elif isinstance(self.classification, (int, str)):\n            return False\n        return isinstance(self.classification[\"prediction\"], list)\n\n    @property\n    def prediction(self) -> Prediction:\n        if isinstance(self.classification, (list, int, str)):\n            return self.classification\n        return self.classification[\"prediction\"]\n\n    @property\n    def confidence(self) -> List[float]:\n        if isinstance(self.classification, (list, int, str)):\n            raise ValueError(\n                \"Confidence scores are not available for version prior to 2.0 of Cohere Classification Finetuning AWS package\"\n            )\n        return self.classification[\"confidence\"]\n\n    @property\n    def text(self) -> str:\n        if isinstance(self.classification, (list, int, str)):\n            raise ValueError(\n                \"Original text is not available for version prior to 2.0 of Cohere Classification Finetuning AWS package\"\n            )\n        return self.classification[\"text\"]\n\n\nclass Classifications(CohereObject):\n    def __init__(self, classifications: List[Classification]) -> None:\n        self.classifications = 
classifications\n        if len(self.classifications) > 0:\n            assert all(\n                [c.is_multilabel() == self.is_multilabel() for c in self.classifications]\n            ), \"All classifications must be of the same type (single-label or multi-label)\"\n\n    def __iter__(self) -> Iterator:\n        return iter(self.classifications)\n\n    def __len__(self) -> int:\n        return len(self.classifications)\n\n    def is_multilabel(self) -> bool:\n        return len(self.classifications) > 0 and self.classifications[0].is_multilabel()\n"
  },
  {
    "path": "src/cohere/manually_maintained/cohere_aws/client.py",
    "content": "import json\nimport os\nimport tarfile\nimport tempfile\nimport time\nfrom typing import Any, Dict, List, Optional, Union\n\nfrom .classification import Classification, Classifications\nfrom .embeddings import Embeddings\nfrom .error import CohereError\nfrom .generation import Generations, StreamingGenerations\nfrom .chat import Chat, StreamingChat\nfrom .rerank import Reranking\nfrom .summary import Summary\nfrom .mode import Mode\nimport typing\nfrom ..lazy_aws_deps import lazy_boto3, lazy_botocore, lazy_sagemaker\n\nclass Client:\n    def __init__(\n           self,\n            aws_region: typing.Optional[str] = None,\n            mode: Mode = Mode.SAGEMAKER,\n        ):\n        \"\"\"\n        By default we assume region configured in AWS CLI (`aws configure get region`). You can change the region with\n        `aws configure set region us-west-2` or override it with `region_name` parameter.\n        \"\"\"\n        self.mode = mode\n        if os.environ.get('AWS_DEFAULT_REGION') is None:\n            os.environ['AWS_DEFAULT_REGION'] = aws_region\n\n        if self.mode == Mode.SAGEMAKER:\n            self._client = lazy_boto3().client(\"sagemaker-runtime\", region_name=aws_region)\n            self._service_client = lazy_boto3().client(\"sagemaker\", region_name=aws_region)\n            self._sess = lazy_sagemaker().Session(sagemaker_client=self._service_client)\n        elif self.mode == Mode.BEDROCK:\n            self._client = lazy_boto3().client(\"bedrock-runtime\", region_name=aws_region)\n            self._service_client = lazy_boto3().client(\"bedrock\", region_name=aws_region)\n            self._sess = None\n            self._endpoint_name = None\n\n    def _require_sagemaker(self) -> None:\n        if self.mode != Mode.SAGEMAKER:\n            raise CohereError(\"This method is only supported in SageMaker mode.\")\n\n    def _does_endpoint_exist(self, endpoint_name: str) -> bool:\n        try:\n            
self._service_client.describe_endpoint(EndpointName=endpoint_name)\n        except lazy_botocore().ClientError:\n            return False\n        return True\n\n    def connect_to_endpoint(self, endpoint_name: str) -> None:\n        \"\"\"Connects to an existing SageMaker endpoint.\n\n        Args:\n            endpoint_name (str): The name of the endpoint.\n\n        Raises:\n            CohereError: Connection to the endpoint failed.\n        \"\"\"\n        self._require_sagemaker()\n        if not self._does_endpoint_exist(endpoint_name):\n            raise CohereError(f\"Endpoint {endpoint_name} does not exist.\")\n        self._endpoint_name = endpoint_name\n\n    def _s3_models_dir_to_tarfile(self, s3_models_dir: str) -> str:\n        \"\"\"\n        Compress an S3 folder which contains one or several fine-tuned models to a tar file.\n        If the S3 folder contains only one fine-tuned model, it simply returns the path to that model.\n        If the S3 folder contains several fine-tuned models, it downloads all models, aggregates them into a single\n        tar.gz file.\n\n        Args:\n            s3_models_dir (str): S3 URI pointing to a folder\n\n        Returns:\n            str: S3 URI pointing to the `models.tar.gz` file\n        \"\"\"\n\n        s3_models_dir = s3_models_dir.rstrip(\"/\") + \"/\"\n\n        # Links of all fine-tuned models in s3_models_dir. 
Their format should be .tar.gz\n        s3_tar_models = [\n            s3_path\n            for s3_path in lazy_sagemaker().s3.S3Downloader.list(s3_models_dir, sagemaker_session=self._sess)\n            if (\n                s3_path.endswith(\".tar.gz\")  # only .tar.gz files\n                and (s3_path.split(\"/\")[-1] != \"models.tar.gz\")  # exclude the .tar.gz file we are creating\n                and (s3_path.rsplit(\"/\", 1)[0] == s3_models_dir[:-1])  # only files at the root of s3_models_dir\n            )\n        ]\n\n        if len(s3_tar_models) == 0:\n            raise CohereError(f\"No fine-tuned models found in {s3_models_dir}\")\n        elif len(s3_tar_models) == 1:\n            print(f\"Found one fine-tuned model: {s3_tar_models[0]}\")\n            return s3_tar_models[0]\n\n        # More than one fine-tuned model found, need to aggregate them into a single .tar.gz file\n        with tempfile.TemporaryDirectory() as tmpdir:\n            local_tar_models_dir = os.path.join(tmpdir, \"tar\")\n            local_models_dir = os.path.join(tmpdir, \"models\")\n\n            # Download and extract all fine-tuned models\n            for s3_tar_model in s3_tar_models:\n                print(f\"Adding fine-tuned model: {s3_tar_model}\")\n                lazy_sagemaker().s3.S3Downloader.download(s3_tar_model, local_tar_models_dir, sagemaker_session=self._sess)\n                with tarfile.open(os.path.join(local_tar_models_dir, s3_tar_model.split(\"/\")[-1])) as tar:\n                    tar.extractall(local_models_dir)\n\n            # Compress local_models_dir to a tar.gz file\n            model_tar = os.path.join(tmpdir, \"models.tar.gz\")\n            with tarfile.open(model_tar, \"w:gz\") as tar:\n                tar.add(local_models_dir, arcname=\".\")\n\n            # Upload the new tarfile containing all models to s3\n            # Very important to remove the trailing slash from s3_models_dir otherwise it just doesn't upload\n            
model_tar_s3 = lazy_sagemaker().s3.S3Uploader.upload(model_tar, s3_models_dir[:-1], sagemaker_session=self._sess)\n\n            # sanity check\n            assert s3_models_dir + \"models.tar.gz\" in lazy_sagemaker().s3.S3Downloader.list(s3_models_dir, sagemaker_session=self._sess)\n\n        return model_tar_s3\n\n    def create_endpoint(\n        self,\n        arn: str,\n        endpoint_name: str,\n        s3_models_dir: Optional[str] = None,\n        instance_type: str = \"ml.g4dn.xlarge\",\n        n_instances: int = 1,\n        recreate: bool = False,\n        role: Optional[str] = None,\n    ) -> None:\n        \"\"\"Creates and deploys a SageMaker endpoint.\n\n        Args:\n            arn (str): The product ARN. Refers to a ready-to-use model (model package) or a fine-tuned model\n                (algorithm).\n            endpoint_name (str): The name of the endpoint.\n            s3_models_dir (str, optional): S3 URI pointing to the folder containing fine-tuned models. Defaults to None.\n            instance_type (str, optional): The EC2 instance type to deploy the endpoint to. Defaults to \"ml.g4dn.xlarge\".\n            n_instances (int, optional): Number of endpoint instances. Defaults to 1.\n            recreate (bool, optional): Force re-creation of endpoint if it already exists. Defaults to False.\n            role (str, optional): The IAM role to use for the endpoint. If not provided, sagemaker.get_execution_role()\n                will be used to get the role. This should work when one uses the client inside SageMaker. 
If this errors\n                out, the default role \"ServiceRoleSagemaker\" will be used, which generally works outside of SageMaker.\n        \"\"\"\n        self._require_sagemaker()\n        # First, check if endpoint already exists\n        if self._does_endpoint_exist(endpoint_name):\n            if recreate:\n                self.connect_to_endpoint(endpoint_name)\n                self.delete_endpoint()\n            else:\n                raise CohereError(f\"Endpoint {endpoint_name} already exists and recreate={recreate}.\")\n\n        kwargs = {}\n        model_data = None\n        validation_params = dict()\n        useBoto = False\n        if s3_models_dir is not None:\n            # If s3_models_dir is given, we assume to have custom fine-tuned models -> Algorithm\n            kwargs[\"algorithm_arn\"] = arn\n            model_data = self._s3_models_dir_to_tarfile(s3_models_dir)\n        else:\n            # If no s3_models_dir is given, we assume to use a pre-trained model -> ModelPackage\n            kwargs[\"model_package_arn\"] = arn\n\n            # For now only non-finetuned models can use these timeouts\n            validation_params = dict(\n                model_data_download_timeout=2400,\n                container_startup_health_check_timeout=2400\n            )\n            useBoto = True\n\n        # Out of precaution, check if there is an endpoint config and delete it if that's the case\n        # Otherwise it might block deployment\n        try:\n            self._service_client.delete_endpoint_config(EndpointConfigName=endpoint_name)\n        except lazy_botocore().ClientError:\n            pass\n\n        try:\n            self._service_client.delete_model(ModelName=endpoint_name)\n        except lazy_botocore().ClientError:\n            pass\n\n        if role is None:\n            if useBoto:\n                accountID = lazy_sagemaker().account_id()\n                role = f\"arn:aws:iam::{accountID}:role/ServiceRoleSagemaker\"\n   
         else:\n                try:\n                    role = lazy_sagemaker().get_execution_role()\n                except ValueError:\n                    print(\"Using default role: 'ServiceRoleSagemaker'.\")\n                    role = \"ServiceRoleSagemaker\"\n\n        # deploy fine-tuned model using sagemaker SDK\n        if s3_models_dir is not None:\n            model = lazy_sagemaker().ModelPackage(\n                role=role,\n                model_data=model_data,\n                sagemaker_session=self._sess,  # makes sure the right region is used\n                **kwargs\n            )\n\n            try:\n                model.deploy(\n                    n_instances,\n                    instance_type,\n                    endpoint_name=endpoint_name,\n                    **validation_params\n                )\n            except lazy_botocore().ParamValidationError:\n                # For at least some versions of python 3.6, SageMaker SDK does not support the validation_params\n                model.deploy(n_instances, instance_type, endpoint_name=endpoint_name)\n        else:\n            # deploy pre-trained model using boto to add InferenceAmiVersion\n            self._service_client.create_model(\n                ModelName=endpoint_name,\n                ExecutionRoleArn=role,\n                EnableNetworkIsolation=True,\n                PrimaryContainer={\n                    'ModelPackageName': arn,\n                },\n            )\n            self._service_client.create_endpoint_config(\n                EndpointConfigName=endpoint_name,\n                ProductionVariants=[\n                    {\n                        'VariantName': 'AllTraffic',\n                        'ModelName': endpoint_name,\n                        'InstanceType': instance_type,\n                        'InitialInstanceCount': n_instances,\n                        'InferenceAmiVersion': 'al2-ami-sagemaker-inference-gpu-2'\n                    },\n         
       ],\n            )\n            self._service_client.create_endpoint(\n                EndpointName=endpoint_name,\n                EndpointConfigName=endpoint_name,\n            )\n\n            waiter = self._service_client.get_waiter('endpoint_in_service')\n            try:\n                print(f\"Waiting for endpoint {endpoint_name} to be in service...\")\n                waiter.wait(\n                    EndpointName=endpoint_name,\n                    WaiterConfig={\n                        'Delay': 30,\n                        'MaxAttempts': 80\n                    }\n                )\n            except Exception as e:\n                raise CohereError(f\"Failed to create endpoint: {e}\")\n        self.connect_to_endpoint(endpoint_name)\n\n    def chat(\n        self,\n        message: str,\n        stream: Optional[bool] = False,\n        preamble: Optional[str] = None,\n        chat_history: Optional[List[Dict[str, Any]]] = None,\n        # should only be passed for stacked finetune deployment\n        model: Optional[str] = None,\n        # should only be passed for Bedrock mode; ignored otherwise\n        model_id: Optional[str] = None,\n        temperature: Optional[float] = None,\n        p: Optional[float] = None,\n        k: Optional[float] = None,\n        max_tokens: Optional[int] = None,\n        search_queries_only: Optional[bool] = None,\n        documents: Optional[List[Dict[str, Any]]] = None,\n        prompt_truncation: Optional[str] = None,\n        tools: Optional[List[Dict[str, Any]]] = None,\n        tool_results: Optional[List[Dict[str, Any]]] = None,\n        raw_prompting: Optional[bool] = False,\n        return_prompt: Optional[bool] = False,\n        variant: Optional[str] = None,\n    ) -> Union[Chat, StreamingChat]:\n        \"\"\"Returns a Chat object with the query reply.\n\n        Args:\n            message (str): The message to send to the chatbot.\n\n            stream (bool): Return streaming tokens.\n\n           
 preamble (str): (Optional) A string to override the preamble.\n            chat_history (List[Dict[str, str]]): (Optional) A list of entries used to construct the conversation. If provided, these messages will be used to build the prompt and the conversation_id will be ignored so no data will be stored to maintain state.\n\n            model (str): (Optional) The model to use for generating the response. Should only be passed for stacked finetune deployment.\n            model_id (str): (Optional) The model to use for generating the response. Should only be passed for Bedrock mode; ignored otherwise.\n            temperature (float): (Optional) The temperature to use for the response. The higher the temperature, the more random the response.\n            p (float): (Optional) The nucleus sampling probability.\n            k (float): (Optional) The top-k sampling probability.\n            max_tokens (int): (Optional) The max tokens generated for the next reply.\n\n            search_queries_only (bool): (Optional) When true, the response will only contain a list of generated `search_queries`, no reply from the model to the user's message will be generated.\n            documents (List[Dict[str, str]]): (Optional) Documents to use to generate grounded response with citations. 
Example:\n                documents=[\n                    {\n                        \"id\": \"national_geographic_everest\",\n                        \"title\": \"Height of Mount Everest\",\n                        \"snippet\": \"The height of Mount Everest is 29,035 feet\",\n                        \"url\": \"https://education.nationalgeographic.org/resource/mount-everest/\",\n                    },\n                    {\n                        \"id\": \"national_geographic_mariana\",\n                        \"title\": \"Depth of the Mariana Trench\",\n                        \"snippet\": \"The depth of the Mariana Trench is 36,070 feet\",\n                        \"url\": \"https://www.nationalgeographic.org/activity/mariana-trench-deepest-place-earth\",\n                    },\n                ],\n            prompt_truncation (str) (Optional): Defaults to `OFF`. Dictates how the prompt will be constructed. With `prompt_truncation` set to \"AUTO_PRESERVE_ORDER\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API. With `prompt_truncation` set to \"OFF\", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be raised.\n        Returns:\n            a Chat object if stream=False, or a StreamingChat object if stream=True\n\n        Examples:\n            A simple chat message:\n                >>> res = co.chat(message=\"Hey! How are you doing today?\")\n                >>> print(res.text)\n            Streaming chat:\n                >>> res = co.chat(\n                >>>     message=\"Hey! 
How are you doing today?\",\n                >>>     stream=True)\n                >>> for token in res:\n                >>>     print(token)\n            Stateless chat with chat history:\n                >>> res = co.chat(\n                >>>     chat_history=[\n                >>>         {'role': 'User', 'message': 'Hey! How are you doing today?'},\n                >>>         {'role': 'Chatbot', 'message': 'I am doing great! How can I help you?'},\n                >>>     ],\n                >>>     message=\"Tell me a joke!\",\n                >>> )\n                >>> print(res.text)\n            Chat message with documents to use to generate the response:\n                >>> res = co.chat(\n                >>>     \"How deep is the Mariana Trench\",\n                >>>     documents=[\n                >>>         {\n                >>>            \"id\": \"national_geographic_everest\",\n                >>>            \"title\": \"Height of Mount Everest\",\n                >>>            \"snippet\": \"The height of Mount Everest is 29,035 feet\",\n                >>>            \"url\": \"https://education.nationalgeographic.org/resource/mount-everest/\",\n                >>>         },\n                >>>         {\n                >>>             \"id\": \"national_geographic_mariana\",\n                >>>             \"title\": \"Depth of the Mariana Trench\",\n                >>>             \"snippet\": \"The depth of the Mariana Trench is 36,070 feet\",\n                >>>             \"url\": \"https://www.nationalgeographic.org/activity/mariana-trench-deepest-place-earth\",\n                >>>         },\n                >>>       ])\n                >>> print(res.text)\n                >>> print(res.citations)\n                >>> print(res.documents)\n            Generate search queries for fetching documents to use in chat:\n                >>> res = co.chat(\n                >>>     \"What is the height of Mount Everest?\",\n                >>>      
search_queries_only=True)\n                >>> if res.is_search_required:\n                >>>      print(res.search_queries)\n        \"\"\"\n         \n        if self.mode == Mode.SAGEMAKER and self._endpoint_name is None:\n            raise CohereError(\"No endpoint connected. \"\n                              \"Run connect_to_endpoint() first.\")\n        json_params = {\n            \"model\": model,\n            \"message\": message,\n            \"chat_history\": chat_history,\n            \"preamble\": preamble,\n            \"temperature\": temperature,\n            \"max_tokens\": max_tokens,\n            \"stream\": stream,\n            \"p\": p,\n            \"k\": k,\n            \"tools\": tools,\n            \"tool_results\": tool_results,\n            \"search_queries_only\": search_queries_only,\n            \"documents\": documents,\n            \"raw_prompting\": raw_prompting,\n            \"return_prompt\": return_prompt,\n            \"prompt_truncation\": prompt_truncation\n        }\n    \n        for key, value in list(json_params.items()):\n            if value is None:\n                del json_params[key]\n\n        if self.mode == Mode.SAGEMAKER:\n            return self._sagemaker_chat(json_params, variant)\n        elif self.mode == Mode.BEDROCK:\n            return self._bedrock_chat(json_params, model_id)\n        else:\n            raise CohereError(\"Unsupported mode\")\n\n    def _sagemaker_chat(self, json_params: Dict[str, Any], variant: str) :\n        json_body = json.dumps(json_params)\n        params = {\n            'EndpointName': self._endpoint_name,\n            'ContentType': 'application/json',\n            'Body': json_body,\n        }\n        if variant:\n            params['TargetVariant'] = variant\n\n        try:\n            if json_params['stream']:\n                result = self._client.invoke_endpoint_with_response_stream(\n                    **params)\n                return StreamingChat(result['Body'], 
self.mode)\n            else:\n                result = self._client.invoke_endpoint(**params)\n                return Chat.from_dict(json.loads(result['Body'].read().decode()))\n        except lazy_botocore().EndpointConnectionError as e:\n            raise CohereError(str(e))\n        except Exception as e:\n            # TODO should be client error - distinct type from CohereError?\n            # ValidationError, e.g. when variant is bad\n            raise CohereError(str(e))\n\n    def _bedrock_chat(self, json_params: Dict[str, Any], model_id: str) :\n        if not model_id:\n            raise CohereError(\"must supply model_id arg when calling bedrock\")\n        if json_params['stream']:\n            stream = json_params['stream']\n        else:\n            stream = False\n        # Bedrock does not expect the stream key to be present in the body, use invoke_model_with_response_stream to indicate stream mode\n        del json_params['stream']\n\n        json_body = json.dumps(json_params)\n        params = {\n            'body': json_body,\n            'modelId': model_id,\n        }\n\n        try:\n            if stream:\n                result = self._client.invoke_model_with_response_stream(\n                    **params)\n                return StreamingChat(result['body'], self.mode)\n            else:\n                result = self._client.invoke_model(**params)\n                return Chat.from_dict(\n                    json.loads(result['body'].read().decode()))\n        except lazy_botocore().EndpointConnectionError as e:\n            raise CohereError(str(e))\n        except Exception as e:\n            # TODO should be client error - distinct type from CohereError?\n            # ValidationError, e.g. 
when variant is bad\n            raise CohereError(str(e))\n\n    def generate(\n        self,\n        prompt: str,\n        # should only be passed for stacked finetune deployment\n        model: Optional[str] = None,\n        # should only be passed for Bedrock mode; ignored otherwise\n        model_id: Optional[str] = None,\n        # requires DB with presets\n        # preset: str = None,\n        num_generations: int = 1,\n        max_tokens: int = 400,\n        temperature: float = 1.0,\n        k: int = 0,\n        p: float = 0.75,\n        stop_sequences: Optional[List[str]] = None,\n        return_likelihoods: Optional[str] = None,\n        truncate: Optional[str] = None,\n        variant: Optional[str] = None,\n        stream: Optional[bool] = True,\n    ) -> Union[Generations, StreamingGenerations]:\n        if self.mode == Mode.SAGEMAKER and self._endpoint_name is None:\n            raise CohereError(\"No endpoint connected. \"\n                              \"Run connect_to_endpoint() first.\")\n\n        json_params = {\n            'model': model,\n            'prompt': prompt,\n            'max_tokens': max_tokens,\n            'temperature': temperature,\n            'k': k,\n            'p': p,\n            'stop_sequences': stop_sequences,\n            'return_likelihoods': return_likelihoods,\n            'truncate': truncate,\n            'stream': stream,\n        }\n        for key, value in list(json_params.items()):\n            if value is None:\n                del json_params[key]\n\n        if self.mode == Mode.SAGEMAKER:\n            # TODO: Bedrock should support this param too\n            json_params['num_generations'] = num_generations\n            return self._sagemaker_generations(json_params, variant)\n        elif self.mode == Mode.BEDROCK:\n            return self._bedrock_generations(json_params, model_id)\n        else:\n            raise CohereError(\"Unsupported mode\")\n\n    def _sagemaker_generations(self, json_params: 
Dict[str, Any], variant: str) :\n        json_body = json.dumps(json_params)\n        params = {\n            'EndpointName': self._endpoint_name,\n            'ContentType': 'application/json',\n            'Body': json_body,\n        }\n        if variant:\n            params['TargetVariant'] = variant\n\n        try:\n            if json_params['stream']:\n                result = self._client.invoke_endpoint_with_response_stream(\n                    **params)\n                return StreamingGenerations(result['Body'], self.mode)\n            else:\n                result = self._client.invoke_endpoint(**params)\n                return Generations(\n                    json.loads(result['Body'].read().decode())['generations'])\n        except lazy_botocore().EndpointConnectionError as e:\n            raise CohereError(str(e))\n        except Exception as e:\n            # TODO should be client error - distinct type from CohereError?\n            # ValidationError, e.g. when variant is bad\n            raise CohereError(str(e))\n\n    def _bedrock_generations(self, json_params: Dict[str, Any], model_id: str) :\n        if not model_id:\n            raise CohereError(\"must supply model_id arg when calling bedrock\")\n        json_body = json.dumps(json_params)\n        params = {\n            'body': json_body,\n            'modelId': model_id,\n        }\n\n        try:\n            if json_params['stream']:\n                result = self._client.invoke_model_with_response_stream(\n                    **params)\n                return StreamingGenerations(result['body'], self.mode)\n            else:\n                result = self._client.invoke_model(**params)\n                return Generations(\n                    json.loads(result['body'].read().decode())['generations'])\n        except lazy_botocore().EndpointConnectionError as e:\n            raise CohereError(str(e))\n        except Exception as e:\n            # TODO should be client error - distinct 
type from CohereError?\n            # ValidationError, e.g. when variant is bad\n            raise CohereError(str(e))\n\n    def embed(\n        self,\n        texts: List[str],\n        truncate: Optional[str] = None,\n        variant: Optional[str] = None,\n        input_type: Optional[str] = None,\n        model_id: Optional[str] = None,\n        output_dimension: Optional[int] = None,\n        embedding_types: Optional[List[str]] = None,\n    ) -> Union[Embeddings, Dict[str, List]]:\n        json_params = {\n            'texts': texts,\n            'truncate': truncate,\n            \"input_type\": input_type,\n            \"output_dimension\": output_dimension,\n            \"embedding_types\": embedding_types,\n        }\n        for key, value in list(json_params.items()):\n            if value is None:\n                del json_params[key]\n        \n        if self.mode == Mode.SAGEMAKER:\n            return self._sagemaker_embed(json_params, variant)\n        elif self.mode == Mode.BEDROCK:\n            return self._bedrock_embed(json_params, model_id)\n        else:\n            raise CohereError(\"Unsupported mode\")\n\n    def _sagemaker_embed(self, json_params: Dict[str, Any], variant: str):\n        if self._endpoint_name is None:\n            raise CohereError(\"No endpoint connected. 
\"\n                              \"Run connect_to_endpoint() first.\")\n        \n        json_body = json.dumps(json_params)\n        params = {\n            'EndpointName': self._endpoint_name,\n            'ContentType': 'application/json',\n            'Body': json_body,\n        }\n        if variant:\n            params['TargetVariant'] = variant\n\n        try:\n            result = self._client.invoke_endpoint(**params)\n            response = json.loads(result['Body'].read().decode())\n        except lazy_botocore().EndpointConnectionError as e:\n            raise CohereError(str(e))\n        except Exception as e:\n            # TODO should be client error - distinct type from CohereError?\n            # ValidationError, e.g. when variant is bad\n            raise CohereError(str(e))\n\n        embeddings = response['embeddings']\n        if isinstance(embeddings, dict):\n            return embeddings\n        return Embeddings(embeddings)\n\n    def _bedrock_embed(self, json_params: Dict[str, Any], model_id: str):\n        if not model_id:\n            raise CohereError(\"must supply model_id arg when calling bedrock\")\n        json_body = json.dumps(json_params)\n        params = {\n            'body': json_body,\n            'modelId': model_id,\n        }\n\n        try:\n            result = self._client.invoke_model(**params)\n            response = json.loads(result['body'].read().decode())\n        except lazy_botocore().EndpointConnectionError as e:\n            raise CohereError(str(e))\n        except Exception as e:\n            # TODO should be client error - distinct type from CohereError?\n            # ValidationError, e.g. 
when variant is bad\n            raise CohereError(str(e))\n\n        embeddings = response['embeddings']\n        if isinstance(embeddings, dict):\n            return embeddings\n        return Embeddings(embeddings)\n\n\n    def rerank(self,\n               query: str,\n               documents: Union[List[str], List[Dict[str, Any]]],\n               top_n: Optional[int] = None,\n               variant: Optional[str] = None,\n               max_chunks_per_doc: Optional[int] = None,\n               rank_fields: Optional[List[str]] = None) -> Reranking:\n        \"\"\"Returns an ordered list of documents ordered by their relevance to the provided query\n        Args:\n            query (str): The search query\n            documents (list[str], list[dict]): The documents to rerank\n            top_n (int): (optional) The number of results to return, defaults to return all results\n            max_chunks_per_doc (int): (optional) The maximum number of chunks derived from a document\n            rank_fields (list[str]): (optional) The fields used for reranking. This parameter is only supported for rerank v3 models\n        \"\"\"\n\n        if self._endpoint_name is None:\n            raise CohereError(\"No endpoint connected. 
\"\n                              \"Run connect_to_endpoint() first.\")\n\n        parsed_docs = []\n        for doc in documents:\n            if isinstance(doc, str):\n                parsed_docs.append({'text': doc})\n            elif isinstance(doc, dict):\n                parsed_docs.append(doc)\n            else:\n                raise CohereError(\n                    message='invalid format for documents, must be a list of strings or dicts')\n\n        json_params = {\n            \"query\": query,\n            \"documents\": parsed_docs,\n            \"top_n\": top_n,\n            \"return_documents\": False,\n            \"max_chunks_per_doc\" : max_chunks_per_doc,\n            \"rank_fields\": rank_fields\n        }\n        json_body = json.dumps(json_params)\n\n        params = {\n            'EndpointName': self._endpoint_name,\n            'ContentType': 'application/json',\n            'Body': json_body,\n        }\n        if variant is not None:\n            params['TargetVariant'] = variant\n\n        try:\n            result = self._client.invoke_endpoint(**params)\n            response = json.loads(result['Body'].read().decode())\n            reranking = Reranking(response)\n            for rank in reranking.results:\n                rank.document = parsed_docs[rank.index]\n        except lazy_botocore().EndpointConnectionError as e:\n            raise CohereError(str(e))\n        except Exception as e:\n            # TODO should be client error - distinct type from CohereError?\n            # ValidationError, e.g. when variant is bad\n            raise CohereError(str(e))\n\n        return reranking\n\n    def classify(self, input: List[str], name: str) -> Classifications:\n\n        if self._endpoint_name is None:\n            raise CohereError(\"No endpoint connected. 
\"\n                              \"Run connect_to_endpoint() first.\")\n\n        json_params = {\"texts\": input, \"model_id\": name}\n        json_body = json.dumps(json_params)\n\n        params = {\n            \"EndpointName\": self._endpoint_name,\n            \"ContentType\": \"application/json\",\n            \"Body\": json_body,\n        }\n\n        try:\n            result = self._client.invoke_endpoint(**params)\n            response = json.loads(result[\"Body\"].read().decode())\n        except lazy_botocore().EndpointConnectionError as e:\n            raise CohereError(str(e))\n        except Exception as e:\n            # TODO should be client error - distinct type from CohereError?\n            # ValidationError, e.g. when variant is bad\n            raise CohereError(str(e))\n\n        return Classifications([Classification(classification) for classification in response])\n\n    def create_finetune(\n        self,\n        name: str,\n        train_data: str,\n        s3_models_dir: str,\n        arn: Optional[str] = None,\n        eval_data: Optional[str] = None,\n        instance_type: str = \"ml.g4dn.xlarge\",\n        training_parameters: Dict[str, Any] = {},  # Optional, training algorithm specific hyper-parameters\n        role: Optional[str] = None,\n        base_model_id: Optional[str] = None,\n    ) -> Optional[str]:\n        \"\"\"Creates a fine-tuning job and returns an optional finetune job ID.\n\n        Args:\n            name (str): The name to give to the fine-tuned model.\n            train_data (str): An S3 path pointing to the training data.\n            s3_models_dir (str): An S3 path pointing to the directory where the fine-tuned model will be saved.\n            arn (str, optional): The product ARN of the fine-tuning package. Required in Sagemaker mode and ignored otherwise\n            eval_data (str, optional): An S3 path pointing to the eval data. 
Defaults to None.\n            instance_type (str, optional): The EC2 instance type to use for training. Defaults to \"ml.g4dn.xlarge\".\n            training_parameters (Dict[str, Any], optional): Additional training parameters. Defaults to {}.\n            role (str, optional): The IAM role to use for the endpoint. \n                In Bedrock mode this is required and is used to access s3 input and output data.\n                If not provided in sagemaker, sagemaker.get_execution_role() will be used to get the role.\n                This should work when one uses the client inside SageMaker. If this errors\n                out, the default role \"ServiceRoleSagemaker\" will be used, which generally works outside of SageMaker.\n            base_model_id (str, optional): The ID of the Bedrock base model to finetune with. Required in Bedrock mode and ignored otherwise.\n        \"\"\"\n        assert name != \"model\", \"name cannot be 'model'\"\n\n        if self.mode == Mode.BEDROCK:\n            return self._bedrock_create_finetune(name=name, train_data=train_data, s3_models_dir=s3_models_dir, base_model=base_model_id, eval_data=eval_data, training_parameters=training_parameters, role=role)\n\n        s3_models_dir = s3_models_dir.rstrip(\"/\") + \"/\"\n\n        if role is None:\n            try:\n                role = lazy_sagemaker().get_execution_role()\n            except ValueError:\n                print(\"Using default role: 'ServiceRoleSagemaker'.\")\n                role = \"ServiceRoleSagemaker\"\n\n        training_parameters.update({\"name\": name})\n        estimator = lazy_sagemaker().algorithm.AlgorithmEstimator(\n            algorithm_arn=arn,\n            role=role,\n            instance_count=1,\n            instance_type=instance_type,\n            sagemaker_session=self._sess,\n            output_path=s3_models_dir,\n            hyperparameters=training_parameters,\n        )\n\n        inputs = {}\n        if not 
train_data.startswith(\"s3:\"):\n            raise ValueError(\"train_data must point to an S3 location.\")\n        inputs[\"training\"] = train_data\n        if eval_data is not None:\n            if not eval_data.startswith(\"s3:\"):\n                raise ValueError(\"eval_data must point to an S3 location.\")\n            inputs[\"evaluation\"] = eval_data\n        estimator.fit(inputs=inputs)\n        job_name = estimator.latest_training_job.name\n\n        current_filepath = f\"{s3_models_dir}{job_name}/output/model.tar.gz\"\n\n        s3_resource = lazy_boto3().resource(\"s3\")\n\n        # Copy new model to root of output_model_dir\n        bucket, old_key = lazy_sagemaker().s3.parse_s3_url(current_filepath)\n        _, new_key = lazy_sagemaker().s3.parse_s3_url(f\"{s3_models_dir}{name}.tar.gz\")\n        s3_resource.Object(bucket, new_key).copy(CopySource={\"Bucket\": bucket, \"Key\": old_key})\n\n        # Delete old dir\n        bucket, old_short_key = lazy_sagemaker().s3.parse_s3_url(s3_models_dir + job_name)\n        s3_resource.Bucket(bucket).objects.filter(Prefix=old_short_key).delete()\n\n    def export_finetune(\n        self,\n        name: str,\n        s3_checkpoint_dir: str,\n        s3_output_dir: str,\n        arn: str,\n        instance_type: str = \"ml.p4de.24xlarge\",\n        role: Optional[str] = None,\n    ) -> None:\n        \"\"\"Export the merged weights to the TensorRT-LLM inference engine.\n\n        Args:\n        name (str): The name used while writing the exported model to the output directory.\n        s3_checkpoint_dir (str): An S3 path pointing to the directory of the model checkpoint (merged weights).\n        s3_output_dir (str): An S3 path pointing to the directory where the TensorRT-LLM engine will be saved.\n        arn (str): The product ARN of the bring your own finetuning algorithm.\n        instance_type (str, optional): The EC2 instance type to use for export. 
Defaults to \"ml.p4de.24xlarge\".\n        role (str, optional): The IAM role to use for export.\n            If not provided, sagemaker.get_execution_role() will be used to get the role.\n            This should work when one uses the client inside SageMaker. If this errors out,\n            the default role \"ServiceRoleSagemaker\" will be used, which generally works outside SageMaker.\n        \"\"\"\n        self._require_sagemaker()\n        if name == \"model\":\n            raise ValueError(\"name cannot be 'model'\")\n\n        s3_output_dir = s3_output_dir.rstrip(\"/\") + \"/\"\n\n        if role is None:\n            try:\n                role = lazy_sagemaker().get_execution_role()\n            except ValueError:\n                print(\"Using default role: 'ServiceRoleSagemaker'.\")\n                role = \"ServiceRoleSagemaker\"\n\n        export_parameters = {\"name\": name}\n\n        estimator = lazy_sagemaker().algorithm.AlgorithmEstimator(\n            algorithm_arn=arn,\n            role=role,\n            instance_count=1,\n            instance_type=instance_type,\n            sagemaker_session=self._sess,\n            output_path=s3_output_dir,\n            hyperparameters=export_parameters,\n        )\n\n        if not s3_checkpoint_dir.startswith(\"s3:\"):\n            raise ValueError(\"s3_checkpoint_dir must point to an S3 location.\")\n        inputs = {\"checkpoint\": s3_checkpoint_dir}\n\n        estimator.fit(inputs=inputs)\n\n        job_name = estimator.latest_training_job.name\n        current_filepath = f\"{s3_output_dir}{job_name}/output/model.tar.gz\"\n\n        s3_resource = lazy_boto3().resource(\"s3\")\n\n        # Copy the exported TensorRT-LLM engine to the root of s3_output_dir\n        bucket, old_key = lazy_sagemaker().s3.parse_s3_url(current_filepath)\n        _, new_key = lazy_sagemaker().s3.parse_s3_url(f\"{s3_output_dir}{name}.tar.gz\")\n        s3_resource.Object(bucket, new_key).copy(CopySource={\"Bucket\": bucket, 
\"Key\": old_key})\n\n        # Delete the old S3 directory\n        bucket, old_short_key = lazy_sagemaker().s3.parse_s3_url(f\"{s3_output_dir}{job_name}\")\n        s3_resource.Bucket(bucket).objects.filter(Prefix=old_short_key).delete()\n\n    def wait_for_finetune_job(self, job_id: str, timeout: int = 2*60*60) -> str:\n        \"\"\"Waits for a finetune job to complete and returns a model arn if complete. Throws an exception if timeout occurs or if job does not complete successfully\n        Args:\n            job_id (str): The arn of the model customization job\n            timeout (int, optional): Timeout in seconds\n        \"\"\"\n        end = time.time() + timeout\n        while True:\n            customization_job = self._service_client.get_model_customization_job(jobIdentifier=job_id)\n            job_status = customization_job[\"status\"]\n            if job_status in [\"Completed\", \"Failed\", \"Stopped\"]:\n                break\n            if time.time() > end:\n                raise CohereError(\"could not complete finetune within timeout\")\n            time.sleep(10)\n        \n        if job_status != \"Completed\":\n            raise CohereError(f\"finetune did not finish successfuly, ended with {job_status} status\")\n        return customization_job[\"outputModelArn\"]\n\n    def provision_throughput(\n        self,\n        model_id: str,\n        name: str,\n        model_units: int,\n        commitment_duration: Optional[str] = None\n    ) -> str:\n        \"\"\"Returns the provisioned model arn\n        Args:\n            model_id (str): The ID or ARN of the model to provision\n            name (str): Name of the provisioned throughput model\n            model_units (int): Number of units to provision\n            commitment_duration (str, optional): Commitment duration, one of (\"OneMonth\", \"SixMonths\"), defaults to no commitment if unspecified\n        \"\"\"\n        if self.mode != Mode.BEDROCK:\n            raise ValueError(\"can 
only provision throughput in bedrock\")\n        kwargs = {}\n        if commitment_duration:\n            kwargs[\"commitmentDuration\"] = commitment_duration\n\n        response = self._service_client.create_provisioned_model_throughput(\n            provisionedModelName=name,\n            modelId=model_id,\n            modelUnits=model_units,\n            **kwargs\n        )\n        return response[\"provisionedModelArn\"]\n\n    def _bedrock_create_finetune(\n        self,\n        name: str,\n        train_data: str,\n        s3_models_dir: str,\n        base_model: str,\n        eval_data: Optional[str] = None,\n        training_parameters: Dict[str, Any] = {},  # Optional, training algorithm specific hyper-parameters\n        role: Optional[str] = None,\n    ) -> None:\n        if not name:\n            raise ValueError(\"name must not be empty\")\n        if not role:\n            raise ValueError(\"must provide a role ARN for bedrock finetuning (https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-iam-role.html)\")\n        if not train_data.startswith(\"s3:\"):\n            raise ValueError(\"train_data must point to an S3 location.\")\n        if eval_data:\n            if not eval_data.startswith(\"s3:\"):\n                raise ValueError(\"eval_data must point to an S3 location.\")\n            validationDataConfig = {\n                \"validators\": [{\n                    \"s3Uri\": eval_data\n                }]\n            }\n\n        job_name = f\"{name}-job\"\n        customization_job = self._service_client.create_model_customization_job(\n            jobName=job_name, \n            customModelName=name, \n            roleArn=role,\n            baseModelIdentifier=base_model,\n            trainingDataConfig={\"s3Uri\": train_data},\n            validationDataConfig=validationDataConfig,\n            outputDataConfig={\"s3Uri\": s3_models_dir}, \n            hyperParameters=training_parameters\n        )\n        return 
customization_job[\"jobArn\"]\n\n\n    def summarize(\n        self,\n        text: str,\n        length: Optional[str] = \"auto\",\n        format_: Optional[str] = \"auto\",\n        # Only summarize-xlarge is supported on Sagemaker\n        # model: Optional[str] = \"summarize-xlarge\",\n        extractiveness: Optional[str] = \"auto\",\n        temperature: Optional[float] = 0.3,\n        additional_command: Optional[str] = \"\",\n        variant: Optional[str] = None\n    ) -> Summary:\n        self._require_sagemaker()\n\n        if self._endpoint_name is None:\n            raise CohereError(\"No endpoint connected. \"\n                              \"Run connect_to_endpoint() first.\")\n\n        json_params = {\n            'text': text,\n            'length': length,\n            'format': format_,\n            'extractiveness': extractiveness,\n            'temperature': temperature,\n            'additional_command': additional_command,\n        }\n        for key, value in list(json_params.items()):\n            if value is None:\n                del json_params[key]\n        json_body = json.dumps(json_params)\n\n        params = {\n            'EndpointName': self._endpoint_name,\n            'ContentType': 'application/json',\n            'Body': json_body,\n        }\n        if variant is not None:\n            params['TargetVariant'] = variant\n\n        try:\n            result = self._client.invoke_endpoint(**params)\n            response = json.loads(result['Body'].read().decode())\n            summary = Summary(response)\n        except lazy_botocore().EndpointConnectionError as e:\n            raise CohereError(str(e))\n        except Exception as e:\n            # TODO should be client error - distinct type from CohereError?\n            # ValidationError, e.g. 
when variant is bad\n            raise CohereError(str(e))\n\n        return summary\n\n\n    def delete_endpoint(self) -> None:\n        self._require_sagemaker()\n        if self._endpoint_name is None:\n            raise CohereError(\"No endpoint connected.\")\n        try:\n            self._service_client.delete_endpoint(EndpointName=self._endpoint_name)\n        except:\n            print(\"Endpoint not found, skipping deletion.\")\n\n        try:\n            self._service_client.delete_endpoint_config(EndpointConfigName=self._endpoint_name)\n        except:\n            print(\"Endpoint config not found, skipping deletion.\")\n\n    def close(self) -> None:\n        try:\n            self._client.close()\n            self._service_client.close()\n        except AttributeError:\n            print(\"SageMaker client could not be closed. This might be because you are using an old version of SageMaker.\")\n            raise\n"
  },
  {
    "path": "src/cohere/manually_maintained/cohere_aws/embeddings.py",
    "content": "from .response import CohereObject\nfrom typing import Iterator, List\n\n\nclass Embedding(CohereObject):\n\n    def __init__(self, embedding: List[float]) -> None:\n        self.embedding = embedding\n\n    def __iter__(self) -> Iterator:\n        return iter(self.embedding)\n\n    def __len__(self) -> int:\n        return len(self.embedding)\n\n\nclass Embeddings(CohereObject):\n\n    def __init__(self, embeddings: List[Embedding]) -> None:\n        self.embeddings = embeddings\n\n    def __iter__(self) -> Iterator:\n        return iter(self.embeddings)\n\n    def __len__(self) -> int:\n        return len(self.embeddings)\n"
  },
  {
    "path": "src/cohere/manually_maintained/cohere_aws/error.py",
    "content": "class CohereError(Exception):\n    def __init__(\n        self,\n        message=None,\n        http_status=None,\n        headers=None,\n    ) -> None:\n        super(CohereError, self).__init__(message)\n\n        self.message = message\n        self.http_status = http_status\n        self.headers = headers or {}\n\n    def __str__(self) -> str:\n        msg = self.message or '<empty message>'\n        return msg\n\n    def __repr__(self) -> str:\n        return '%s(message=%r, http_status=%r)' % (\n            self.__class__.__name__,\n            self.message,\n            self.http_status,\n        )\n"
  },
  {
    "path": "src/cohere/manually_maintained/cohere_aws/generation.py",
    "content": "from .response import CohereObject\nfrom .mode import Mode\nfrom typing import List, Optional, NamedTuple, Generator, Dict, Any\nimport json\n\n\nclass TokenLikelihood(CohereObject):\n    def __init__(self, token: str, likelihood: float) -> None:\n        self.token = token\n        self.likelihood = likelihood\n\n\nclass Generation(CohereObject):\n    def __init__(self,\n                 text: str,\n                 token_likelihoods: List[TokenLikelihood]) -> None:\n        self.text = text\n        self.token_likelihoods = token_likelihoods\n\n\nclass Generations(CohereObject):\n    def __init__(self,\n                 generations: List[Generation]) -> None:\n        self.generations = generations\n        self.iterator = iter(generations)\n\n    @classmethod\n    def from_dict(cls, response: Dict[str, Any]) -> List[Generation]:\n        generations: List[Generation] = []\n        for gen in response['generations']:\n            token_likelihoods = None\n\n            if 'token_likelihoods' in gen:\n                token_likelihoods = []\n                for likelihoods in gen['token_likelihoods']:\n                    if 'likelihood' in likelihoods:\n                        token_likelihood = likelihoods['likelihood']\n                    else:\n                        token_likelihood = None\n                    token_likelihoods.append(TokenLikelihood(\n                        likelihoods['token'], token_likelihood))\n            generations.append(Generation(gen['text'], token_likelihoods))\n        return cls(generations)\n\n    def __iter__(self) -> iter:\n        return self.iterator\n\n    def __next__(self) -> next:\n        return next(self.iterator)\n\n\nStreamingText = NamedTuple(\"StreamingText\",\n                           [(\"index\", Optional[int]),\n                            (\"text\", str),\n                            (\"is_finished\", bool)])\n\n\nclass StreamingGenerations(CohereObject):\n    def __init__(self, stream, 
mode):\n        self.stream = stream\n        self.id = None\n        self.generations = None\n        self.finish_reason = None\n        self.bytes = bytearray()\n\n        if mode == Mode.SAGEMAKER:\n            self.payload_key = \"PayloadPart\"\n            self.bytes_key = \"Bytes\"\n        elif mode == Mode.BEDROCK:\n            self.payload_key = \"chunk\"\n            self.bytes_key = \"bytes\"\n        else:\n            raise CohereError(\"Unsupported mode\")\n\n    def _make_response_item(self, streaming_item) -> Optional[StreamingText]:\n        is_finished = streaming_item.get(\"is_finished\")\n\n        if not is_finished:\n            index = streaming_item.get(\"index\", 0)\n            text = streaming_item.get(\"text\")\n            if text is None:\n                return None\n            return StreamingText(\n                text=text, is_finished=is_finished, index=index)\n\n        self.finish_reason = streaming_item.get(\"finish_reason\")\n        generation_response = streaming_item.get(\"response\")\n\n        if generation_response is None:\n            return None\n\n        self.id = generation_response.get(\"id\")\n        self.generations = Generations.from_dict(generation_response)\n        return None\n\n    def __iter__(self) -> Generator[StreamingText, None, None]:\n        for payload in self.stream:\n            self.bytes.extend(payload[self.payload_key][self.bytes_key])\n            try:\n                item = self._make_response_item(json.loads(self.bytes))\n            except json.decoder.JSONDecodeError:\n                # payload contained only a partial JSON object\n                continue\n\n            self.bytes = bytearray()\n            if item is not None:\n                yield item\n"
  },
  {
    "path": "src/cohere/manually_maintained/cohere_aws/mode.py",
    "content": "from enum import Enum\n\n\nclass Mode(Enum):\n    SAGEMAKER = 1\n    BEDROCK = 2\n"
  },
  {
    "path": "src/cohere/manually_maintained/cohere_aws/rerank.py",
    "content": "from typing import Any, Dict, Iterator, List, NamedTuple, Optional\n\nfrom .response import CohereObject\n\nRerankDocument = NamedTuple(\"Document\", [(\"text\", str)])\nRerankDocument.__doc__ = \"\"\"\nReturned by co.rerank,\ndict which always contains text but can also contain arbitrary fields\n\"\"\"\n\n\nclass RerankResult(CohereObject):\n\n    def __init__(self,\n                 document: Dict[str, Any] = None,\n                 index: int = None,\n                 relevance_score: float = None,\n                 *args, **kwargs) -> None:\n        super().__init__(*args, **kwargs)\n        self.document = document\n        self.index = index\n        self.relevance_score = relevance_score\n\n    def __repr__(self) -> str:\n        score = self.relevance_score\n        index = self.index\n        if self.document is None:\n            return f\"RerankResult<index: {index}, relevance_score: {score}>\"\n        elif 'text' in self.document:\n            text = self.document['text']\n            return f\"RerankResult<text: {text}, index: {index}, relevance_score: {score}>\"\n        else:\n            return f\"RerankResult<document: {self.document}, index: {index}, relevance_score: {score}>\"\n\n\nclass Reranking(CohereObject):\n\n    def __init__(self,\n                 response: Optional[Dict[str, Any]] = None,\n                 **kwargs) -> None:\n        super().__init__(**kwargs)\n        assert response is not None\n        self.results = self._results(response)\n\n    def _results(self, response: Dict[str, Any]) -> List[RerankResult]:\n        results = []\n        for res in response['results']:\n            if 'document' in res.keys():\n                results.append(\n                    RerankResult(res['document'], res['index'], res['relevance_score']))\n            else:\n                results.append(\n                    RerankResult(index=res['index'], relevance_score=res['relevance_score']))\n        return results\n\n    def 
__str__(self) -> str:\n        return str(self.results)\n\n    def __repr__(self) -> str:\n        return self.results.__repr__()\n\n    def __iter__(self) -> Iterator:\n        return iter(self.results)\n\n    def __getitem__(self, index) -> RerankResult:\n        return self.results[index]\n"
  },
  {
    "path": "src/cohere/manually_maintained/cohere_aws/response.py",
    "content": "class CohereObject():\n    def __repr__(self) -> str:\n        contents = ''\n        exclude_list = ['iterator']\n\n        for k in self.__dict__.keys():\n            if k not in exclude_list:\n                contents += f'\\t{k}: {self.__dict__[k]}\\n'\n\n        output = f'cohere.{type(self).__name__} {{\\n{contents}}}'\n        return output\n"
  },
  {
    "path": "src/cohere/manually_maintained/cohere_aws/summary.py",
    "content": "from .error import CohereError\nfrom .response import CohereObject\nfrom typing import Any, Dict, Optional\n\n\nclass Summary(CohereObject):\n    def __init__(self,\n                 response: Optional[Dict[str, Any]] = None) -> None:\n        assert response is not None\n        if not response[\"summary\"]:\n            raise CohereError(\"Response lacks a summary\")\n\n        self.result = response[\"summary\"]\n\n    def __str__(self) -> str:\n        return self.result\n"
  },
  {
    "path": "src/cohere/manually_maintained/lazy_aws_deps.py",
    "content": "\nwarning = \"AWS dependencies are not installed. Please install boto3, botocore, and sagemaker.\"\n\ndef lazy_sagemaker():\n    try:\n        import sagemaker as sage # type: ignore\n        return sage\n    except ImportError:\n        raise ImportError(warning)\n\ndef lazy_boto3():\n    try:\n        import boto3 # type: ignore\n        return boto3\n    except ImportError:\n        raise ImportError(warning)\n    \ndef lazy_botocore():\n    try:\n        import botocore # type: ignore\n        return botocore\n    except ImportError:\n        raise ImportError(warning)\n\n"
  },
  {
    "path": "src/cohere/manually_maintained/lazy_oci_deps.py",
    "content": "\"\"\"Lazy loading for optional OCI SDK dependency.\"\"\"\n\nfrom typing import Any\n\nOCI_INSTALLATION_MESSAGE = \"\"\"\nThe OCI SDK is required to use OciClient or OciClientV2.\n\nInstall it with:\n    pip install oci\n\nOr with the optional dependency group:\n    pip install cohere[oci]\n\"\"\"\n\n\ndef lazy_oci() -> Any:\n    \"\"\"\n    Lazily import the OCI SDK.\n\n    Returns:\n        The oci module\n\n    Raises:\n        ImportError: If the OCI SDK is not installed\n    \"\"\"\n    try:\n        import oci  # type: ignore[import-untyped, import-not-found]\n        return oci\n    except ImportError:\n        raise ImportError(OCI_INSTALLATION_MESSAGE)\n"
  },
  {
    "path": "src/cohere/manually_maintained/streaming_embed.py",
    "content": "\"\"\"Utilities for streaming embed responses without loading all embeddings into memory.\"\"\"\n\nfrom __future__ import annotations\n\nfrom dataclasses import dataclass\nfrom typing import Iterator, List, Optional, Union\n\n\n@dataclass\nclass StreamedEmbedding:\n    \"\"\"A single embedding yielded incrementally from embed_stream().\"\"\"\n    index: int\n    embedding: Union[List[float], List[int]]\n    embedding_type: str\n    text: Optional[str] = None\n\n\ndef extract_embeddings_from_response(\n    response_data: dict,\n    batch_texts: List[str],\n    global_offset: int = 0,\n) -> Iterator[StreamedEmbedding]:\n    \"\"\"\n    Extract individual embeddings from a Cohere embed response dict.\n\n    Works for both V1 (embeddings_floats / embeddings_by_type) and V2 response formats.\n\n    Args:\n        response_data: Parsed JSON response from embed endpoint\n        batch_texts: The texts that were embedded in this batch\n        global_offset: Starting index for this batch within the full dataset\n\n    Yields:\n        StreamedEmbedding objects\n    \"\"\"\n    response_type = response_data.get(\"response_type\", \"\")\n\n    if response_type == \"embeddings_floats\":\n        embeddings = response_data.get(\"embeddings\", [])\n        for i, embedding in enumerate(embeddings):\n            yield StreamedEmbedding(\n                index=global_offset + i,\n                embedding=embedding,\n                embedding_type=\"float\",\n                text=batch_texts[i] if i < len(batch_texts) else None,\n            )\n\n    elif response_type == \"embeddings_by_type\":\n        embeddings_obj = response_data.get(\"embeddings\", {})\n        for emb_type, embeddings_list in embeddings_obj.items():\n            type_name = emb_type.rstrip(\"_\")\n            if isinstance(embeddings_list, list):\n                for i, embedding in enumerate(embeddings_list):\n                    yield StreamedEmbedding(\n                        
index=global_offset + i,\n                        embedding=embedding,\n                        embedding_type=type_name,\n                        text=batch_texts[i] if i < len(batch_texts) else None,\n                    )\n\n    else:\n        # V2 format: embeddings is a dict with type keys directly\n        embeddings_obj = response_data.get(\"embeddings\", {})\n        if isinstance(embeddings_obj, dict):\n            for emb_type, embeddings_list in embeddings_obj.items():\n                type_name = emb_type.rstrip(\"_\")\n                if isinstance(embeddings_list, list):\n                    for i, embedding in enumerate(embeddings_list):\n                        yield StreamedEmbedding(\n                            index=global_offset + i,\n                            embedding=embedding,\n                            embedding_type=type_name,\n                            text=batch_texts[i] if i < len(batch_texts) else None,\n                        )\n"
  },
  {
    "path": "src/cohere/manually_maintained/tokenizers.py",
    "content": "import asyncio\nimport logging\nimport typing\n\nimport requests\nfrom tokenizers import Tokenizer  # type: ignore\n\nif typing.TYPE_CHECKING:\n    from cohere.client import AsyncClient, Client\n\nTOKENIZER_CACHE_KEY = \"tokenizers\"\nlogger = logging.getLogger(__name__)\n\n\ndef tokenizer_cache_key(model: str) -> str:\n    return f\"{TOKENIZER_CACHE_KEY}:{model}\"\n\n\ndef get_hf_tokenizer(co: \"Client\", model: str) -> Tokenizer:\n    \"\"\"Returns a HF tokenizer from a given tokenizer config URL.\"\"\"\n    tokenizer = co._cache_get(tokenizer_cache_key(model))\n    if tokenizer is not None:\n        return tokenizer\n    tokenizer_url = co.models.get(model).tokenizer_url\n    if not tokenizer_url:\n        raise ValueError(f\"No tokenizer URL found for model {model}\")\n\n    # Print the size of the tokenizer config before downloading it.\n    try:\n        size = _get_tokenizer_config_size(tokenizer_url)\n        logger.info(f\"Downloading tokenizer for model {model}. 
Size is {size} MBs.\")\n    except Exception as e:\n        # Skip the size logging, this is not critical.\n        logger.warning(f\"Failed to get the size of the tokenizer config: {e}\")\n\n    response = requests.get(tokenizer_url)\n    tokenizer = Tokenizer.from_str(response.text)\n\n    co._cache_set(tokenizer_cache_key(model), tokenizer)\n    return tokenizer\n\n\ndef local_tokenize(co: \"Client\", model: str, text: str) -> typing.List[int]:\n    \"\"\"Encodes a given text using a local tokenizer.\"\"\"\n    tokenizer = get_hf_tokenizer(co, model)\n    return tokenizer.encode(text, add_special_tokens=False).ids\n\n\ndef local_detokenize(co: \"Client\", model: str, tokens: typing.Sequence[int]) -> str:\n    \"\"\"Decodes a given list of tokens using a local tokenizer.\"\"\"\n    tokenizer = get_hf_tokenizer(co, model)\n    return tokenizer.decode(tokens)\n\n\nasync def async_get_hf_tokenizer(co: \"AsyncClient\", model: str) -> Tokenizer:\n    \"\"\"Returns a HF tokenizer from a given tokenizer config URL.\"\"\"\n\n    tokenizer = co._cache_get(tokenizer_cache_key(model))\n    if tokenizer is not None:\n        return tokenizer\n    tokenizer_url = (await co.models.get(model)).tokenizer_url\n    if not tokenizer_url:\n        raise ValueError(f\"No tokenizer URL found for model {model}\")\n\n    # Print the size of the tokenizer config before downloading it.\n    try:\n        size = _get_tokenizer_config_size(tokenizer_url)\n        logger.info(f\"Downloading tokenizer for model {model}. 
Size is {size} MBs.\")\n    except Exception as e:\n        # Skip the size logging, this is not critical.\n        logger.warning(f\"Failed to get the size of the tokenizer config: {e}\")\n\n    response = await asyncio.get_event_loop().run_in_executor(None, requests.get, tokenizer_url)\n    tokenizer = Tokenizer.from_str(response.text)\n\n    co._cache_set(tokenizer_cache_key(model), tokenizer)\n    return tokenizer\n\n\nasync def async_local_tokenize(co: \"AsyncClient\", model: str, text: str) -> typing.List[int]:\n    \"\"\"Encodes a given text using a local tokenizer.\"\"\"\n    tokenizer = await async_get_hf_tokenizer(co, model)\n    return tokenizer.encode(text, add_special_tokens=False).ids\n\n\nasync def async_local_detokenize(co: \"AsyncClient\", model: str, tokens: typing.Sequence[int]) -> str:\n    \"\"\"Decodes a given list of tokens using a local tokenizer.\"\"\"\n    tokenizer = await async_get_hf_tokenizer(co, model)\n    return tokenizer.decode(tokens)\n\n\ndef _get_tokenizer_config_size(tokenizer_url: str) -> float:\n    # Get the size of the tokenizer config before downloading it.\n    # Content-Length is not always present in the headers (if transfer-encoding: chunked).\n    head_response = requests.head(tokenizer_url)\n    size = None\n    for header in [\"x-goog-stored-content-length\", \"Content-Length\"]:\n        size = head_response.headers.get(header)\n        if size:\n            break\n\n    return round(int(typing.cast(int, size)) / 1024 / 1024, 2)\n"
  },
  {
    "path": "src/cohere/models/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\n"
  },
  {
    "path": "src/cohere/models/client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.request_options import RequestOptions\nfrom ..types.compatible_endpoint import CompatibleEndpoint\nfrom ..types.get_model_response import GetModelResponse\nfrom ..types.list_models_response import ListModelsResponse\nfrom .raw_client import AsyncRawModelsClient, RawModelsClient\n\n\nclass ModelsClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._raw_client = RawModelsClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> RawModelsClient:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        RawModelsClient\n        \"\"\"\n        return self._raw_client\n\n    def get(self, model: str, *, request_options: typing.Optional[RequestOptions] = None) -> GetModelResponse:\n        \"\"\"\n        Returns the details of a model, provided its name.\n\n        Parameters\n        ----------\n        model : str\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        GetModelResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.models.get(\n            model=\"command-a-03-2025\",\n        )\n        \"\"\"\n        _response = self._raw_client.get(model, request_options=request_options)\n        return _response.data\n\n    def list(\n        self,\n        *,\n        page_size: typing.Optional[float] = None,\n        page_token: typing.Optional[str] = None,\n        endpoint: typing.Optional[CompatibleEndpoint] = None,\n        default_only: typing.Optional[bool] 
= None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ListModelsResponse:\n        \"\"\"\n        Returns a list of models available for use.\n\n        Parameters\n        ----------\n        page_size : typing.Optional[float]\n            Maximum number of models to include in a page\n            Defaults to `20`, min value of `1`, max value of `1000`.\n\n        page_token : typing.Optional[str]\n            Page token provided in the `next_page_token` field of a previous response.\n\n        endpoint : typing.Optional[CompatibleEndpoint]\n            When provided, filters the list of models to only those that are compatible with the specified endpoint.\n\n        default_only : typing.Optional[bool]\n            When provided, filters the list of models to only the default model to the endpoint. This parameter is only valid when `endpoint` is provided.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListModelsResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.models.list(\n            page_size=1.1,\n            page_token=\"page_token\",\n            endpoint=\"chat\",\n            default_only=True,\n        )\n        \"\"\"\n        _response = self._raw_client.list(\n            page_size=page_size,\n            page_token=page_token,\n            endpoint=endpoint,\n            default_only=default_only,\n            request_options=request_options,\n        )\n        return _response.data\n\n\nclass AsyncModelsClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._raw_client = AsyncRawModelsClient(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> AsyncRawModelsClient:\n       
 \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        AsyncRawModelsClient\n        \"\"\"\n        return self._raw_client\n\n    async def get(self, model: str, *, request_options: typing.Optional[RequestOptions] = None) -> GetModelResponse:\n        \"\"\"\n        Returns the details of a model, provided its name.\n\n        Parameters\n        ----------\n        model : str\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        GetModelResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.models.get(\n                model=\"command-a-03-2025\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.get(model, request_options=request_options)\n        return _response.data\n\n    async def list(\n        self,\n        *,\n        page_size: typing.Optional[float] = None,\n        page_token: typing.Optional[str] = None,\n        endpoint: typing.Optional[CompatibleEndpoint] = None,\n        default_only: typing.Optional[bool] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> ListModelsResponse:\n        \"\"\"\n        Returns a list of models available for use.\n\n        Parameters\n        ----------\n        page_size : typing.Optional[float]\n            Maximum number of models to include in a page\n            Defaults to `20`, min value of `1`, max value of `1000`.\n\n        page_token : typing.Optional[str]\n            Page token provided in the `next_page_token` field of a previous response.\n\n        endpoint : 
typing.Optional[CompatibleEndpoint]\n            When provided, filters the list of models to only those that are compatible with the specified endpoint.\n\n        default_only : typing.Optional[bool]\n            When provided, filters the list of models to only the default model to the endpoint. This parameter is only valid when `endpoint` is provided.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        ListModelsResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.models.list(\n                page_size=1.1,\n                page_token=\"page_token\",\n                endpoint=\"chat\",\n                default_only=True,\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.list(\n            page_size=page_size,\n            page_token=page_token,\n            endpoint=endpoint,\n            default_only=default_only,\n            request_options=request_options,\n        )\n        return _response.data\n"
  },
  {
    "path": "src/cohere/models/raw_client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\nfrom json.decoder import JSONDecodeError\n\nfrom ..core.api_error import ApiError\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.http_response import AsyncHttpResponse, HttpResponse\nfrom ..core.jsonable_encoder import jsonable_encoder\nfrom ..core.parse_error import ParsingError\nfrom ..core.request_options import RequestOptions\nfrom ..core.unchecked_base_model import construct_type\nfrom ..errors.bad_request_error import BadRequestError\nfrom ..errors.client_closed_request_error import ClientClosedRequestError\nfrom ..errors.forbidden_error import ForbiddenError\nfrom ..errors.gateway_timeout_error import GatewayTimeoutError\nfrom ..errors.internal_server_error import InternalServerError\nfrom ..errors.invalid_token_error import InvalidTokenError\nfrom ..errors.not_found_error import NotFoundError\nfrom ..errors.not_implemented_error import NotImplementedError\nfrom ..errors.service_unavailable_error import ServiceUnavailableError\nfrom ..errors.too_many_requests_error import TooManyRequestsError\nfrom ..errors.unauthorized_error import UnauthorizedError\nfrom ..errors.unprocessable_entity_error import UnprocessableEntityError\nfrom ..types.compatible_endpoint import CompatibleEndpoint\nfrom ..types.get_model_response import GetModelResponse\nfrom ..types.list_models_response import ListModelsResponse\nfrom pydantic import ValidationError\n\n\nclass RawModelsClient:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    def get(\n        self, model: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[GetModelResponse]:\n        \"\"\"\n        Returns the details of a model, provided its name.\n\n        Parameters\n        ----------\n        model : str\n\n        request_options : typing.Optional[RequestOptions]\n            
Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[GetModelResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            f\"v1/models/{jsonable_encoder(model)}\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    GetModelResponse,\n                    construct_type(\n                        type_=GetModelResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            
object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n         
           headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            
type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def list(\n        self,\n        *,\n        page_size: typing.Optional[float] = None,\n        page_token: typing.Optional[str] = None,\n        endpoint: typing.Optional[CompatibleEndpoint] = None,\n        default_only: typing.Optional[bool] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[ListModelsResponse]:\n        \"\"\"\n        Returns a list of models available for use.\n\n        Parameters\n        ----------\n        page_size : typing.Optional[float]\n            Maximum number of models to include in a page\n            Defaults to `20`, min value of `1`, max value of `1000`.\n\n        page_token : typing.Optional[str]\n            Page token provided in the `next_page_token` field of a previous response.\n\n        endpoint : typing.Optional[CompatibleEndpoint]\n            When provided, filters the list of models to only those that are compatible with the specified endpoint.\n\n        default_only : typing.Optional[bool]\n            When provided, filters the list of models to only the default model to the endpoint. 
This parameter is only valid when `endpoint` is provided.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[ListModelsResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/models\",\n            method=\"GET\",\n            params={\n                \"page_size\": page_size,\n                \"page_token\": page_token,\n                \"endpoint\": endpoint,\n                \"default_only\": default_only,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListModelsResponse,\n                    construct_type(\n                        type_=ListModelsResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n  
                      construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n      
              ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n\nclass AsyncRawModelsClient:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    async def get(\n        self, model: str, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[GetModelResponse]:\n        \"\"\"\n        Returns the details of a model, provided its name.\n\n        Parameters\n        ----------\n        model : str\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[GetModelResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            f\"v1/models/{jsonable_encoder(model)}\",\n            method=\"GET\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    
GetModelResponse,\n                    construct_type(\n                        type_=GetModelResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            
object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n   
                 headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                
status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def list(\n        self,\n        *,\n        page_size: typing.Optional[float] = None,\n        page_token: typing.Optional[str] = None,\n        endpoint: typing.Optional[CompatibleEndpoint] = None,\n        default_only: typing.Optional[bool] = None,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[ListModelsResponse]:\n        \"\"\"\n        Returns a list of models available for use.\n\n        Parameters\n        ----------\n        page_size : typing.Optional[float]\n            Maximum number of models to include in a page\n            Defaults to `20`, min value of `1`, max value of `1000`.\n\n        page_token : typing.Optional[str]\n            Page token provided in the `next_page_token` field of a previous response.\n\n        endpoint : typing.Optional[CompatibleEndpoint]\n            When provided, filters the list of models to only those that are compatible with the specified endpoint.\n\n        default_only : typing.Optional[bool]\n            When provided, filters the list of models to only the default model to the endpoint. 
This parameter is only valid when `endpoint` is provided.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[ListModelsResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/models\",\n            method=\"GET\",\n            params={\n                \"page_size\": page_size,\n                \"page_token\": page_token,\n                \"endpoint\": endpoint,\n                \"default_only\": default_only,\n            },\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ListModelsResponse,\n                    construct_type(\n                        type_=ListModelsResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n         
   if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        
typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                   
     ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n"
  },
  {
    "path": "src/cohere/oci_client.py",
    "content": "\"\"\"Oracle Cloud Infrastructure (OCI) client for Cohere API.\"\"\"\n\nimport configparser\nimport email.utils\nimport json\nimport os\nimport typing\nimport uuid\n\nimport httpx\nimport requests\nfrom .client import Client, ClientEnvironment\nfrom .client_v2 import ClientV2\nfrom .aws_client import Streamer\nfrom .manually_maintained.lazy_oci_deps import lazy_oci\nfrom httpx import URL, ByteStream\n\n\nclass OciClient(Client):\n    \"\"\"\n    Cohere V1 API client for Oracle Cloud Infrastructure (OCI) Generative AI service.\n\n    Use this client for V1 API models (Command R family) and embeddings.\n    For V2 API models (Command A family), use OciClientV2 instead.\n\n    Supported APIs on OCI:\n    - embed(): Full support for all embedding models\n    - chat(): Full support with Command-R models\n    - chat_stream(): Streaming chat support\n\n    Supports all authentication methods:\n    - Config file (default): Uses ~/.oci/config\n    - Session-based: Uses OCI CLI session tokens\n    - Direct credentials: Pass OCI credentials directly\n    - Instance principal: For OCI compute instances\n    - Resource principal: For OCI functions\n\n    Example:\n        ```python\n        import cohere\n\n        client = cohere.OciClient(\n            oci_region=\"us-chicago-1\",\n            oci_compartment_id=\"ocid1.compartment.oc1...\",\n        )\n\n        response = client.chat(\n            model=\"command-r-08-2024\",\n            message=\"Hello!\",\n        )\n        print(response.text)\n        ```\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        oci_config_path: typing.Optional[str] = None,\n        oci_profile: typing.Optional[str] = None,\n        oci_user_id: typing.Optional[str] = None,\n        oci_fingerprint: typing.Optional[str] = None,\n        oci_tenancy_id: typing.Optional[str] = None,\n        oci_private_key_path: typing.Optional[str] = None,\n        oci_private_key_content: typing.Optional[str] = None,\n      
  auth_type: typing.Literal[\"api_key\", \"instance_principal\", \"resource_principal\"] = \"api_key\",\n        oci_region: typing.Optional[str] = None,\n        oci_compartment_id: str,\n        timeout: typing.Optional[float] = None,\n    ):\n        oci_config = _load_oci_config(\n            auth_type=auth_type,\n            config_path=oci_config_path,\n            profile=oci_profile,\n            user_id=oci_user_id,\n            fingerprint=oci_fingerprint,\n            tenancy_id=oci_tenancy_id,\n            private_key_path=oci_private_key_path,\n            private_key_content=oci_private_key_content,\n        )\n\n        if oci_region is None:\n            oci_region = oci_config.get(\"region\")\n            if oci_region is None:\n                raise ValueError(\"oci_region must be provided either directly or in OCI config file\")\n\n        Client.__init__(\n            self,\n            base_url=\"https://api.cohere.com\",\n            environment=ClientEnvironment.PRODUCTION,\n            client_name=\"n/a\",\n            timeout=timeout,\n            api_key=\"n/a\",\n            httpx_client=httpx.Client(\n                event_hooks=get_event_hooks(\n                    oci_config=oci_config,\n                    oci_region=oci_region,\n                    oci_compartment_id=oci_compartment_id,\n                    is_v2_client=False,\n                ),\n                timeout=timeout,\n            ),\n        )\n\n\nclass OciClientV2(ClientV2):\n    \"\"\"\n    Cohere V2 API client for Oracle Cloud Infrastructure (OCI) Generative AI service.\n\n    Supported APIs on OCI:\n    - embed(): Full support for all embedding models (returns embeddings as dict)\n    - chat(): Full support with Command-A models (command-a-03-2025)\n    - chat_stream(): Streaming chat with proper V2 event format\n\n    Note: rerank() requires fine-tuned models deployed to dedicated endpoints.\n    OCI on-demand inference does not support the rerank API.\n\n    
Supports all authentication methods:\n    - Config file (default): Uses ~/.oci/config\n    - Session-based: Uses OCI CLI session tokens\n    - Direct credentials: Pass OCI credentials directly\n    - Instance principal: For OCI compute instances\n    - Resource principal: For OCI functions\n\n    Example using config file:\n        ```python\n        import cohere\n\n        client = cohere.OciClientV2(\n            oci_region=\"us-chicago-1\",\n            oci_compartment_id=\"ocid1.compartment.oc1...\",\n        )\n\n        response = client.embed(\n            model=\"embed-english-v3.0\",\n            texts=[\"Hello world\"],\n            input_type=\"search_document\",\n        )\n        print(response.embeddings.float_)\n\n        response = client.chat(\n            model=\"command-a-03-2025\",\n            messages=[{\"role\": \"user\", \"content\": \"Hello!\"}],\n        )\n        print(response.message)\n        ```\n\n    Example using direct credentials:\n        ```python\n        client = cohere.OciClientV2(\n            oci_user_id=\"ocid1.user.oc1...\",\n            oci_fingerprint=\"xx:xx:xx:...\",\n            oci_tenancy_id=\"ocid1.tenancy.oc1...\",\n            oci_private_key_path=\"~/.oci/key.pem\",\n            oci_region=\"us-chicago-1\",\n            oci_compartment_id=\"ocid1.compartment.oc1...\",\n        )\n        ```\n\n    Example using instance principal:\n        ```python\n        client = cohere.OciClientV2(\n            auth_type=\"instance_principal\",\n            oci_region=\"us-chicago-1\",\n            oci_compartment_id=\"ocid1.compartment.oc1...\",\n        )\n        ```\n    \"\"\"\n\n    def __init__(\n        self,\n        *,\n        # Authentication - Config file (default)\n        oci_config_path: typing.Optional[str] = None,\n        oci_profile: typing.Optional[str] = None,\n        # Authentication - Direct credentials\n        oci_user_id: typing.Optional[str] = None,\n        oci_fingerprint: 
typing.Optional[str] = None,\n        oci_tenancy_id: typing.Optional[str] = None,\n        oci_private_key_path: typing.Optional[str] = None,\n        oci_private_key_content: typing.Optional[str] = None,\n        # Authentication - Instance principal\n        auth_type: typing.Literal[\"api_key\", \"instance_principal\", \"resource_principal\"] = \"api_key\",\n        # Required for OCI Generative AI\n        oci_region: typing.Optional[str] = None,\n        oci_compartment_id: str,\n        # Standard parameters\n        timeout: typing.Optional[float] = None,\n    ):\n        # Load OCI config based on auth_type\n        oci_config = _load_oci_config(\n            auth_type=auth_type,\n            config_path=oci_config_path,\n            profile=oci_profile,\n            user_id=oci_user_id,\n            fingerprint=oci_fingerprint,\n            tenancy_id=oci_tenancy_id,\n            private_key_path=oci_private_key_path,\n            private_key_content=oci_private_key_content,\n        )\n\n        # Get region from config if not provided\n        if oci_region is None:\n            oci_region = oci_config.get(\"region\")\n            if oci_region is None:\n                raise ValueError(\"oci_region must be provided either directly or in OCI config file\")\n\n        # Create httpx client with OCI event hooks\n        ClientV2.__init__(\n            self,\n            base_url=\"https://api.cohere.com\",  # Unused, OCI URL set in hooks\n            environment=ClientEnvironment.PRODUCTION,\n            client_name=\"n/a\",\n            timeout=timeout,\n            api_key=\"n/a\",\n            httpx_client=httpx.Client(\n                event_hooks=get_event_hooks(\n                    oci_config=oci_config,\n                    oci_region=oci_region,\n                    oci_compartment_id=oci_compartment_id,\n                    is_v2_client=True,\n                ),\n                timeout=timeout,\n            ),\n        )\n\n\nEventHook = 
typing.Callable[..., typing.Any]\n\n\ndef _load_oci_config(\n    auth_type: str,\n    config_path: typing.Optional[str],\n    profile: typing.Optional[str],\n    **kwargs: typing.Any,\n) -> typing.Dict[str, typing.Any]:\n    \"\"\"\n    Load OCI configuration based on authentication type.\n\n    Args:\n        auth_type: Authentication method (api_key, instance_principal, resource_principal)\n        config_path: Path to OCI config file (for api_key auth)\n        profile: Profile name in config file (for api_key auth)\n        **kwargs: Direct credentials (user_id, fingerprint, etc.)\n\n    Returns:\n        Dictionary containing OCI configuration\n    \"\"\"\n    oci = lazy_oci()\n\n    if auth_type == \"instance_principal\":\n        signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()\n        return {\"signer\": signer, \"auth_type\": \"instance_principal\"}\n\n    elif auth_type == \"resource_principal\":\n        signer = oci.auth.signers.get_resource_principals_signer()\n        return {\"signer\": signer, \"auth_type\": \"resource_principal\"}\n\n    elif kwargs.get(\"user_id\"):\n        # Direct credentials provided - validate required fields\n        required_fields = [\"fingerprint\", \"tenancy_id\"]\n        missing = [f for f in required_fields if not kwargs.get(f)]\n        if missing:\n            raise ValueError(\n                f\"When providing oci_user_id, you must also provide: {', '.join('oci_' + f for f in missing)}\"\n            )\n        if not kwargs.get(\"private_key_path\") and not kwargs.get(\"private_key_content\"):\n            raise ValueError(\n                \"When providing oci_user_id, you must also provide either \"\n                \"oci_private_key_path or oci_private_key_content\"\n            )\n        config = {\n            \"user\": kwargs[\"user_id\"],\n            \"fingerprint\": kwargs[\"fingerprint\"],\n            \"tenancy\": kwargs[\"tenancy_id\"],\n        }\n        if 
kwargs.get(\"private_key_path\"):\n            config[\"key_file\"] = kwargs[\"private_key_path\"]\n        if kwargs.get(\"private_key_content\"):\n            config[\"key_content\"] = kwargs[\"private_key_content\"]\n        return config\n\n    else:\n        # Load from config file\n        oci_config = oci.config.from_file(\n            file_location=config_path or \"~/.oci/config\", profile_name=profile or \"DEFAULT\"\n        )\n        _remove_inherited_session_auth(oci_config, config_path=config_path, profile=profile)\n        return oci_config\n\n\ndef _remove_inherited_session_auth(\n    oci_config: typing.Dict[str, typing.Any],\n    *,\n    config_path: typing.Optional[str],\n    profile: typing.Optional[str],\n) -> None:\n    \"\"\"Drop session auth fields inherited from the OCI config DEFAULT section.\"\"\"\n    profile_name = profile or \"DEFAULT\"\n    if profile_name == \"DEFAULT\" or \"security_token_file\" not in oci_config:\n        return\n\n    config_file = os.path.expanduser(config_path or \"~/.oci/config\")\n    parser = configparser.ConfigParser(interpolation=None)\n    if not parser.read(config_file):\n        return\n\n    if not parser.has_section(profile_name):\n        oci_config.pop(\"security_token_file\", None)\n        return\n\n    explicit_security_token = False\n    current_section: typing.Optional[str] = None\n    with open(config_file, encoding=\"utf-8\") as handle:\n        for raw_line in handle:\n            line = raw_line.strip()\n            if not line or line.startswith((\"#\", \";\")):\n                continue\n            if line.startswith(\"[\") and line.endswith(\"]\"):\n                current_section = line[1:-1].strip()\n                continue\n            if current_section == profile_name and line.split(\"=\", 1)[0].strip() == \"security_token_file\":\n                explicit_security_token = True\n                break\n\n    if not explicit_security_token:\n        
oci_config.pop(\"security_token_file\", None)\n\n\ndef _usage_from_oci(usage_data: typing.Optional[typing.Dict[str, typing.Any]]) -> typing.Dict[str, typing.Any]:\n    usage_data = usage_data or {}\n    input_tokens = usage_data.get(\"inputTokens\", 0)\n    output_tokens = usage_data.get(\"completionTokens\", usage_data.get(\"outputTokens\", 0))\n\n    return {\n        \"tokens\": {\n            \"input_tokens\": input_tokens,\n            \"output_tokens\": output_tokens,\n        },\n        \"billed_units\": {\n            \"input_tokens\": input_tokens,\n            \"output_tokens\": output_tokens,\n        }\n    }\n\n\ndef get_event_hooks(\n    oci_config: typing.Dict[str, typing.Any],\n    oci_region: str,\n    oci_compartment_id: str,\n    is_v2_client: bool = False,\n) -> typing.Dict[str, typing.List[EventHook]]:\n    \"\"\"\n    Create httpx event hooks for OCI request/response transformation.\n\n    Args:\n        oci_config: OCI configuration dictionary\n        oci_region: OCI region (e.g., \"us-chicago-1\")\n        oci_compartment_id: OCI compartment OCID\n        is_v2_client: Whether this is for OciClientV2 (True) or OciClient (False)\n\n    Returns:\n        Dictionary of event hooks for httpx\n    \"\"\"\n    return {\n        \"request\": [\n            map_request_to_oci(\n                oci_config=oci_config,\n                oci_region=oci_region,\n                oci_compartment_id=oci_compartment_id,\n                is_v2_client=is_v2_client,\n            ),\n        ],\n        \"response\": [map_response_from_oci()],\n    }\n\n\ndef map_request_to_oci(\n    oci_config: typing.Dict[str, typing.Any],\n    oci_region: str,\n    oci_compartment_id: str,\n    is_v2_client: bool = False,\n) -> EventHook:\n    \"\"\"\n    Create event hook that transforms Cohere requests to OCI format and signs them.\n\n    Args:\n        oci_config: OCI configuration dictionary\n        oci_region: OCI region\n        oci_compartment_id: OCI compartment 
OCID\n        is_v2_client: Whether this is for OciClientV2 (True) or OciClient (False)\n\n    Returns:\n        Event hook function for httpx\n    \"\"\"\n    oci = lazy_oci()\n\n    # Create OCI signer based on config type\n    # Priority order: instance/resource principal > session-based auth > API key auth\n    if \"signer\" in oci_config:\n        signer = oci_config[\"signer\"]  # Instance/resource principal\n    elif \"security_token_file\" in oci_config:\n        # Session-based authentication with security token.\n        # The token file is re-read on every request so that OCI CLI token refreshes\n        # (e.g. `oci session refresh`) are picked up without restarting the client.\n        key_file = oci_config.get(\"key_file\")\n        if not key_file:\n            raise ValueError(\n                \"OCI config profile is missing 'key_file'. \"\n                \"Session-based auth requires a key_file entry in your OCI config profile.\"\n            )\n        token_file_path = os.path.expanduser(oci_config[\"security_token_file\"])\n        private_key = oci.signer.load_private_key_from_file(os.path.expanduser(key_file))\n\n        class _RefreshingSecurityTokenSigner:\n            \"\"\"Wraps SecurityTokenSigner and re-reads the token file before each signing call.\"\"\"\n\n            def __init__(self) -> None:\n                self._token_file = token_file_path\n                self._private_key = private_key\n                self._refresh()\n\n            def _refresh(self) -> None:\n                with open(self._token_file, \"r\") as _f:\n                    _token = _f.read().strip()\n                self._signer = oci.auth.signers.SecurityTokenSigner(\n                    token=_token,\n                    private_key=self._private_key,\n                )\n\n            # Delegate all attribute access to the inner signer, refreshing first.\n            def __call__(self, *args: typing.Any, **kwargs: typing.Any) -> typing.Any:\n                
self._refresh()\n                return self._signer(*args, **kwargs)\n\n            def __getattr__(self, name: str) -> typing.Any:\n                if name.startswith(\"_\"):\n                    raise AttributeError(name)\n                self._refresh()\n                return getattr(self._signer, name)\n\n        signer = _RefreshingSecurityTokenSigner()\n    elif \"user\" in oci_config:\n        signer = oci.signer.Signer(\n            tenancy=oci_config[\"tenancy\"],\n            user=oci_config[\"user\"],\n            fingerprint=oci_config[\"fingerprint\"],\n            private_key_file_location=oci_config.get(\"key_file\"),\n            private_key_content=oci_config.get(\"key_content\"),\n        )\n    else:\n        # Config doesn't have user or security token - unsupported\n        raise ValueError(\n            \"OCI config is missing 'user' field and no security_token_file found. \"\n            \"Please use a profile with standard API key authentication, \"\n            \"session-based authentication, or provide direct credentials via oci_user_id parameter.\"\n        )\n\n    def _event_hook(request: httpx.Request) -> None:\n        # Extract Cohere API details\n        path_parts = request.url.path.split(\"/\")\n        endpoint = path_parts[-1]\n        body = json.loads(request.read())\n\n        # Build OCI URL\n        url = get_oci_url(\n            region=oci_region,\n            endpoint=endpoint,\n        )\n\n        # Transform request body to OCI format\n        oci_body = transform_request_to_oci(\n            endpoint=endpoint,\n            cohere_body=body,\n            compartment_id=oci_compartment_id,\n            is_v2=is_v2_client,\n        )\n\n        # Prepare request for signing\n        oci_body_bytes = json.dumps(oci_body).encode(\"utf-8\")\n\n        # Build headers for signing\n        headers = {\n            \"content-type\": \"application/json\",\n            \"date\": email.utils.formatdate(usegmt=True),\n        
}\n\n        # Create a requests.PreparedRequest for OCI signing\n        oci_request = requests.Request(\n            method=request.method,\n            url=url,\n            headers=headers,\n            data=oci_body_bytes,\n        )\n        prepped_request = oci_request.prepare()\n\n        # Sign the request using OCI signer (modifies headers in place)\n        signer.do_request_sign(prepped_request)\n\n        # Update httpx request with signed headers\n        request.url = URL(url)\n        request.headers = httpx.Headers(prepped_request.headers)\n        request.stream = ByteStream(oci_body_bytes)\n        request._content = oci_body_bytes\n        request.extensions[\"endpoint\"] = endpoint\n        request.extensions[\"is_stream\"] = body.get(\"stream\", False)\n        request.extensions[\"is_v2\"] = is_v2_client\n\n    return _event_hook\n\n\ndef map_response_from_oci() -> EventHook:\n    \"\"\"\n    Create event hook that transforms OCI responses to Cohere format.\n\n    Returns:\n        Event hook function for httpx\n    \"\"\"\n\n    def _hook(response: httpx.Response) -> None:\n        endpoint = response.request.extensions[\"endpoint\"]\n        is_stream = response.request.extensions.get(\"is_stream\", False)\n        is_v2 = response.request.extensions.get(\"is_v2\", False)\n\n        output: typing.Iterator[bytes]\n\n        # Only transform successful responses (200-299)\n        # Let error responses pass through unchanged so SDK error handling works\n        if not (200 <= response.status_code < 300):\n            return\n\n        # For streaming responses, wrap the stream with a transformer\n        if is_stream:\n            original_stream = typing.cast(typing.Iterator[bytes], response.stream)\n            transformed_stream = transform_oci_stream_wrapper(original_stream, endpoint, is_v2)\n            response.stream = Streamer(transformed_stream)\n            # Reset consumption flags\n            if hasattr(response, 
\"_content\"):\n                del response._content\n            response.is_stream_consumed = False\n            response.is_closed = False\n            return\n\n        # Handle non-streaming responses\n        oci_response = json.loads(response.read())\n        cohere_response = transform_oci_response_to_cohere(endpoint, oci_response, is_v2)\n        output = iter([json.dumps(cohere_response).encode(\"utf-8\")])\n\n        response.stream = Streamer(output)\n\n        # Reset response for re-reading\n        if hasattr(response, \"_content\"):\n            del response._content\n        response.is_stream_consumed = False\n        response.is_closed = False\n\n    return _hook\n\n\ndef get_oci_url(\n    region: str,\n    endpoint: str,\n) -> str:\n    \"\"\"\n    Map Cohere endpoints to OCI Generative AI endpoints.\n\n    Args:\n        region: OCI region (e.g., \"us-chicago-1\")\n        endpoint: Cohere endpoint name\n    Returns:\n        Full OCI Generative AI endpoint URL\n    \"\"\"\n    base = f\"https://inference.generativeai.{region}.oci.oraclecloud.com\"\n    api_version = \"20231130\"\n\n    # Map Cohere endpoints to OCI actions\n    action_map = {\n        \"embed\": \"embedText\",\n        \"chat\": \"chat\",\n    }\n\n    action = action_map.get(endpoint)\n    if action is None:\n        raise ValueError(\n            f\"Endpoint '{endpoint}' is not supported by OCI Generative AI. 
\"\n            f\"Supported endpoints: {list(action_map.keys())}\"\n        )\n    return f\"{base}/{api_version}/actions/{action}\"\n\n\ndef normalize_model_for_oci(model: str) -> str:\n    \"\"\"\n    Normalize model name for OCI.\n\n    OCI accepts model names in the format \"cohere.model-name\" or full OCIDs.\n    This function ensures proper formatting for all regions.\n\n    Args:\n        model: Model name (e.g., \"command-r-08-2024\") or full OCID\n\n    Returns:\n        Normalized model identifier (e.g., \"cohere.command-r-08-2024\" or OCID)\n\n    Examples:\n        >>> normalize_model_for_oci(\"command-a-03-2025\")\n        \"cohere.command-a-03-2025\"\n        >>> normalize_model_for_oci(\"cohere.embed-english-v3.0\")\n        \"cohere.embed-english-v3.0\"\n        >>> normalize_model_for_oci(\"ocid1.generativeaimodel.oc1...\")\n        \"ocid1.generativeaimodel.oc1...\"\n    \"\"\"\n    if not model:\n        raise ValueError(\"OCI requests require a non-empty model name\")\n\n    # If it's already an OCID, return as-is (works across all regions)\n    if model.startswith(\"ocid1.\"):\n        return model\n\n    # Add \"cohere.\" prefix if not present\n    if not model.startswith(\"cohere.\"):\n        return f\"cohere.{model}\"\n\n    return model\n\n\ndef transform_request_to_oci(\n    endpoint: str,\n    cohere_body: typing.Dict[str, typing.Any],\n    compartment_id: str,\n    is_v2: bool = False,\n) -> typing.Dict[str, typing.Any]:\n    \"\"\"\n    Transform Cohere request body to OCI format.\n\n    Args:\n        endpoint: Cohere endpoint name\n        cohere_body: Original Cohere request body\n        compartment_id: OCI compartment OCID\n        is_v2: Whether this request comes from OciClientV2 (True) or OciClient (False)\n\n    Returns:\n        Transformed request body in OCI format\n    \"\"\"\n    model = normalize_model_for_oci(cohere_body.get(\"model\", \"\"))\n\n    if endpoint == \"embed\":\n        if \"texts\" in cohere_body:\n      
      inputs = cohere_body[\"texts\"]\n        elif \"inputs\" in cohere_body:\n            inputs = cohere_body[\"inputs\"]\n        elif \"images\" in cohere_body:\n            raise ValueError(\"OCI embed does not support the top-level 'images' parameter; use 'inputs' instead\")\n        else:\n            raise ValueError(\"OCI embed requires either 'texts' or 'inputs'\")\n\n        oci_body = {\n            \"inputs\": inputs,\n            \"servingMode\": {\n                \"servingType\": \"ON_DEMAND\",\n                \"modelId\": model,\n            },\n            \"compartmentId\": compartment_id,\n        }\n\n        # Add optional fields only if provided\n        if \"input_type\" in cohere_body:\n            oci_body[\"inputType\"] = cohere_body[\"input_type\"].upper()\n\n        if \"truncate\" in cohere_body:\n            oci_body[\"truncate\"] = cohere_body[\"truncate\"].upper()\n\n        if \"embedding_types\" in cohere_body:\n            # OCI expects lowercase embedding types (float, int8, binary, etc.)\n            oci_body[\"embeddingTypes\"] = [et.lower() for et in cohere_body[\"embedding_types\"]]\n        if \"max_tokens\" in cohere_body:\n            oci_body[\"maxTokens\"] = cohere_body[\"max_tokens\"]\n        if \"output_dimension\" in cohere_body:\n            oci_body[\"outputDimension\"] = cohere_body[\"output_dimension\"]\n        if \"priority\" in cohere_body:\n            oci_body[\"priority\"] = cohere_body[\"priority\"]\n\n        return oci_body\n\n    elif endpoint == \"chat\":\n        # Validate that the request body matches the client type\n        has_messages = \"messages\" in cohere_body\n        has_message = \"message\" in cohere_body\n        if is_v2 and not has_messages:\n            raise ValueError(\n                \"OciClientV2 requires the V2 API format ('messages' array). \"\n                \"Got a V1-style request with 'message' string. 
\"\n                \"Use OciClient for V1 models like Command R, \"\n                \"or switch to the V2 messages format.\"\n            )\n        if not is_v2 and has_messages and not has_message:\n            raise ValueError(\n                \"OciClient uses the V1 API format (single 'message' string). \"\n                \"Got a V2-style request with 'messages' array. \"\n                \"Use OciClientV2 for V2 models like Command A.\"\n            )\n\n        chat_request: typing.Dict[str, typing.Any] = {\n            \"apiFormat\": \"COHEREV2\" if is_v2 else \"COHERE\",\n        }\n\n        if is_v2:\n            # V2: Transform Cohere V2 messages to OCI V2 format\n            # Cohere sends: [{\"role\": \"user\", \"content\": \"text\"}]\n            # OCI expects: [{\"role\": \"USER\", \"content\": [{\"type\": \"TEXT\", \"text\": \"...\"}]}]\n            oci_messages = []\n            for msg in cohere_body[\"messages\"]:\n                oci_msg: typing.Dict[str, typing.Any] = {\n                    \"role\": msg[\"role\"].upper(),\n                }\n\n                # Transform content\n                if isinstance(msg.get(\"content\"), str):\n                    oci_msg[\"content\"] = [{\"type\": \"TEXT\", \"text\": msg[\"content\"]}]\n                elif isinstance(msg.get(\"content\"), list):\n                    transformed_content = []\n                    for item in msg[\"content\"]:\n                        if isinstance(item, dict) and \"type\" in item:\n                            transformed_item = item.copy()\n                            transformed_item[\"type\"] = item[\"type\"].upper()\n                            # OCI expects camelCase: image_url → imageUrl\n                            if \"image_url\" in transformed_item:\n                                transformed_item[\"imageUrl\"] = transformed_item.pop(\"image_url\")\n                            transformed_content.append(transformed_item)\n                        else:\n 
                           transformed_content.append(item)\n                    oci_msg[\"content\"] = transformed_content\n                else:\n                    oci_msg[\"content\"] = msg.get(\"content\") or []\n\n                if \"tool_calls\" in msg:\n                    oci_tool_calls = []\n                    for tc in msg[\"tool_calls\"]:\n                        oci_tc = {**tc}\n                        if \"type\" in oci_tc:\n                            oci_tc[\"type\"] = oci_tc[\"type\"].upper()\n                        oci_tool_calls.append(oci_tc)\n                    oci_msg[\"toolCalls\"] = oci_tool_calls\n                if \"tool_call_id\" in msg:\n                    oci_msg[\"toolCallId\"] = msg[\"tool_call_id\"]\n                if \"tool_plan\" in msg:\n                    oci_msg[\"toolPlan\"] = msg[\"tool_plan\"]\n\n                oci_messages.append(oci_msg)\n\n            chat_request[\"messages\"] = oci_messages\n\n            # V2 optional parameters\n            if \"max_tokens\" in cohere_body:\n                chat_request[\"maxTokens\"] = cohere_body[\"max_tokens\"]\n            if \"temperature\" in cohere_body:\n                chat_request[\"temperature\"] = cohere_body[\"temperature\"]\n            if \"k\" in cohere_body:\n                chat_request[\"topK\"] = cohere_body[\"k\"]\n            if \"p\" in cohere_body:\n                chat_request[\"topP\"] = cohere_body[\"p\"]\n            if \"seed\" in cohere_body:\n                chat_request[\"seed\"] = cohere_body[\"seed\"]\n            if \"frequency_penalty\" in cohere_body:\n                chat_request[\"frequencyPenalty\"] = cohere_body[\"frequency_penalty\"]\n            if \"presence_penalty\" in cohere_body:\n                chat_request[\"presencePenalty\"] = cohere_body[\"presence_penalty\"]\n            if \"stop_sequences\" in cohere_body:\n                chat_request[\"stopSequences\"] = cohere_body[\"stop_sequences\"]\n            if \"tools\" in 
cohere_body:\n                oci_tools = []\n                for tool in cohere_body[\"tools\"]:\n                    oci_tool = {**tool}\n                    if \"type\" in oci_tool:\n                        oci_tool[\"type\"] = oci_tool[\"type\"].upper()\n                    oci_tools.append(oci_tool)\n                chat_request[\"tools\"] = oci_tools\n            if \"strict_tools\" in cohere_body:\n                chat_request[\"strictTools\"] = cohere_body[\"strict_tools\"]\n            if \"documents\" in cohere_body:\n                chat_request[\"documents\"] = cohere_body[\"documents\"]\n            if \"citation_options\" in cohere_body:\n                chat_request[\"citationOptions\"] = cohere_body[\"citation_options\"]\n            if \"response_format\" in cohere_body:\n                chat_request[\"responseFormat\"] = cohere_body[\"response_format\"]\n            if \"safety_mode\" in cohere_body and cohere_body[\"safety_mode\"] is not None:\n                chat_request[\"safetyMode\"] = cohere_body[\"safety_mode\"].upper()\n            if \"logprobs\" in cohere_body:\n                chat_request[\"logprobs\"] = cohere_body[\"logprobs\"]\n            if \"tool_choice\" in cohere_body:\n                chat_request[\"toolChoice\"] = cohere_body[\"tool_choice\"]\n            if \"priority\" in cohere_body:\n                chat_request[\"priority\"] = cohere_body[\"priority\"]\n            # Thinking parameter for Command A Reasoning models\n            if \"thinking\" in cohere_body and cohere_body[\"thinking\"] is not None:\n                thinking = cohere_body[\"thinking\"]\n                oci_thinking: typing.Dict[str, typing.Any] = {}\n                if \"type\" in thinking:\n                    oci_thinking[\"type\"] = thinking[\"type\"].upper()\n                if \"token_budget\" in thinking and thinking[\"token_budget\"] is not None:\n                    oci_thinking[\"tokenBudget\"] = thinking[\"token_budget\"]\n                if 
oci_thinking:\n                    chat_request[\"thinking\"] = oci_thinking\n        else:\n            # V1: single message string\n            chat_request[\"message\"] = cohere_body[\"message\"]\n\n            if \"temperature\" in cohere_body:\n                chat_request[\"temperature\"] = cohere_body[\"temperature\"]\n            if \"max_tokens\" in cohere_body:\n                chat_request[\"maxTokens\"] = cohere_body[\"max_tokens\"]\n            if \"k\" in cohere_body:\n                chat_request[\"topK\"] = cohere_body[\"k\"]\n            if \"p\" in cohere_body:\n                chat_request[\"topP\"] = cohere_body[\"p\"]\n            if \"seed\" in cohere_body:\n                chat_request[\"seed\"] = cohere_body[\"seed\"]\n            if \"stop_sequences\" in cohere_body:\n                chat_request[\"stopSequences\"] = cohere_body[\"stop_sequences\"]\n            if \"frequency_penalty\" in cohere_body:\n                chat_request[\"frequencyPenalty\"] = cohere_body[\"frequency_penalty\"]\n            if \"presence_penalty\" in cohere_body:\n                chat_request[\"presencePenalty\"] = cohere_body[\"presence_penalty\"]\n            if \"preamble\" in cohere_body:\n                chat_request[\"preambleOverride\"] = cohere_body[\"preamble\"]\n            if \"chat_history\" in cohere_body:\n                chat_request[\"chatHistory\"] = cohere_body[\"chat_history\"]\n            if \"documents\" in cohere_body:\n                chat_request[\"documents\"] = cohere_body[\"documents\"]\n            if \"tools\" in cohere_body:\n                oci_tools = []\n                for tool in cohere_body[\"tools\"]:\n                    oci_tool = {**tool}\n                    if \"type\" in oci_tool:\n                        oci_tool[\"type\"] = oci_tool[\"type\"].upper()\n                    oci_tools.append(oci_tool)\n                chat_request[\"tools\"] = oci_tools\n            if \"tool_results\" in cohere_body:\n                
chat_request[\"toolResults\"] = cohere_body[\"tool_results\"]\n            if \"response_format\" in cohere_body:\n                chat_request[\"responseFormat\"] = cohere_body[\"response_format\"]\n            if \"safety_mode\" in cohere_body and cohere_body[\"safety_mode\"] is not None:\n                chat_request[\"safetyMode\"] = cohere_body[\"safety_mode\"].upper()\n            if \"priority\" in cohere_body:\n                chat_request[\"priority\"] = cohere_body[\"priority\"]\n\n        # Handle streaming for both versions\n        if cohere_body.get(\"stream\"):\n            chat_request[\"isStream\"] = True\n\n        # Top level OCI request structure\n        oci_body = {\n            \"servingMode\": {\n                \"servingType\": \"ON_DEMAND\",\n                \"modelId\": model,\n            },\n            \"compartmentId\": compartment_id,\n            \"chatRequest\": chat_request,\n        }\n\n        return oci_body\n\n    raise ValueError(\n        f\"Endpoint '{endpoint}' is not supported by OCI Generative AI on-demand inference. 
\"\n        \"Supported endpoints: ['embed', 'chat']\"\n    )\n\n\ndef transform_oci_response_to_cohere(\n    endpoint: str, oci_response: typing.Dict[str, typing.Any], is_v2: bool = False,\n) -> typing.Dict[str, typing.Any]:\n    \"\"\"\n    Transform OCI response to Cohere format.\n\n    Args:\n        endpoint: Cohere endpoint name\n        oci_response: OCI response body\n        is_v2: Whether this is a V2 API response\n\n    Returns:\n        Transformed response in Cohere format\n    \"\"\"\n    if endpoint == \"embed\":\n        # OCI returns \"embeddings\" by default, or \"embeddingsByType\" when embeddingTypes is specified\n        embeddings_data = oci_response.get(\"embeddingsByType\") or oci_response.get(\"embeddings\", {})\n\n        if isinstance(embeddings_data, dict):\n            normalized_embeddings = {str(key).lower(): value for key, value in embeddings_data.items()}\n        else:\n            normalized_embeddings = {\"float\": embeddings_data}\n\n        if is_v2:\n            embeddings = normalized_embeddings\n        else:\n            embeddings = normalized_embeddings.get(\"float\", [])\n\n        meta = {\n            \"api_version\": {\"version\": \"1\"},\n        }\n        usage = _usage_from_oci(oci_response.get(\"usage\"))\n        if \"tokens\" in usage:\n            meta[\"tokens\"] = usage[\"tokens\"]\n        if \"billed_units\" in usage:\n            meta[\"billed_units\"] = usage[\"billed_units\"]\n\n        response_type = \"embeddings_by_type\" if is_v2 else \"embeddings_floats\"\n\n        return {\n            \"response_type\": response_type,\n            \"id\": oci_response.get(\"id\", str(uuid.uuid4())),\n            \"embeddings\": embeddings,\n            \"texts\": [],\n            \"meta\": meta,\n        }\n\n    elif endpoint == \"chat\":\n        chat_response = oci_response.get(\"chatResponse\", {})\n\n        if is_v2:\n            usage = _usage_from_oci(chat_response.get(\"usage\"))\n            message = 
chat_response.get(\"message\", {})\n\n            if \"role\" in message:\n                message = {**message, \"role\": message[\"role\"].lower()}\n\n            if \"content\" in message and isinstance(message[\"content\"], list):\n                transformed_content = []\n                for item in message[\"content\"]:\n                    if isinstance(item, dict):\n                        transformed_item = item.copy()\n                        if \"type\" in transformed_item:\n                            transformed_item[\"type\"] = transformed_item[\"type\"].lower()\n                        transformed_content.append(transformed_item)\n                    else:\n                        transformed_content.append(item)\n                message = {**message, \"content\": transformed_content}\n\n            if \"toolCalls\" in message:\n                tool_calls = []\n                for tc in message[\"toolCalls\"]:\n                    lowered_tc = {**tc}\n                    if \"type\" in lowered_tc:\n                        lowered_tc[\"type\"] = lowered_tc[\"type\"].lower()\n                    tool_calls.append(lowered_tc)\n                message = {k: v for k, v in message.items() if k != \"toolCalls\"}\n                message[\"tool_calls\"] = tool_calls\n            if \"toolPlan\" in message:\n                tool_plan = message[\"toolPlan\"]\n                message = {k: v for k, v in message.items() if k != \"toolPlan\"}\n                message[\"tool_plan\"] = tool_plan\n\n            return {\n                \"id\": chat_response.get(\"id\", str(uuid.uuid4())),\n                \"message\": message,\n                \"finish_reason\": chat_response.get(\"finishReason\", \"COMPLETE\"),\n                \"usage\": usage,\n            }\n\n        # V1 response\n        meta = {\n            \"api_version\": {\"version\": \"1\"},\n        }\n        usage = _usage_from_oci(chat_response.get(\"usage\"))\n        if \"tokens\" in usage:\n     
       meta[\"tokens\"] = usage[\"tokens\"]\n        if \"billed_units\" in usage:\n            meta[\"billed_units\"] = usage[\"billed_units\"]\n\n        return {\n            \"text\": chat_response.get(\"text\", \"\"),\n            \"generation_id\": str(uuid.uuid4()),\n            \"chat_history\": chat_response.get(\"chatHistory\", []),\n            \"finish_reason\": chat_response.get(\"finishReason\", \"COMPLETE\"),\n            \"citations\": chat_response.get(\"citations\", []),\n            \"documents\": chat_response.get(\"documents\", []),\n            \"search_queries\": chat_response.get(\"searchQueries\", []),\n            \"meta\": meta,\n        }\n\n    return oci_response\n\n\ndef transform_oci_stream_wrapper(\n    stream: typing.Iterator[bytes], endpoint: str, is_v2: bool = False,\n) -> typing.Iterator[bytes]:\n    \"\"\"\n    Wrap OCI stream and transform events to Cohere format.\n\n    Args:\n        stream: Original OCI stream iterator\n        endpoint: Cohere endpoint name\n        is_v2: Whether this is a V2 API stream\n\n    Yields:\n        Bytes of transformed streaming events\n    \"\"\"\n    generation_id = str(uuid.uuid4())\n    emitted_start = False\n    emitted_content_end = False\n    current_content_type: typing.Optional[str] = None\n    current_content_index = 0\n    final_finish_reason = \"COMPLETE\"\n    final_usage: typing.Optional[typing.Dict[str, typing.Any]] = None\n    full_v1_text = \"\"\n    final_v1_finish_reason = \"COMPLETE\"\n    buffer = b\"\"\n\n    def _emit_v2_event(event: typing.Dict[str, typing.Any]) -> bytes:\n        return b\"data: \" + json.dumps(event).encode(\"utf-8\") + b\"\\n\\n\"\n\n    def _emit_v1_event(event: typing.Dict[str, typing.Any]) -> bytes:\n        return json.dumps(event).encode(\"utf-8\") + b\"\\n\"\n\n    def _current_content_type(oci_event: typing.Dict[str, typing.Any]) -> typing.Optional[str]:\n        message = oci_event.get(\"message\")\n        if isinstance(message, dict):\n     
       content_list = message.get(\"content\")\n            if content_list and isinstance(content_list, list) and len(content_list) > 0:\n                oci_type = content_list[0].get(\"type\", \"TEXT\").upper()\n                return \"thinking\" if oci_type == \"THINKING\" else \"text\"\n        return None  # finish-only or non-content event — don't trigger a type transition\n\n    def _transform_v2_event(oci_event: typing.Dict[str, typing.Any]) -> typing.Iterator[bytes]:\n        nonlocal emitted_start, emitted_content_end, current_content_type, current_content_index\n        nonlocal final_finish_reason, final_usage\n\n        event_content_type = _current_content_type(oci_event)\n        open_type = event_content_type or \"text\"\n\n        if not emitted_start:\n            yield _emit_v2_event(\n                {\n                    \"type\": \"message-start\",\n                    \"id\": generation_id,\n                    \"delta\": {\"message\": {\"role\": \"assistant\"}},\n                }\n            )\n            yield _emit_v2_event(\n                {\n                    \"type\": \"content-start\",\n                    \"index\": current_content_index,\n                    \"delta\": {\"message\": {\"content\": {\"type\": open_type}}},\n                }\n            )\n            emitted_start = True\n            current_content_type = open_type\n        elif event_content_type is not None and current_content_type != event_content_type:\n            yield _emit_v2_event({\"type\": \"content-end\", \"index\": current_content_index})\n            current_content_index += 1\n            yield _emit_v2_event(\n                {\n                    \"type\": \"content-start\",\n                    \"index\": current_content_index,\n                    \"delta\": {\"message\": {\"content\": {\"type\": event_content_type}}},\n                }\n            )\n            current_content_type = event_content_type\n            
emitted_content_end = False\n\n        for cohere_event in typing.cast(\n            typing.List[typing.Dict[str, typing.Any]], transform_stream_event(endpoint, oci_event, is_v2=True)\n        ):\n            if \"index\" in cohere_event:\n                cohere_event = {**cohere_event, \"index\": current_content_index}\n            if cohere_event[\"type\"] == \"content-end\":\n                emitted_content_end = True\n                final_finish_reason = oci_event.get(\"finishReason\", final_finish_reason)\n                final_usage = _usage_from_oci(oci_event.get(\"usage\"))\n            yield _emit_v2_event(cohere_event)\n\n    def _transform_v1_event(oci_event: typing.Dict[str, typing.Any]) -> typing.Iterator[bytes]:\n        nonlocal emitted_start, full_v1_text, final_v1_finish_reason\n        if not emitted_start:\n            yield _emit_v1_event({\n                \"event_type\": \"stream-start\",\n                \"generation_id\": generation_id,\n                \"is_finished\": False,\n            })\n            emitted_start = True\n        event = transform_stream_event(endpoint, oci_event, is_v2=False)\n        if isinstance(event, dict):\n            if event.get(\"event_type\") == \"text-generation\" and event.get(\"text\"):\n                full_v1_text += typing.cast(str, event[\"text\"])\n            if \"finishReason\" in oci_event:\n                final_v1_finish_reason = oci_event.get(\"finishReason\", final_v1_finish_reason)\n            yield _emit_v1_event(event)\n\n    stream_finished = False\n\n    def _emit_closing_events() -> typing.Iterator[bytes]:\n        \"\"\"Emit the final closing events for the stream.\"\"\"\n        if is_v2:\n            if emitted_start:\n                if not emitted_content_end:\n                    yield _emit_v2_event({\"type\": \"content-end\", \"index\": current_content_index})\n                message_end_event: typing.Dict[str, typing.Any] = {\n                    \"type\": \"message-end\",\n  
                  \"id\": generation_id,\n                    \"delta\": {\"finish_reason\": final_finish_reason},\n                }\n                if final_usage:\n                    message_end_event[\"delta\"][\"usage\"] = final_usage\n                yield _emit_v2_event(message_end_event)\n        else:\n            yield _emit_v1_event(\n                {\n                    \"event_type\": \"stream-end\",\n                    \"finish_reason\": final_v1_finish_reason,\n                    \"response\": {\n                        \"text\": full_v1_text,\n                        \"generation_id\": generation_id,\n                        \"finish_reason\": final_v1_finish_reason,\n                    },\n                }\n            )\n\n    def _process_line(line: str) -> typing.Iterator[bytes]:\n        nonlocal stream_finished\n        if not line.startswith(\"data: \"):\n            return\n\n        data_str = line[6:]\n        if data_str.strip() == \"[DONE]\":\n            for event_bytes in _emit_closing_events():\n                yield event_bytes\n            stream_finished = True\n            return\n\n        try:\n            oci_event = json.loads(data_str)\n        except json.JSONDecodeError:\n            return\n\n        try:\n            if is_v2:\n                for event_bytes in _transform_v2_event(oci_event):\n                    yield event_bytes\n            else:\n                for event_bytes in _transform_v1_event(oci_event):\n                    yield event_bytes\n        except Exception as exc:\n            raise RuntimeError(f\"OCI stream event transformation failed for endpoint '{endpoint}': {exc}\") from exc\n\n        # OCI may not send [DONE] — treat finishReason as stream termination\n        if \"finishReason\" in oci_event:\n            for event_bytes in _emit_closing_events():\n                yield event_bytes\n            stream_finished = True\n\n    for chunk in stream:\n        buffer += chunk\n        
while b\"\\n\" in buffer:\n            line_bytes, buffer = buffer.split(b\"\\n\", 1)\n            line = line_bytes.decode(\"utf-8\").strip()\n            for event_bytes in _process_line(line):\n                yield event_bytes\n            if stream_finished:\n                return\n\n    if buffer.strip() and not stream_finished:\n        line = buffer.decode(\"utf-8\").strip()\n        for event_bytes in _process_line(line):\n            yield event_bytes\n\n\ndef transform_stream_event(\n    endpoint: str, oci_event: typing.Dict[str, typing.Any], is_v2: bool = False,\n) -> typing.Union[typing.Dict[str, typing.Any], typing.List[typing.Dict[str, typing.Any]]]:\n    \"\"\"\n    Transform individual OCI stream event to Cohere format.\n\n    Args:\n        endpoint: Cohere endpoint name\n        oci_event: OCI stream event\n        is_v2: Whether this is a V2 API stream\n\n    Returns:\n        V2: List of transformed events. V1: Single transformed event dict.\n    \"\"\"\n    if endpoint == \"chat\":\n        if is_v2:\n            content_type = \"text\"\n            content_value = \"\"\n            message = oci_event.get(\"message\")\n\n            if \"message\" in oci_event and not isinstance(message, dict):\n                raise TypeError(\"OCI V2 stream event message must be an object\")\n\n            if isinstance(message, dict) and \"content\" in message:\n                content_list = message[\"content\"]\n                if content_list and isinstance(content_list, list) and len(content_list) > 0:\n                    first_content = content_list[0]\n                    oci_type = first_content.get(\"type\", \"TEXT\").upper()\n                    if oci_type == \"THINKING\":\n                        content_type = \"thinking\"\n                        content_value = first_content.get(\"thinking\", \"\")\n                    else:\n                        content_type = \"text\"\n                        content_value = first_content.get(\"text\", 
\"\")\n\n            events: typing.List[typing.Dict[str, typing.Any]] = []\n            if content_value:\n                delta_content: typing.Dict[str, typing.Any] = {}\n                if content_type == \"thinking\":\n                    delta_content[\"thinking\"] = content_value\n                else:\n                    delta_content[\"text\"] = content_value\n\n                events.append(\n                    {\n                        \"type\": \"content-delta\",\n                        \"index\": 0,\n                        \"delta\": {\n                            \"message\": {\n                                \"content\": delta_content,\n                            }\n                        },\n                    }\n                )\n\n            if \"finishReason\" in oci_event:\n                events.append(\n                    {\n                        \"type\": \"content-end\",\n                        \"index\": 0,\n                    }\n                )\n\n            return events\n\n        # V1 stream event\n        return {\n            \"event_type\": \"text-generation\",\n            \"text\": oci_event.get(\"text\", \"\"),\n            \"is_finished\": oci_event.get(\"isFinished\", False),\n        }\n\n    return [] if is_v2 else {}\n"
  },
  {
    "path": "src/cohere/overrides.py",
    "content": "import typing\nimport uuid\n\nfrom . import EmbedByTypeResponseEmbeddings\nfrom .core.pydantic_utilities import _get_model_fields, Model, IS_PYDANTIC_V2\n\nfrom pprint import pprint\n\n\ndef get_fields(obj) -> typing.List[str]:\n    return [str(x) for x in _get_model_fields(obj).keys()]\n\n\ndef get_aliases_or_field(obj) -> typing.List[str]:\n    return [\n        field_info.alias or (field_info and field_info.metadata and field_info.metadata[0] and field_info.metadata[0].alias) or field_name # type: ignore\n        for field_name, field_info\n        in _get_model_fields(obj).items()\n    ]\n\n\ndef get_aliases_and_fields(obj):\n    # merge and dedup get_fields(obj), get_aliases_or_field(obj)\n    return list(set(get_fields(obj) + get_aliases_or_field(obj)))\n\n\ndef allow_access_to_aliases(self: typing.Type[\"Model\"], name):\n    for field_name, field_info in _get_model_fields(self).items():\n        alias = field_info.alias or (\n                    field_info and field_info.metadata and field_info.metadata[0] and field_info.metadata[0].alias) # type: ignore\n        if alias == name or field_name == name:\n            return getattr(self, field_name)\n    raise AttributeError(\n        f\"'{type(self).__name__}' object has no attribute '{name}'\")\n\n\ndef make_tool_call_v2_id_optional(cls):\n    \"\"\"\n    Override ToolCallV2 to make the 'id' field optional with a default UUID.\n    This ensures backward compatibility with code that doesn't provide an id.\n\n    We wrap the __init__ method to inject a default id before Pydantic validation runs.\n    \"\"\"\n    # Store the original __init__ method\n    original_init = cls.__init__\n\n    def patched_init(self, /, **data):\n        \"\"\"Patched __init__ that injects default id if not provided.\"\"\"\n        # Inject default UUID if 'id' is not in the data\n        if 'id' not in data:\n            data['id'] = str(uuid.uuid4())\n\n        # Call the original __init__ with modified data\n     
   original_init(self, **data)\n\n    # Replace the __init__ method\n    cls.__init__ = patched_init\n\n    return cls\n\n\ndef run_overrides():\n    \"\"\"\n        These are overrides to allow us to make changes to generated code without touching the generated files themselves.\n        Should be used judiciously!\n    \"\"\"\n\n    # Override to allow access to aliases in EmbedByTypeResponseEmbeddings eg embeddings.float rather than embeddings.float_\n    setattr(EmbedByTypeResponseEmbeddings, \"__getattr__\", allow_access_to_aliases)\n\n    # Import ToolCallV2 lazily to avoid circular dependency issues\n    from . import ToolCallV2\n\n    # Override ToolCallV2 to make id field optional with default UUID\n    make_tool_call_v2_id_optional(ToolCallV2)\n\n\n# Run overrides immediately at module import time to ensure they're applied\n# before any code tries to use the modified classes\nrun_overrides()\n"
  },
  {
    "path": "src/cohere/py.typed",
    "content": ""
  },
  {
    "path": "src/cohere/raw_base_client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport contextlib\nimport json\nimport typing\nfrom json.decoder import JSONDecodeError\n\nfrom .core.api_error import ApiError\nfrom .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom .core.http_response import AsyncHttpResponse, HttpResponse\nfrom .core.parse_error import ParsingError\nfrom .core.request_options import RequestOptions\nfrom .core.serialization import convert_and_respect_annotation_metadata\nfrom .core.unchecked_base_model import construct_type\nfrom .errors.bad_request_error import BadRequestError\nfrom .errors.client_closed_request_error import ClientClosedRequestError\nfrom .errors.forbidden_error import ForbiddenError\nfrom .errors.gateway_timeout_error import GatewayTimeoutError\nfrom .errors.internal_server_error import InternalServerError\nfrom .errors.invalid_token_error import InvalidTokenError\nfrom .errors.not_found_error import NotFoundError\nfrom .errors.not_implemented_error import NotImplementedError\nfrom .errors.service_unavailable_error import ServiceUnavailableError\nfrom .errors.too_many_requests_error import TooManyRequestsError\nfrom .errors.unauthorized_error import UnauthorizedError\nfrom .errors.unprocessable_entity_error import UnprocessableEntityError\nfrom .types.chat_connector import ChatConnector\nfrom .types.chat_document import ChatDocument\nfrom .types.chat_request_citation_quality import ChatRequestCitationQuality\nfrom .types.chat_request_prompt_truncation import ChatRequestPromptTruncation\nfrom .types.chat_request_safety_mode import ChatRequestSafetyMode\nfrom .types.chat_stream_request_citation_quality import ChatStreamRequestCitationQuality\nfrom .types.chat_stream_request_prompt_truncation import ChatStreamRequestPromptTruncation\nfrom .types.chat_stream_request_safety_mode import ChatStreamRequestSafetyMode\nfrom .types.check_api_key_response import CheckApiKeyResponse\nfrom .types.classify_example import 
ClassifyExample\nfrom .types.classify_request_truncate import ClassifyRequestTruncate\nfrom .types.classify_response import ClassifyResponse\nfrom .types.detokenize_response import DetokenizeResponse\nfrom .types.embed_input_type import EmbedInputType\nfrom .types.embed_request_truncate import EmbedRequestTruncate\nfrom .types.embed_response import EmbedResponse\nfrom .types.embedding_type import EmbeddingType\nfrom .types.generate_request_return_likelihoods import GenerateRequestReturnLikelihoods\nfrom .types.generate_request_truncate import GenerateRequestTruncate\nfrom .types.generate_stream_request_return_likelihoods import GenerateStreamRequestReturnLikelihoods\nfrom .types.generate_stream_request_truncate import GenerateStreamRequestTruncate\nfrom .types.generate_streamed_response import GenerateStreamedResponse\nfrom .types.generation import Generation\nfrom .types.message import Message\nfrom .types.non_streamed_chat_response import NonStreamedChatResponse\nfrom .types.rerank_request_documents_item import RerankRequestDocumentsItem\nfrom .types.rerank_response import RerankResponse\nfrom .types.response_format import ResponseFormat\nfrom .types.streamed_chat_response import StreamedChatResponse\nfrom .types.summarize_request_extractiveness import SummarizeRequestExtractiveness\nfrom .types.summarize_request_format import SummarizeRequestFormat\nfrom .types.summarize_request_length import SummarizeRequestLength\nfrom .types.summarize_response import SummarizeResponse\nfrom .types.tokenize_response import TokenizeResponse\nfrom .types.tool import Tool\nfrom .types.tool_result import ToolResult\nfrom pydantic import ValidationError\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass RawBaseCohere:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    @contextlib.contextmanager\n    def chat_stream(\n        self,\n        *,\n        
message: str,\n        accepts: typing.Optional[typing.Literal[\"text/event-stream\"]] = None,\n        model: typing.Optional[str] = OMIT,\n        preamble: typing.Optional[str] = OMIT,\n        chat_history: typing.Optional[typing.Sequence[Message]] = OMIT,\n        conversation_id: typing.Optional[str] = OMIT,\n        prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation] = OMIT,\n        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,\n        search_queries_only: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,\n        citation_quality: typing.Optional[ChatStreamRequestCitationQuality] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        max_input_tokens: typing.Optional[int] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,\n        tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT,\n        force_single_step: typing.Optional[bool] = OMIT,\n        response_format: typing.Optional[ResponseFormat] = OMIT,\n        safety_mode: typing.Optional[ChatStreamRequestSafetyMode] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> typing.Iterator[HttpResponse[typing.Iterator[StreamedChatResponse]]]:\n        \"\"\"\n        Generates a streamed text response to a user message.\n\n        To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api).\n\n        Parameters\n        ----------\n        message : str\n            
Text input for the model to respond to.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        accepts : typing.Optional[typing.Literal[\"text/event-stream\"]]\n            Pass text/event-stream to receive the streamed response as server-sent events. The default is `\\\\n` delimited events.\n\n        model : typing.Optional[str]\n            The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.\n\n            Compatible Deployments: Cohere Platform, Private Deployments\n\n        preamble : typing.Optional[str]\n            When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.\n\n            The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        chat_history : typing.Optional[typing.Sequence[Message]]\n            A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.\n\n            Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.\n\n            The chat_history parameter should not be used for `SYSTEM` messages in most cases. 
Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        conversation_id : typing.Optional[str]\n            An alternative to `chat_history`.\n\n            Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string.\n\n            Compatible Deployments: Cohere Platform\n\n        prompt_truncation : typing.Optional[ChatStreamRequestPromptTruncation]\n            Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.\n\n            Dictates how the prompt will be constructed.\n\n            With `prompt_truncation` set to \"AUTO\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.\n\n            With `prompt_truncation` set to \"AUTO_PRESERVE_ORDER\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.\n\n            With `prompt_truncation` set to \"OFF\", no elements will be dropped. 
If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.\n\n            Compatible Deployments:\n             - AUTO: Cohere Platform Only\n             - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        connectors : typing.Optional[typing.Sequence[ChatConnector]]\n            Accepts `{\"id\": \"web-search\"}`, and/or the `\"id\"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) one.\n\n            When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).\n\n            Compatible Deployments: Cohere Platform\n\n        search_queries_only : typing.Optional[bool]\n            Defaults to `false`.\n\n            When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        documents : typing.Optional[typing.Sequence[ChatDocument]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.\n\n            Example:\n            ```\n            [\n              { \"title\": \"Tall penguins\", \"text\": \"Emperor penguins are the tallest.\" },\n              { \"title\": \"Penguin habitats\", \"text\": \"Emperor penguins only live in Antarctica.\" },\n            ]\n            ```\n\n            Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.\n\n            Some suggested keys are \"text\", \"author\", and \"date\". 
For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.\n\n            An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.\n\n            An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The \"_excludes\" field will not be passed to the model.\n\n            See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        citation_quality : typing.Optional[ChatStreamRequestCitationQuality]\n            Defaults to `\"enabled\"`.\n            Citations are enabled by default for models that support it, but can be turned off by setting `\"type\": \"disabled\"`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. 
Note: Setting a low value may result in incomplete generations.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_input_tokens : typing.Optional[int]\n            The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer.\n\n            Input will be truncated according to the `prompt_truncation` parameter.\n\n            Compatible Deployments: Cohere Platform\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. 
If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without\n            any pre-processing.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tools : typing.Optional[typing.Sequence[Tool]]\n            A list of available tools (functions) that the model may suggest invoking before producing a text response.\n\n            When `tools` is passed (without `tool_results`), the `text` field in the response will be `\"\"` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. 
If no calls need to be made, the `tool_calls` array will be empty.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tool_results : typing.Optional[typing.Sequence[ToolResult]]\n            A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.\n            Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.\n\n            **Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{\"status\": 200}`), make sure to wrap it in a list.\n            ```\n            tool_results = [\n              {\n                \"call\": {\n                  \"name\": <tool name>,\n                  \"parameters\": {\n                    <param name>: <param value>\n                  }\n                },\n                \"outputs\": [{\n                  <key>: <value>\n                }]\n              },\n              ...\n            ]\n            ```\n            **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        force_single_step : typing.Optional[bool]\n            Forces the chat to be single step. Defaults to `false`.\n\n        response_format : typing.Optional[ResponseFormat]\n\n        safety_mode : typing.Optional[ChatStreamRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/docs/safety-modes) inserted into the prompt. 
Defaults to `CONTEXTUAL`.\n            When `NONE` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.\n\n            **Note**: This parameter is only compatible newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Yields\n        ------\n        typing.Iterator[HttpResponse[typing.Iterator[StreamedChatResponse]]]\n\n        \"\"\"\n        with self._client_wrapper.httpx_client.stream(\n            \"v1/chat\",\n            method=\"POST\",\n            json={\n                \"message\": message,\n                \"model\": model,\n                \"preamble\": preamble,\n                \"chat_history\": convert_and_respect_annotation_metadata(\n                    object_=chat_history, annotation=typing.Sequence[Message], direction=\"write\"\n                ),\n                \"conversation_id\": conversation_id,\n                \"prompt_truncation\": prompt_truncation,\n                \"connectors\": convert_and_respect_annotation_metadata(\n                    object_=connectors, annotation=typing.Sequence[ChatConnector], direction=\"write\"\n                ),\n                \"search_queries_only\": search_queries_only,\n                \"documents\": documents,\n                \"citation_quality\": citation_quality,\n                \"temperature\": temperature,\n                \"max_tokens\": max_tokens,\n                
\"max_input_tokens\": max_input_tokens,\n                \"k\": k,\n                \"p\": p,\n                \"seed\": seed,\n                \"stop_sequences\": stop_sequences,\n                \"frequency_penalty\": frequency_penalty,\n                \"presence_penalty\": presence_penalty,\n                \"raw_prompting\": raw_prompting,\n                \"tools\": convert_and_respect_annotation_metadata(\n                    object_=tools, annotation=typing.Sequence[Tool], direction=\"write\"\n                ),\n                \"tool_results\": convert_and_respect_annotation_metadata(\n                    object_=tool_results, annotation=typing.Sequence[ToolResult], direction=\"write\"\n                ),\n                \"force_single_step\": force_single_step,\n                \"response_format\": convert_and_respect_annotation_metadata(\n                    object_=response_format, annotation=ResponseFormat, direction=\"write\"\n                ),\n                \"safety_mode\": safety_mode,\n                \"stream\": True,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n                \"Accepts\": str(accepts) if accepts is not None else None,\n            },\n            request_options=request_options,\n            omit=OMIT,\n        ) as _response:\n\n            def _stream() -> HttpResponse[typing.Iterator[StreamedChatResponse]]:\n                try:\n                    if 200 <= _response.status_code < 300:\n\n                        def _iter():\n                            for _text in _response.iter_lines():\n                                try:\n                                    if len(_text) == 0:\n                                        continue\n                                    yield typing.cast(\n                                        StreamedChatResponse,\n                                        construct_type(\n                                            
type_=StreamedChatResponse,  # type: ignore\n                                            object_=json.loads(_text),\n                                        ),\n                                    )\n                                except Exception:\n                                    pass\n                            return\n\n                        return HttpResponse(response=_response, data=_iter())\n                    _response.read()\n                    if _response.status_code == 400:\n                        raise BadRequestError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 401:\n                        raise UnauthorizedError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 403:\n                        raise ForbiddenError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n        
                    ),\n                        )\n                    if _response.status_code == 404:\n                        raise NotFoundError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 422:\n                        raise UnprocessableEntityError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 429:\n                        raise TooManyRequestsError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 498:\n                        raise InvalidTokenError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                            
        type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 499:\n                        raise ClientClosedRequestError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 500:\n                        raise InternalServerError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 501:\n                        raise NotImplementedError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 503:\n                        raise ServiceUnavailableError(\n                            headers=dict(_response.headers),\n        
                    body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 504:\n                        raise GatewayTimeoutError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    _response_json = _response.json()\n                except JSONDecodeError:\n                    raise ApiError(\n                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text\n                    )\n                except ValidationError as e:\n                    raise ParsingError(\n                        status_code=_response.status_code,\n                        headers=dict(_response.headers),\n                        body=_response.json(),\n                        cause=e,\n                    )\n                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n            yield _stream()\n\n    def chat(\n        self,\n        *,\n        message: str,\n        accepts: typing.Optional[typing.Literal[\"text/event-stream\"]] = None,\n        model: typing.Optional[str] = OMIT,\n        preamble: typing.Optional[str] = OMIT,\n        chat_history: typing.Optional[typing.Sequence[Message]] = OMIT,\n        conversation_id: typing.Optional[str] = 
OMIT,\n        prompt_truncation: typing.Optional[ChatRequestPromptTruncation] = OMIT,\n        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,\n        search_queries_only: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,\n        citation_quality: typing.Optional[ChatRequestCitationQuality] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        max_input_tokens: typing.Optional[int] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,\n        tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT,\n        force_single_step: typing.Optional[bool] = OMIT,\n        response_format: typing.Optional[ResponseFormat] = OMIT,\n        safety_mode: typing.Optional[ChatRequestSafetyMode] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[NonStreamedChatResponse]:\n        \"\"\"\n        Generates a text response to a user message.\n        To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api).\n\n        Parameters\n        ----------\n        message : str\n            Text input for the model to respond to.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        accepts : typing.Optional[typing.Literal[\"text/event-stream\"]]\n            Pass text/event-stream to receive the streamed response as server-sent events. 
The default is `\\\\n` delimited events.\n\n        model : typing.Optional[str]\n            The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.\n\n            Compatible Deployments: Cohere Platform, Private Deployments\n\n        preamble : typing.Optional[str]\n            When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.\n\n            The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        chat_history : typing.Optional[typing.Sequence[Message]]\n            A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.\n\n            Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.\n\n            The chat_history parameter should not be used for `SYSTEM` messages in most cases. 
Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        conversation_id : typing.Optional[str]\n            An alternative to `chat_history`.\n\n            Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string.\n\n            Compatible Deployments: Cohere Platform\n\n        prompt_truncation : typing.Optional[ChatRequestPromptTruncation]\n            Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.\n\n            Dictates how the prompt will be constructed.\n\n            With `prompt_truncation` set to \"AUTO\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.\n\n            With `prompt_truncation` set to \"AUTO_PRESERVE_ORDER\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.\n\n            With `prompt_truncation` set to \"OFF\", no elements will be dropped. 
If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.\n\n            Compatible Deployments:\n             - AUTO: Cohere Platform Only\n             - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        connectors : typing.Optional[typing.Sequence[ChatConnector]]\n            Accepts `{\"id\": \"web-search\"}`, and/or the `\"id\"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) one.\n\n            When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).\n\n            Compatible Deployments: Cohere Platform\n\n        search_queries_only : typing.Optional[bool]\n            Defaults to `false`.\n\n            When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        documents : typing.Optional[typing.Sequence[ChatDocument]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.\n\n            Example:\n            ```\n            [\n              { \"title\": \"Tall penguins\", \"text\": \"Emperor penguins are the tallest.\" },\n              { \"title\": \"Penguin habitats\", \"text\": \"Emperor penguins only live in Antarctica.\" },\n            ]\n            ```\n\n            Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.\n\n            Some suggested keys are \"text\", \"author\", and \"date\". 
For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.\n\n            An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.\n\n            An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The \"_excludes\" field will not be passed to the model.\n\n            See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        citation_quality : typing.Optional[ChatRequestCitationQuality]\n            Defaults to `\"enabled\"`.\n            Citations are enabled by default for models that support it, but can be turned off by setting `\"type\": \"disabled\"`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. 
Note: Setting a low value may result in incomplete generations.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_input_tokens : typing.Optional[int]\n            The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer.\n\n            Input will be truncated according to the `prompt_truncation` parameter.\n\n            Compatible Deployments: Cohere Platform\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. 
If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without\n            any pre-processing.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tools : typing.Optional[typing.Sequence[Tool]]\n            A list of available tools (functions) that the model may suggest invoking before producing a text response.\n\n            When `tools` is passed (without `tool_results`), the `text` field in the response will be `\"\"` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. 
If no calls need to be made, the `tool_calls` array will be empty.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tool_results : typing.Optional[typing.Sequence[ToolResult]]\n            A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.\n            Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.\n\n            **Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{\"status\": 200}`), make sure to wrap it in a list.\n            ```\n            tool_results = [\n              {\n                \"call\": {\n                  \"name\": <tool name>,\n                  \"parameters\": {\n                    <param name>: <param value>\n                  }\n                },\n                \"outputs\": [{\n                  <key>: <value>\n                }]\n              },\n              ...\n            ]\n            ```\n            **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        force_single_step : typing.Optional[bool]\n            Forces the chat to be single step. Defaults to `false`.\n\n        response_format : typing.Optional[ResponseFormat]\n\n        safety_mode : typing.Optional[ChatRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/docs/safety-modes) inserted into the prompt. 
Defaults to `CONTEXTUAL`.\n            When `NONE` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.\n\n            **Note**: This parameter is only compatible newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[NonStreamedChatResponse]\n\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/chat\",\n            method=\"POST\",\n            json={\n                \"message\": message,\n                \"model\": model,\n                \"preamble\": preamble,\n                \"chat_history\": convert_and_respect_annotation_metadata(\n                    object_=chat_history, annotation=typing.Sequence[Message], direction=\"write\"\n                ),\n                \"conversation_id\": conversation_id,\n                \"prompt_truncation\": prompt_truncation,\n                \"connectors\": convert_and_respect_annotation_metadata(\n                    object_=connectors, annotation=typing.Sequence[ChatConnector], direction=\"write\"\n                ),\n                \"search_queries_only\": search_queries_only,\n                \"documents\": documents,\n                \"citation_quality\": citation_quality,\n                \"temperature\": temperature,\n                \"max_tokens\": max_tokens,\n                \"max_input_tokens\": 
max_input_tokens,\n                \"k\": k,\n                \"p\": p,\n                \"seed\": seed,\n                \"stop_sequences\": stop_sequences,\n                \"frequency_penalty\": frequency_penalty,\n                \"presence_penalty\": presence_penalty,\n                \"raw_prompting\": raw_prompting,\n                \"tools\": convert_and_respect_annotation_metadata(\n                    object_=tools, annotation=typing.Sequence[Tool], direction=\"write\"\n                ),\n                \"tool_results\": convert_and_respect_annotation_metadata(\n                    object_=tool_results, annotation=typing.Sequence[ToolResult], direction=\"write\"\n                ),\n                \"force_single_step\": force_single_step,\n                \"response_format\": convert_and_respect_annotation_metadata(\n                    object_=response_format, annotation=ResponseFormat, direction=\"write\"\n                ),\n                \"safety_mode\": safety_mode,\n                \"stream\": False,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n                \"Accepts\": str(accepts) if accepts is not None else None,\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    NonStreamedChatResponse,\n                    construct_type(\n                        type_=NonStreamedChatResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n             
               type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        
typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    @contextlib.contextmanager\n    def generate_stream(\n        self,\n        *,\n        prompt: str,\n        model: typing.Optional[str] = OMIT,\n        num_generations: typing.Optional[int] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        truncate: typing.Optional[GenerateStreamRequestTruncate] = OMIT,\n        
temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        preset: typing.Optional[str] = OMIT,\n        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> typing.Iterator[HttpResponse[typing.Iterator[GenerateStreamedResponse]]]:\n        \"\"\"\n        <Warning>\n        This API is marked as \"Legacy\" and is no longer maintained. Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat with Streaming API.\n        </Warning>\n        Generates realistic text conditioned on a given input.\n\n        Parameters\n        ----------\n        prompt : str\n            The input text that serves as the starting point for generating the response.\n            Note: The prompt will be pre-processed and modified before reaching the model.\n\n        model : typing.Optional[str]\n            The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).\n            Smaller, \"light\" models are faster, while larger models will perform better. [Custom models](https://docs.cohere.com/docs/training-custom-models) can also be supplied with their full ID.\n\n        num_generations : typing.Optional[int]\n            The maximum number of generations that will be returned. 
Defaults to `1`, min value of `1`, max value of `5`.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\n            This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.\n\n            Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.\n\n        truncate : typing.Optional[GenerateStreamRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        temperature : typing.Optional[float]\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.\n            Defaults to `0.75`, min value of `0.0`, max value of `5.0`.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        preset : typing.Optional[str]\n            Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. 
You can create presets in the [playground](https://dashboard.cohere.com/playground/generate).\n            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.\n\n        end_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included the text.\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        frequency_penalty : typing.Optional[float]\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Can be used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        return_likelihoods : typing.Optional[GenerateStreamRequestReturnLikelihoods]\n            One of `GENERATION|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.\n\n            If `GENERATION` is selected, the token likelihoods will only be provided for generated text.\n\n            WARNING: `ALL` is deprecated, and will be removed in a future release.\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without any pre-processing.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Yields\n        ------\n        typing.Iterator[HttpResponse[typing.Iterator[GenerateStreamedResponse]]]\n\n        \"\"\"\n        with self._client_wrapper.httpx_client.stream(\n            \"v1/generate\",\n            method=\"POST\",\n            json={\n                \"prompt\": prompt,\n                \"model\": model,\n                \"num_generations\": num_generations,\n                \"max_tokens\": max_tokens,\n                \"truncate\": truncate,\n                \"temperature\": temperature,\n                \"seed\": seed,\n                \"preset\": preset,\n                \"end_sequences\": end_sequences,\n                \"stop_sequences\": stop_sequences,\n                \"k\": k,\n                \"p\": p,\n                \"frequency_penalty\": frequency_penalty,\n                \"presence_penalty\": presence_penalty,\n                \"return_likelihoods\": return_likelihoods,\n                \"raw_prompting\": raw_prompting,\n                \"stream\": True,\n            },\n            
headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        ) as _response:\n\n            def _stream() -> HttpResponse[typing.Iterator[GenerateStreamedResponse]]:\n                try:\n                    if 200 <= _response.status_code < 300:\n\n                        def _iter():\n                            for _text in _response.iter_lines():\n                                try:\n                                    if len(_text) == 0:\n                                        continue\n                                    yield typing.cast(\n                                        GenerateStreamedResponse,\n                                        construct_type(\n                                            type_=GenerateStreamedResponse,  # type: ignore\n                                            object_=json.loads(_text),\n                                        ),\n                                    )\n                                except Exception:\n                                    pass\n                            return\n\n                        return HttpResponse(response=_response, data=_iter())\n                    _response.read()\n                    if _response.status_code == 400:\n                        raise BadRequestError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 401:\n                        raise UnauthorizedError(\n                            headers=dict(_response.headers),\n                       
     body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 403:\n                        raise ForbiddenError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 404:\n                        raise NotFoundError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 422:\n                        raise UnprocessableEntityError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code 
== 429:\n                        raise TooManyRequestsError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 498:\n                        raise InvalidTokenError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 499:\n                        raise ClientClosedRequestError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 500:\n                        raise InternalServerError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    
object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 501:\n                        raise NotImplementedError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 503:\n                        raise ServiceUnavailableError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 504:\n                        raise GatewayTimeoutError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    _response_json = _response.json()\n                except JSONDecodeError:\n                    raise ApiError(\n                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text\n              
      )\n                except ValidationError as e:\n                    raise ParsingError(\n                        status_code=_response.status_code,\n                        headers=dict(_response.headers),\n                        body=_response.json(),\n                        cause=e,\n                    )\n                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n            yield _stream()\n\n    def generate(\n        self,\n        *,\n        prompt: str,\n        model: typing.Optional[str] = OMIT,\n        num_generations: typing.Optional[int] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        truncate: typing.Optional[GenerateRequestTruncate] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        preset: typing.Optional[str] = OMIT,\n        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[Generation]:\n        \"\"\"\n        <Warning>\n        This API is marked as \"Legacy\" and is no longer maintained. 
Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.\n        </Warning>\n        Generates realistic text conditioned on a given input.\n\n        Parameters\n        ----------\n        prompt : str\n            The input text that serves as the starting point for generating the response.\n            Note: The prompt will be pre-processed and modified before reaching the model.\n\n        model : typing.Optional[str]\n            The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).\n            Smaller, \"light\" models are faster, while larger models will perform better. [Custom models](https://docs.cohere.com/docs/training-custom-models) can also be supplied with their full ID.\n\n        num_generations : typing.Optional[int]\n            The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\n            This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.\n\n            Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.\n\n        truncate : typing.Optional[GenerateRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. 
In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        temperature : typing.Optional[float]\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.\n            Defaults to `0.75`, min value of `0.0`, max value of `5.0`.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        preset : typing.Optional[str]\n            Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate).\n            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.\n\n        end_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the end of the earliest occurrence of a stop sequence. 
The sequence will be included the text.\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        frequency_penalty : typing.Optional[float]\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        return_likelihoods : typing.Optional[GenerateRequestReturnLikelihoods]\n            One of `GENERATION|NONE` to specify how and if the token likelihoods are returned with the response. 
Defaults to `NONE`.\n\n            If `GENERATION` is selected, the token likelihoods will only be provided for generated text.\n\n            WARNING: `ALL` is deprecated, and will be removed in a future release.\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without any pre-processing.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[Generation]\n\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/generate\",\n            method=\"POST\",\n            json={\n                \"prompt\": prompt,\n                \"model\": model,\n                \"num_generations\": num_generations,\n                \"max_tokens\": max_tokens,\n                \"truncate\": truncate,\n                \"temperature\": temperature,\n                \"seed\": seed,\n                \"preset\": preset,\n                \"end_sequences\": end_sequences,\n                \"stop_sequences\": stop_sequences,\n                \"k\": k,\n                \"p\": p,\n                \"frequency_penalty\": frequency_penalty,\n                \"presence_penalty\": presence_penalty,\n                \"return_likelihoods\": return_likelihoods,\n                \"raw_prompting\": raw_prompting,\n                \"stream\": False,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    Generation,\n                    construct_type(\n                        type_=Generation,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return 
HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    
body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            
object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def embed(\n        self,\n   
     *,\n        texts: typing.Optional[typing.Sequence[str]] = OMIT,\n        images: typing.Optional[typing.Sequence[str]] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        input_type: typing.Optional[EmbedInputType] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[EmbedRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[EmbedResponse]:\n        \"\"\"\n        This endpoint returns text and image embeddings. An embedding is a list of floating point numbers that captures semantic information about the content that it represents.\n\n        Embeddings can be used to create classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page.\n\n        If you want to learn more how to use the embedding model, have a look at the [Semantic Search Guide](https://docs.cohere.com/docs/semantic-search).\n\n        Parameters\n        ----------\n        texts : typing.Optional[typing.Sequence[str]]\n            An array of strings for the model to embed. Maximum number of texts per call is `96`.\n\n        images : typing.Optional[typing.Sequence[str]]\n            An array of image data URIs for the model to embed. Maximum number of images per call is `1`.\n\n            The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). 
The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.\n\n            Images are only supported with Embed v3.0 and newer models.\n\n        model : typing.Optional[str]\n            ID of one of the available [Embedding models](https://docs.cohere.com/docs/cohere-embed).\n\n        input_type : typing.Optional[EmbedInputType]\n\n        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]\n            Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.\n\n            * `\"float\"`: Use this when you want to get back the default float embeddings. Supported with all Embed models.\n            * `\"int8\"`: Use this when you want to get back signed int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"binary\"`: Use this when you want to get back signed binary embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Supported with Embed v3.0 and newer Embed models.\n\n        truncate : typing.Optional[EmbedRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. 
In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[EmbedResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/embed\",\n            method=\"POST\",\n            json={\n                \"texts\": texts,\n                \"images\": images,\n                \"model\": model,\n                \"input_type\": input_type,\n                \"embedding_types\": embedding_types,\n                \"truncate\": truncate,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    EmbedResponse,\n                    construct_type(\n                        type_=EmbedResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    
headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # 
type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n           
     raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def rerank(\n        self,\n        *,\n        query: str,\n        documents: typing.Sequence[RerankRequestDocumentsItem],\n        model: typing.Optional[str] = OMIT,\n        top_n: typing.Optional[int] = OMIT,\n        rank_fields: typing.Optional[typing.Sequence[str]] = OMIT,\n        return_documents: typing.Optional[bool] = OMIT,\n        max_chunks_per_doc: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[RerankResponse]:\n        \"\"\"\n        This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned 
a relevance score.\n\n        Parameters\n        ----------\n        query : str\n            The search query\n\n        documents : typing.Sequence[RerankRequestDocumentsItem]\n            A list of document objects or strings to rerank.\n            If a document is provided the text fields is required and all other fields will be preserved in the response.\n\n            The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000.\n\n            We recommend a maximum of 1,000 documents for optimal endpoint performance.\n\n        model : typing.Optional[str]\n            The identifier of the model to use, eg `rerank-v3.5`.\n\n        top_n : typing.Optional[int]\n            The number of most relevant documents or indices to return, defaults to the length of the documents\n\n        rank_fields : typing.Optional[typing.Sequence[str]]\n            If a JSON object is provided, you can specify which keys you would like to have considered for reranking. The model will rerank based on order of the fields passed in (i.e. rank_fields=['title','author','text'] will rerank using the values in title, author, text  sequentially. If the length of title, author, and text exceeds the context length of the model, the chunking will not re-consider earlier fields). 
If not provided, the model will use the default text field for ranking.\n\n        return_documents : typing.Optional[bool]\n            - If false, returns results without the doc text - the api will return a list of {index, relevance score} where index is inferred from the list passed into the request.\n            - If true, returns results with the doc text passed in - the api will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request.\n\n        max_chunks_per_doc : typing.Optional[int]\n            The maximum number of chunks to produce internally from a document\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[RerankResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/rerank\",\n            method=\"POST\",\n            json={\n                \"model\": model,\n                \"query\": query,\n                \"documents\": convert_and_respect_annotation_metadata(\n                    object_=documents, annotation=typing.Sequence[RerankRequestDocumentsItem], direction=\"write\"\n                ),\n                \"top_n\": top_n,\n                \"rank_fields\": rank_fields,\n                \"return_documents\": return_documents,\n                \"max_chunks_per_doc\": max_chunks_per_doc,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    RerankResponse,\n                    construct_type(\n                        type_=RerankResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n               
 )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    
headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            
type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, 
headers=dict(_response.headers), body=_response_json)\n\n    def classify(\n        self,\n        *,\n        inputs: typing.Sequence[str],\n        examples: typing.Optional[typing.Sequence[ClassifyExample]] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        preset: typing.Optional[str] = OMIT,\n        truncate: typing.Optional[ClassifyRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[ClassifyResponse]:\n        \"\"\"\n        This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference.\n        Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.\n\n        Parameters\n        ----------\n        inputs : typing.Sequence[str]\n            A list of up to 96 texts to be classified. Each one must be a non-empty string.\n            There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the \"max tokens\" column [here](https://docs.cohere.com/docs/models).\n            Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts.\n\n        examples : typing.Optional[typing.Sequence[ClassifyExample]]\n            An array of examples to provide context to the model. Each example is a text string and its associated label/class. 
Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: \"...\",label: \"...\"}`.\n            Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.\n\n        model : typing.Optional[str]\n            ID of a [Fine-tuned](https://docs.cohere.com/v2/docs/classify-starting-the-training) Classify model\n\n        preset : typing.Optional[str]\n            The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.com/playground). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters.\n\n        truncate : typing.Optional[ClassifyRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. 
In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[ClassifyResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/classify\",\n            method=\"POST\",\n            json={\n                \"inputs\": inputs,\n                \"examples\": convert_and_respect_annotation_metadata(\n                    object_=examples, annotation=typing.Sequence[ClassifyExample], direction=\"write\"\n                ),\n                \"model\": model,\n                \"preset\": preset,\n                \"truncate\": truncate,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ClassifyResponse,\n                    construct_type(\n                        type_=ClassifyResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            
if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        
typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        
),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def summarize(\n        self,\n        *,\n        text: str,\n        length: typing.Optional[SummarizeRequestLength] = OMIT,\n        format: typing.Optional[SummarizeRequestFormat] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        extractiveness: typing.Optional[SummarizeRequestExtractiveness] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        additional_command: typing.Optional[str] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> 
HttpResponse[SummarizeResponse]:\n        \"\"\"\n        <Warning>\n        This API is marked as \"Legacy\" and is no longer maintained. Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.\n        </Warning>\n        Generates a summary in English for a given text.\n\n        Parameters\n        ----------\n        text : str\n            The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English.\n\n        length : typing.Optional[SummarizeRequestLength]\n            One of `short`, `medium`, `long`, or `auto` defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text.\n\n        format : typing.Optional[SummarizeRequestFormat]\n            One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text.\n\n        model : typing.Optional[str]\n            The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, \"light\" models are faster, while larger models will perform better.\n\n        extractiveness : typing.Optional[SummarizeRequestExtractiveness]\n            One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text.\n\n        temperature : typing.Optional[float]\n            Ranges from 0 to 5. 
Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1.\n\n        additional_command : typing.Optional[str]\n            A free-form instruction for modifying how the summaries get generated. Should complete the sentence \"Generate a summary _\". Eg. \"focusing on the next steps\" or \"written by Yoda\"\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[SummarizeResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/summarize\",\n            method=\"POST\",\n            json={\n                \"text\": text,\n                \"length\": length,\n                \"format\": format,\n                \"model\": model,\n                \"extractiveness\": extractiveness,\n                \"temperature\": temperature,\n                \"additional_command\": additional_command,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    SummarizeResponse,\n                    construct_type(\n                        type_=SummarizeResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                        
    type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 
429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                
        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def tokenize(\n        self, *, text: str, model: str, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[TokenizeResponse]:\n        \"\"\"\n        This endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). 
To learn more about tokenization and byte pair encoding, see the tokens page.\n\n        Parameters\n        ----------\n        text : str\n            The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters.\n\n        model : str\n            The input will be tokenized by the tokenizer that is used by this model.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[TokenizeResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/tokenize\",\n            method=\"POST\",\n            json={\n                \"text\": text,\n                \"model\": model,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    TokenizeResponse,\n                    construct_type(\n                        type_=TokenizeResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n     
               body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            
object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n     
               headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def detokenize(\n        self, *, tokens: typing.Sequence[int], model: str, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[DetokenizeResponse]:\n        \"\"\"\n        This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page.\n\n        Parameters\n        ----------\n        tokens : typing.Sequence[int]\n            The list of tokens to be detokenized.\n\n        model : str\n            An optional parameter to provide the model name. 
This will ensure that the detokenization is done by the tokenizer used by that model.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[DetokenizeResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/detokenize\",\n            method=\"POST\",\n            json={\n                \"tokens\": tokens,\n                \"model\": model,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DetokenizeResponse,\n                    construct_type(\n                        type_=DetokenizeResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    
),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n 
                       typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            
object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def check_api_key(\n        self, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> HttpResponse[CheckApiKeyResponse]:\n        \"\"\"\n        Checks that the api key in the Authorization header is valid and active\n\n        Parameters\n        ----------\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[CheckApiKeyResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v1/check-api-key\",\n            method=\"POST\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    CheckApiKeyResponse,\n                    construct_type(\n                        type_=CheckApiKeyResponse,  # type: ignore\n    
                    object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n            
    raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        
construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise 
ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n\nclass AsyncRawBaseCohere:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    @contextlib.asynccontextmanager\n    async def chat_stream(\n        self,\n        *,\n        message: str,\n        accepts: typing.Optional[typing.Literal[\"text/event-stream\"]] = None,\n        model: typing.Optional[str] = OMIT,\n        preamble: typing.Optional[str] = OMIT,\n        chat_history: typing.Optional[typing.Sequence[Message]] = OMIT,\n        conversation_id: typing.Optional[str] = OMIT,\n        prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation] = OMIT,\n        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,\n        search_queries_only: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,\n        citation_quality: typing.Optional[ChatStreamRequestCitationQuality] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        max_input_tokens: typing.Optional[int] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,\n        tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT,\n        force_single_step: typing.Optional[bool] = OMIT,\n        response_format: typing.Optional[ResponseFormat] = OMIT,\n        safety_mode: typing.Optional[ChatStreamRequestSafetyMode] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> 
typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[StreamedChatResponse]]]:\n        \"\"\"\n        Generates a streamed text response to a user message.\n\n        To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api).\n\n        Parameters\n        ----------\n        message : str\n            Text input for the model to respond to.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        accepts : typing.Optional[typing.Literal[\"text/event-stream\"]]\n            Pass text/event-stream to receive the streamed response as server-sent events. The default is `\\\\n` delimited events.\n\n        model : typing.Optional[str]\n            The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.\n\n            Compatible Deployments: Cohere Platform, Private Deployments\n\n        preamble : typing.Optional[str]\n            When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.\n\n            The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. 
Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        chat_history : typing.Optional[typing.Sequence[Message]]\n            A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.\n\n            Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.\n\n            The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        conversation_id : typing.Optional[str]\n            An alternative to `chat_history`.\n\n            Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string.\n\n            Compatible Deployments: Cohere Platform\n\n        prompt_truncation : typing.Optional[ChatStreamRequestPromptTruncation]\n            Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.\n\n            Dictates how the prompt will be constructed.\n\n            With `prompt_truncation` set to \"AUTO\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. 
During this process the order of the documents and chat history will be changed and ranked by relevance.\n\n            With `prompt_truncation` set to \"AUTO_PRESERVE_ORDER\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.\n\n            With `prompt_truncation` set to \"OFF\", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.\n\n            Compatible Deployments:\n             - AUTO: Cohere Platform Only\n             - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        connectors : typing.Optional[typing.Sequence[ChatConnector]]\n            Accepts `{\"id\": \"web-search\"}`, and/or the `\"id\"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) one.\n\n            When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).\n\n            Compatible Deployments: Cohere Platform\n\n        search_queries_only : typing.Optional[bool]\n            Defaults to `false`.\n\n            When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        documents : typing.Optional[typing.Sequence[ChatDocument]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. 
Each document is a string-string dictionary.\n\n            Example:\n            ```\n            [\n              { \"title\": \"Tall penguins\", \"text\": \"Emperor penguins are the tallest.\" },\n              { \"title\": \"Penguin habitats\", \"text\": \"Emperor penguins only live in Antarctica.\" },\n            ]\n            ```\n\n            Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.\n\n            Some suggested keys are \"text\", \"author\", and \"date\". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.\n\n            An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.\n\n            An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The \"_excludes\" field will not be passed to the model.\n\n            See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        citation_quality : typing.Optional[ChatStreamRequestCitationQuality]\n            Defaults to `\"enabled\"`.\n            Citations are enabled by default for models that support it, but can be turned off by setting `\"type\": \"disabled\"`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. 
Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_input_tokens : typing.Optional[int]\n            The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer.\n\n            Input will be truncated according to the `prompt_truncation` parameter.\n\n            Compatible Deployments: Cohere Platform\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. 
However,\n            determinism cannot be totally guaranteed.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without\n            any pre-processing.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tools : typing.Optional[typing.Sequence[Tool]]\n            A list of available tools (functions) that the model may suggest invoking before producing a text response.\n\n            When `tools` is passed (without `tool_results`), the `text` field in the response will be `\"\"` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tool_results : typing.Optional[typing.Sequence[ToolResult]]\n            A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.\n            Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.\n\n            **Note**: `outputs` must be a list of objects. 
If your tool returns a single object (eg `{\"status\": 200}`), make sure to wrap it in a list.\n            ```\n            tool_results = [\n              {\n                \"call\": {\n                  \"name\": <tool name>,\n                  \"parameters\": {\n                    <param name>: <param value>\n                  }\n                },\n                \"outputs\": [{\n                  <key>: <value>\n                }]\n              },\n              ...\n            ]\n            ```\n            **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        force_single_step : typing.Optional[bool]\n            Forces the chat to be single step. Defaults to `false`.\n\n        response_format : typing.Optional[ResponseFormat]\n\n        safety_mode : typing.Optional[ChatStreamRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/docs/safety-modes) inserted into the prompt. 
Defaults to `CONTEXTUAL`.\n            When `NONE` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.\n\n            **Note**: This parameter is only compatible newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Yields\n        ------\n        typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[StreamedChatResponse]]]\n\n        \"\"\"\n        async with self._client_wrapper.httpx_client.stream(\n            \"v1/chat\",\n            method=\"POST\",\n            json={\n                \"message\": message,\n                \"model\": model,\n                \"preamble\": preamble,\n                \"chat_history\": convert_and_respect_annotation_metadata(\n                    object_=chat_history, annotation=typing.Sequence[Message], direction=\"write\"\n                ),\n                \"conversation_id\": conversation_id,\n                \"prompt_truncation\": prompt_truncation,\n                \"connectors\": convert_and_respect_annotation_metadata(\n                    object_=connectors, annotation=typing.Sequence[ChatConnector], direction=\"write\"\n                ),\n                \"search_queries_only\": search_queries_only,\n                \"documents\": documents,\n                \"citation_quality\": citation_quality,\n                \"temperature\": temperature,\n                \"max_tokens\": max_tokens,\n             
   \"max_input_tokens\": max_input_tokens,\n                \"k\": k,\n                \"p\": p,\n                \"seed\": seed,\n                \"stop_sequences\": stop_sequences,\n                \"frequency_penalty\": frequency_penalty,\n                \"presence_penalty\": presence_penalty,\n                \"raw_prompting\": raw_prompting,\n                \"tools\": convert_and_respect_annotation_metadata(\n                    object_=tools, annotation=typing.Sequence[Tool], direction=\"write\"\n                ),\n                \"tool_results\": convert_and_respect_annotation_metadata(\n                    object_=tool_results, annotation=typing.Sequence[ToolResult], direction=\"write\"\n                ),\n                \"force_single_step\": force_single_step,\n                \"response_format\": convert_and_respect_annotation_metadata(\n                    object_=response_format, annotation=ResponseFormat, direction=\"write\"\n                ),\n                \"safety_mode\": safety_mode,\n                \"stream\": True,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n                \"Accepts\": str(accepts) if accepts is not None else None,\n            },\n            request_options=request_options,\n            omit=OMIT,\n        ) as _response:\n\n            async def _stream() -> AsyncHttpResponse[typing.AsyncIterator[StreamedChatResponse]]:\n                try:\n                    if 200 <= _response.status_code < 300:\n\n                        async def _iter():\n                            async for _text in _response.aiter_lines():\n                                try:\n                                    if len(_text) == 0:\n                                        continue\n                                    yield typing.cast(\n                                        StreamedChatResponse,\n                                        construct_type(\n                               
             type_=StreamedChatResponse,  # type: ignore\n                                            object_=json.loads(_text),\n                                        ),\n                                    )\n                                except Exception:\n                                    pass\n                            return\n\n                        return AsyncHttpResponse(response=_response, data=_iter())\n                    await _response.aread()\n                    if _response.status_code == 400:\n                        raise BadRequestError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 401:\n                        raise UnauthorizedError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 403:\n                        raise ForbiddenError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                   
             ),\n                            ),\n                        )\n                    if _response.status_code == 404:\n                        raise NotFoundError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 422:\n                        raise UnprocessableEntityError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 429:\n                        raise TooManyRequestsError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 498:\n                        raise InvalidTokenError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n   
                                 type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 499:\n                        raise ClientClosedRequestError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 500:\n                        raise InternalServerError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 501:\n                        raise NotImplementedError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 503:\n                        raise ServiceUnavailableError(\n                            
headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 504:\n                        raise GatewayTimeoutError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    _response_json = _response.json()\n                except JSONDecodeError:\n                    raise ApiError(\n                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text\n                    )\n                except ValidationError as e:\n                    raise ParsingError(\n                        status_code=_response.status_code,\n                        headers=dict(_response.headers),\n                        body=_response.json(),\n                        cause=e,\n                    )\n                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n            yield await _stream()\n\n    async def chat(\n        self,\n        *,\n        message: str,\n        accepts: typing.Optional[typing.Literal[\"text/event-stream\"]] = None,\n        model: typing.Optional[str] = OMIT,\n        preamble: typing.Optional[str] = OMIT,\n        chat_history: typing.Optional[typing.Sequence[Message]] = OMIT,\n 
       conversation_id: typing.Optional[str] = OMIT,\n        prompt_truncation: typing.Optional[ChatRequestPromptTruncation] = OMIT,\n        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,\n        search_queries_only: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,\n        citation_quality: typing.Optional[ChatRequestCitationQuality] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        max_input_tokens: typing.Optional[int] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,\n        tool_results: typing.Optional[typing.Sequence[ToolResult]] = OMIT,\n        force_single_step: typing.Optional[bool] = OMIT,\n        response_format: typing.Optional[ResponseFormat] = OMIT,\n        safety_mode: typing.Optional[ChatRequestSafetyMode] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[NonStreamedChatResponse]:\n        \"\"\"\n        Generates a text response to a user message.\n        To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/docs/chat-api).\n\n        Parameters\n        ----------\n        message : str\n            Text input for the model to respond to.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        accepts : typing.Optional[typing.Literal[\"text/event-stream\"]]\n            Pass text/event-stream to receive the streamed response as server-sent events. 
The default is `\\\\n` delimited events.\n\n        model : typing.Optional[str]\n            The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.\n\n            Compatible Deployments: Cohere Platform, Private Deployments\n\n        preamble : typing.Optional[str]\n            When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.\n\n            The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        chat_history : typing.Optional[typing.Sequence[Message]]\n            A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.\n\n            Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.\n\n            The chat_history parameter should not be used for `SYSTEM` messages in most cases. 
Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        conversation_id : typing.Optional[str]\n            An alternative to `chat_history`.\n\n            Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string.\n\n            Compatible Deployments: Cohere Platform\n\n        prompt_truncation : typing.Optional[ChatRequestPromptTruncation]\n            Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.\n\n            Dictates how the prompt will be constructed.\n\n            With `prompt_truncation` set to \"AUTO\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.\n\n            With `prompt_truncation` set to \"AUTO_PRESERVE_ORDER\", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.\n\n            With `prompt_truncation` set to \"OFF\", no elements will be dropped. 
If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.\n\n            Compatible Deployments:\n             - AUTO: Cohere Platform Only\n             - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        connectors : typing.Optional[typing.Sequence[ChatConnector]]\n            Accepts `{\"id\": \"web-search\"}`, and/or the `\"id\"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/v1/docs/creating-and-deploying-a-connector) one.\n\n            When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).\n\n            Compatible Deployments: Cohere Platform\n\n        search_queries_only : typing.Optional[bool]\n            Defaults to `false`.\n\n            When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        documents : typing.Optional[typing.Sequence[ChatDocument]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.\n\n            Example:\n            ```\n            [\n              { \"title\": \"Tall penguins\", \"text\": \"Emperor penguins are the tallest.\" },\n              { \"title\": \"Penguin habitats\", \"text\": \"Emperor penguins only live in Antarctica.\" },\n            ]\n            ```\n\n            Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.\n\n            Some suggested keys are \"text\", \"author\", and \"date\". 
For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.\n\n            An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.\n\n            An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The \"_excludes\" field will not be passed to the model.\n\n            See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        citation_quality : typing.Optional[ChatRequestCitationQuality]\n            Defaults to `\"enabled\"`.\n            Citations are enabled by default for models that support it, but can be turned off by setting `\"type\": \"disabled\"`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. 
Note: Setting a low value may result in incomplete generations.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        max_input_tokens : typing.Optional[int]\n            The maximum number of input tokens to send to the model. If not specified, `max_input_tokens` is the model's context length limit minus a small buffer.\n\n            Input will be truncated according to the `prompt_truncation` parameter.\n\n            Compatible Deployments: Cohere Platform\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. 
If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without\n            any pre-processing.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tools : typing.Optional[typing.Sequence[Tool]]\n            A list of available tools (functions) that the model may suggest invoking before producing a text response.\n\n            When `tools` is passed (without `tool_results`), the `text` field in the response will be `\"\"` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. 
If no calls need to be made, the `tool_calls` array will be empty.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        tool_results : typing.Optional[typing.Sequence[ToolResult]]\n            A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.\n            Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.\n\n            **Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{\"status\": 200}`), make sure to wrap it in a list.\n            ```\n            tool_results = [\n              {\n                \"call\": {\n                  \"name\": <tool name>,\n                  \"parameters\": {\n                    <param name>: <param value>\n                  }\n                },\n                \"outputs\": [{\n                  <key>: <value>\n                }]\n              },\n              ...\n            ]\n            ```\n            **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        force_single_step : typing.Optional[bool]\n            Forces the chat to be single step. Defaults to `false`.\n\n        response_format : typing.Optional[ResponseFormat]\n\n        safety_mode : typing.Optional[ChatRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/docs/safety-modes) inserted into the prompt. 
Defaults to `CONTEXTUAL`.\n            When `NONE` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.\n\n            **Note**: This parameter is only compatible newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[NonStreamedChatResponse]\n\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/chat\",\n            method=\"POST\",\n            json={\n                \"message\": message,\n                \"model\": model,\n                \"preamble\": preamble,\n                \"chat_history\": convert_and_respect_annotation_metadata(\n                    object_=chat_history, annotation=typing.Sequence[Message], direction=\"write\"\n                ),\n                \"conversation_id\": conversation_id,\n                \"prompt_truncation\": prompt_truncation,\n                \"connectors\": convert_and_respect_annotation_metadata(\n                    object_=connectors, annotation=typing.Sequence[ChatConnector], direction=\"write\"\n                ),\n                \"search_queries_only\": search_queries_only,\n                \"documents\": documents,\n                \"citation_quality\": citation_quality,\n                \"temperature\": temperature,\n                \"max_tokens\": max_tokens,\n                \"max_input_tokens\": 
max_input_tokens,\n                \"k\": k,\n                \"p\": p,\n                \"seed\": seed,\n                \"stop_sequences\": stop_sequences,\n                \"frequency_penalty\": frequency_penalty,\n                \"presence_penalty\": presence_penalty,\n                \"raw_prompting\": raw_prompting,\n                \"tools\": convert_and_respect_annotation_metadata(\n                    object_=tools, annotation=typing.Sequence[Tool], direction=\"write\"\n                ),\n                \"tool_results\": convert_and_respect_annotation_metadata(\n                    object_=tool_results, annotation=typing.Sequence[ToolResult], direction=\"write\"\n                ),\n                \"force_single_step\": force_single_step,\n                \"response_format\": convert_and_respect_annotation_metadata(\n                    object_=response_format, annotation=ResponseFormat, direction=\"write\"\n                ),\n                \"safety_mode\": safety_mode,\n                \"stream\": False,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n                \"Accepts\": str(accepts) if accepts is not None else None,\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    NonStreamedChatResponse,\n                    construct_type(\n                        type_=NonStreamedChatResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n        
                    type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        
typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    @contextlib.asynccontextmanager\n    async def generate_stream(\n        self,\n        *,\n        prompt: str,\n        model: typing.Optional[str] = OMIT,\n        num_generations: typing.Optional[int] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        truncate: typing.Optional[GenerateStreamRequestTruncate] = 
OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        preset: typing.Optional[str] = OMIT,\n        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[GenerateStreamedResponse]]]:\n        \"\"\"\n        <Warning>\n        This API is marked as \"Legacy\" and is no longer maintained. Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat with Streaming API.\n        </Warning>\n        Generates realistic text conditioned on a given input.\n\n        Parameters\n        ----------\n        prompt : str\n            The input text that serves as the starting point for generating the response.\n            Note: The prompt will be pre-processed and modified before reaching the model.\n\n        model : typing.Optional[str]\n            The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).\n            Smaller, \"light\" models are faster, while larger models will perform better. [Custom models](https://docs.cohere.com/docs/training-custom-models) can also be supplied with their full ID.\n\n        num_generations : typing.Optional[int]\n            The maximum number of generations that will be returned. 
Defaults to `1`, min value of `1`, max value of `5`.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.\n\n            This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.\n\n            Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.\n\n        truncate : typing.Optional[GenerateStreamRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        temperature : typing.Optional[float]\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.\n            Defaults to `0.75`, min value of `0.0`, max value of `5.0`.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments\n\n        preset : typing.Optional[str]\n            Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. 
You can create presets in the [playground](https://dashboard.cohere.com/playground/generate).\n            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.\n\n        end_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included the text.\n\n        k : typing.Optional[int]\n            Ensures only the top `k` most likely tokens are considered for generation at each step.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        frequency_penalty : typing.Optional[float]\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n\n            Can be used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.\n\n        return_likelihoods : typing.Optional[GenerateStreamRequestReturnLikelihoods]\n            One of `GENERATION|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.\n\n            If `GENERATION` is selected, the token likelihoods will only be provided for generated text.\n\n            WARNING: `ALL` is deprecated, and will be removed in a future release.\n\n        raw_prompting : typing.Optional[bool]\n            When enabled, the user's prompt will be sent to the model without any pre-processing.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Yields\n        ------\n        typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[GenerateStreamedResponse]]]\n\n        \"\"\"\n        async with self._client_wrapper.httpx_client.stream(\n            \"v1/generate\",\n            method=\"POST\",\n            json={\n                \"prompt\": prompt,\n                \"model\": model,\n                \"num_generations\": num_generations,\n                \"max_tokens\": max_tokens,\n                \"truncate\": truncate,\n                \"temperature\": temperature,\n                \"seed\": seed,\n                \"preset\": preset,\n                \"end_sequences\": end_sequences,\n                \"stop_sequences\": stop_sequences,\n                \"k\": k,\n                \"p\": p,\n                \"frequency_penalty\": frequency_penalty,\n                \"presence_penalty\": presence_penalty,\n                \"return_likelihoods\": return_likelihoods,\n                \"raw_prompting\": raw_prompting,\n                \"stream\": True,\n            
},\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        ) as _response:\n\n            async def _stream() -> AsyncHttpResponse[typing.AsyncIterator[GenerateStreamedResponse]]:\n                try:\n                    if 200 <= _response.status_code < 300:\n\n                        async def _iter():\n                            async for _text in _response.aiter_lines():\n                                try:\n                                    if len(_text) == 0:\n                                        continue\n                                    yield typing.cast(\n                                        GenerateStreamedResponse,\n                                        construct_type(\n                                            type_=GenerateStreamedResponse,  # type: ignore\n                                            object_=json.loads(_text),\n                                        ),\n                                    )\n                                except Exception:\n                                    pass\n                            return\n\n                        return AsyncHttpResponse(response=_response, data=_iter())\n                    await _response.aread()\n                    if _response.status_code == 400:\n                        raise BadRequestError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 401:\n                        raise UnauthorizedError(\n                            
headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 403:\n                        raise ForbiddenError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 404:\n                        raise NotFoundError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 422:\n                        raise UnprocessableEntityError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n               
         )\n                    if _response.status_code == 429:\n                        raise TooManyRequestsError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 498:\n                        raise InvalidTokenError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 499:\n                        raise ClientClosedRequestError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 500:\n                        raise InternalServerError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # 
type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 501:\n                        raise NotImplementedError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 503:\n                        raise ServiceUnavailableError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 504:\n                        raise GatewayTimeoutError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    _response_json = _response.json()\n                except JSONDecodeError:\n                    raise ApiError(\n                        status_code=_response.status_code, 
headers=dict(_response.headers), body=_response.text\n                    )\n                except ValidationError as e:\n                    raise ParsingError(\n                        status_code=_response.status_code,\n                        headers=dict(_response.headers),\n                        body=_response.json(),\n                        cause=e,\n                    )\n                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n            yield await _stream()\n\n    async def generate(\n        self,\n        *,\n        prompt: str,\n        model: typing.Optional[str] = OMIT,\n        num_generations: typing.Optional[int] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        truncate: typing.Optional[GenerateRequestTruncate] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        preset: typing.Optional[str] = OMIT,\n        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods] = OMIT,\n        raw_prompting: typing.Optional[bool] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[Generation]:\n        \"\"\"\n        <Warning>\n        This API is marked as \"Legacy\" and is no longer maintained. 
Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
        </Warning>
        Generates realistic text conditioned on a given input.

        Parameters
        ----------
        prompt : str
            The input text that serves as the starting point for generating the response.
            Note: The prompt will be pre-processed and modified before reaching the model.

        model : typing.Optional[str]
            The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).
            Smaller, "light" models are faster, while larger models will perform better. [Custom models](https://docs.cohere.com/docs/training-custom-models) can also be supplied with their full ID.

        num_generations : typing.Optional[int]
            The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.

        max_tokens : typing.Optional[int]
            The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.

            This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.

            Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.

        truncate : typing.Optional[GenerateRequestTruncate]
            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.

            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.

            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.

        temperature : typing.Optional[float]
            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.
            Defaults to `0.75`, min value of `0.0`, max value of `5.0`.

        seed : typing.Optional[int]
            If specified, the backend will make a best effort to sample tokens
            deterministically, such that repeated requests with the same
            seed and parameters should return the same result. However,
            determinism cannot be totally guaranteed.
            Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments

        preset : typing.Optional[str]
            Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.com/playground/generate).
            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.

        end_sequences : typing.Optional[typing.Sequence[str]]
            The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.

        stop_sequences : typing.Optional[typing.Sequence[str]]
            The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.

        k : typing.Optional[int]
            Ensures only the top `k` most likely tokens are considered for generation at each step.
            Defaults to `0`, min value of `0`, max value of `500`.

        p : typing.Optional[float]
            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.

        frequency_penalty : typing.Optional[float]
            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.

            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.

        presence_penalty : typing.Optional[float]
            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.

            Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.

            Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.

        return_likelihoods : typing.Optional[GenerateRequestReturnLikelihoods]
            One of `GENERATION|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.

            If `GENERATION` is selected, the token likelihoods will only be provided for generated text.

            WARNING: `ALL` is deprecated, and will be removed in a future release.

        raw_prompting : typing.Optional[bool]
            When enabled, the user's prompt will be sent to the model without any pre-processing.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncHttpResponse[Generation]

        """
        # Non-streaming variant of generate_stream: same payload with "stream": False.
        _response = await self._client_wrapper.httpx_client.request(
            "v1/generate",
            method="POST",
            json={
                "prompt": prompt,
                "model": model,
                "num_generations": num_generations,
                "max_tokens": max_tokens,
                "truncate": truncate,
                "temperature": temperature,
                "seed": seed,
                "preset": preset,
                "end_sequences": end_sequences,
                "stop_sequences": stop_sequences,
                "k": k,
                "p": p,
                "frequency_penalty": frequency_penalty,
                "presence_penalty": presence_penalty,
                "return_likelihoods": return_likelihoods,
                "raw_prompting": raw_prompting,
                "stream": False,
            },
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )
        try:
            if 200 <= _response.status_code < 300:
                _data = typing.cast(
                    Generation,
                    construct_type(
                        type_=Generation,  # type: ignore
                        object_=_response.json(),
                    ),
                )
                return AsyncHttpResponse(response=_response, data=_data)
            # Map known error status codes to typed SDK exceptions.
            if _response.status_code == 400:
                raise BadRequestError(
                    headers=dict(_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
                )
            if _response.status_code == 401:
                raise UnauthorizedError(
                    headers=dict(_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
                )
            if _response.status_code == 403:
                raise ForbiddenError(
                    headers=dict(_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
                )
            if _response.status_code == 404:
                raise NotFoundError(
                    headers=dict(_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
                )
            if _response.status_code == 422:
                raise UnprocessableEntityError(
                    headers=dict(_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
                )
            if _response.status_code == 429:
                raise TooManyRequestsError(
                    headers=dict(_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
                )
            if _response.status_code == 498:
                raise InvalidTokenError(
                    headers=dict(_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
                )
            if _response.status_code == 499:
                raise ClientClosedRequestError(
                    headers=dict(_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
                )
            if _response.status_code == 500:
                raise InternalServerError(
                    headers=dict(_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
                )
            # NOTE(review): NotImplementedError here is presumably the SDK's error class
            # imported at module top (shadowing the builtin), since it is called with
            # headers=/body= keyword arguments — confirm against the file's imports.
            if _response.status_code == 501:
                raise NotImplementedError(
                    headers=dict(_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
                )
            if _response.status_code == 503:
                raise ServiceUnavailableError(
                    headers=dict(_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
                )
            if _response.status_code == 504:
                raise GatewayTimeoutError(
                    headers=dict(_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
                )
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
        except ValidationError as e:
            raise ParsingError(
                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e
            )
        # Unexpected status code: surface the parsed JSON body in a generic ApiError.
        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

    async def embed(
        
self,\n        *,\n        texts: typing.Optional[typing.Sequence[str]] = OMIT,\n        images: typing.Optional[typing.Sequence[str]] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        input_type: typing.Optional[EmbedInputType] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[EmbedRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[EmbedResponse]:\n        \"\"\"\n        This endpoint returns text and image embeddings. An embedding is a list of floating point numbers that captures semantic information about the content that it represents.\n\n        Embeddings can be used to create classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page.\n\n        If you want to learn more how to use the embedding model, have a look at the [Semantic Search Guide](https://docs.cohere.com/docs/semantic-search).\n\n        Parameters\n        ----------\n        texts : typing.Optional[typing.Sequence[str]]\n            An array of strings for the model to embed. Maximum number of texts per call is `96`.\n\n        images : typing.Optional[typing.Sequence[str]]\n            An array of image data URIs for the model to embed. Maximum number of images per call is `1`.\n\n            The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). 
The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.\n\n            Images are only supported with Embed v3.0 and newer models.\n\n        model : typing.Optional[str]\n            ID of one of the available [Embedding models](https://docs.cohere.com/docs/cohere-embed).\n\n        input_type : typing.Optional[EmbedInputType]\n\n        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]\n            Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.\n\n            * `\"float\"`: Use this when you want to get back the default float embeddings. Supported with all Embed models.\n            * `\"int8\"`: Use this when you want to get back signed int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"binary\"`: Use this when you want to get back signed binary embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Supported with Embed v3.0 and newer Embed models.\n\n        truncate : typing.Optional[EmbedRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. 
In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[EmbedResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/embed\",\n            method=\"POST\",\n            json={\n                \"texts\": texts,\n                \"images\": images,\n                \"model\": model,\n                \"input_type\": input_type,\n                \"embedding_types\": embedding_types,\n                \"truncate\": truncate,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    EmbedResponse,\n                    construct_type(\n                        type_=EmbedResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                  
  headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # 
type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n           
     raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def rerank(\n        self,\n        *,\n        query: str,\n        documents: typing.Sequence[RerankRequestDocumentsItem],\n        model: typing.Optional[str] = OMIT,\n        top_n: typing.Optional[int] = OMIT,\n        rank_fields: typing.Optional[typing.Sequence[str]] = OMIT,\n        return_documents: typing.Optional[bool] = OMIT,\n        max_chunks_per_doc: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[RerankResponse]:\n        \"\"\"\n        This endpoint takes in a query and a list of texts and produces an ordered array with each 
text assigned a relevance score.\n\n        Parameters\n        ----------\n        query : str\n            The search query\n\n        documents : typing.Sequence[RerankRequestDocumentsItem]\n            A list of document objects or strings to rerank.\n            If a document is provided the text fields is required and all other fields will be preserved in the response.\n\n            The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000.\n\n            We recommend a maximum of 1,000 documents for optimal endpoint performance.\n\n        model : typing.Optional[str]\n            The identifier of the model to use, eg `rerank-v3.5`.\n\n        top_n : typing.Optional[int]\n            The number of most relevant documents or indices to return, defaults to the length of the documents\n\n        rank_fields : typing.Optional[typing.Sequence[str]]\n            If a JSON object is provided, you can specify which keys you would like to have considered for reranking. The model will rerank based on order of the fields passed in (i.e. rank_fields=['title','author','text'] will rerank using the values in title, author, text  sequentially. If the length of title, author, and text exceeds the context length of the model, the chunking will not re-consider earlier fields). 
If not provided, the model will use the default text field for ranking.\n\n        return_documents : typing.Optional[bool]\n            - If false, returns results without the doc text - the api will return a list of {index, relevance score} where index is inferred from the list passed into the request.\n            - If true, returns results with the doc text passed in - the api will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request.\n\n        max_chunks_per_doc : typing.Optional[int]\n            The maximum number of chunks to produce internally from a document\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[RerankResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/rerank\",\n            method=\"POST\",\n            json={\n                \"model\": model,\n                \"query\": query,\n                \"documents\": convert_and_respect_annotation_metadata(\n                    object_=documents, annotation=typing.Sequence[RerankRequestDocumentsItem], direction=\"write\"\n                ),\n                \"top_n\": top_n,\n                \"rank_fields\": rank_fields,\n                \"return_documents\": return_documents,\n                \"max_chunks_per_doc\": max_chunks_per_doc,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    RerankResponse,\n                    construct_type(\n                        type_=RerankResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n    
            )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    
headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            
type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, 
headers=dict(_response.headers), body=_response_json)\n\n    async def classify(\n        self,\n        *,\n        inputs: typing.Sequence[str],\n        examples: typing.Optional[typing.Sequence[ClassifyExample]] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        preset: typing.Optional[str] = OMIT,\n        truncate: typing.Optional[ClassifyRequestTruncate] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[ClassifyResponse]:\n        \"\"\"\n        This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference.\n        Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.\n\n        Parameters\n        ----------\n        inputs : typing.Sequence[str]\n            A list of up to 96 texts to be classified. Each one must be a non-empty string.\n            There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the \"max tokens\" column [here](https://docs.cohere.com/docs/models).\n            Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts.\n\n        examples : typing.Optional[typing.Sequence[ClassifyExample]]\n            An array of examples to provide context to the model. Each example is a text string and its associated label/class. 
Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: \"...\",label: \"...\"}`.\n            Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.\n\n        model : typing.Optional[str]\n            ID of a [Fine-tuned](https://docs.cohere.com/v2/docs/classify-starting-the-training) Classify model\n\n        preset : typing.Optional[str]\n            The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.com/playground). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters.\n\n        truncate : typing.Optional[ClassifyRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. 
In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[ClassifyResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/classify\",\n            method=\"POST\",\n            json={\n                \"inputs\": inputs,\n                \"examples\": convert_and_respect_annotation_metadata(\n                    object_=examples, annotation=typing.Sequence[ClassifyExample], direction=\"write\"\n                ),\n                \"model\": model,\n                \"preset\": preset,\n                \"truncate\": truncate,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    ClassifyResponse,\n                    construct_type(\n                        type_=ClassifyResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                
)\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                     
   typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                      
  ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def summarize(\n        self,\n        *,\n        text: str,\n        length: typing.Optional[SummarizeRequestLength] = OMIT,\n        format: typing.Optional[SummarizeRequestFormat] = OMIT,\n        model: typing.Optional[str] = OMIT,\n        extractiveness: typing.Optional[SummarizeRequestExtractiveness] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        additional_command: typing.Optional[str] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> 
AsyncHttpResponse[SummarizeResponse]:\n        \"\"\"\n        <Warning>\n        This API is marked as \"Legacy\" and is no longer maintained. Follow the [migration guide](https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.\n        </Warning>\n        Generates a summary in English for a given text.\n\n        Parameters\n        ----------\n        text : str\n            The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English.\n\n        length : typing.Optional[SummarizeRequestLength]\n            One of `short`, `medium`, `long`, or `auto` defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text.\n\n        format : typing.Optional[SummarizeRequestFormat]\n            One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text.\n\n        model : typing.Optional[str]\n            The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, \"light\" models are faster, while larger models will perform better.\n\n        extractiveness : typing.Optional[SummarizeRequestExtractiveness]\n            One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text.\n\n        temperature : typing.Optional[float]\n            Ranges from 0 to 5. 
Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1.\n\n        additional_command : typing.Optional[str]\n            A free-form instruction for modifying how the summaries get generated. Should complete the sentence \"Generate a summary _\". Eg. \"focusing on the next steps\" or \"written by Yoda\"\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[SummarizeResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/summarize\",\n            method=\"POST\",\n            json={\n                \"text\": text,\n                \"length\": length,\n                \"format\": format,\n                \"model\": model,\n                \"extractiveness\": extractiveness,\n                \"temperature\": temperature,\n                \"additional_command\": additional_command,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    SummarizeResponse,\n                    construct_type(\n                        type_=SummarizeResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n        
                    type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        
typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def tokenize(\n        self, *, text: str, model: str, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[TokenizeResponse]:\n        \"\"\"\n        This endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). 
To learn more about tokenization and byte pair encoding, see the tokens page.\n\n        Parameters\n        ----------\n        text : str\n            The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters.\n\n        model : str\n            The input will be tokenized by the tokenizer that is used by this model.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[TokenizeResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/tokenize\",\n            method=\"POST\",\n            json={\n                \"text\": text,\n                \"model\": model,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    TokenizeResponse,\n                    construct_type(\n                        type_=TokenizeResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    
headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # 
type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n           
     raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def detokenize(\n        self, *, tokens: typing.Sequence[int], model: str, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[DetokenizeResponse]:\n        \"\"\"\n        This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page.\n\n        Parameters\n        ----------\n        tokens : typing.Sequence[int]\n            The list of tokens to be detokenized.\n\n        model : str\n            An optional parameter to provide the model name. 
This will ensure that the detokenization is done by the tokenizer used by that model.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[DetokenizeResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/detokenize\",\n            method=\"POST\",\n            json={\n                \"tokens\": tokens,\n                \"model\": model,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    DetokenizeResponse,\n                    construct_type(\n                        type_=DetokenizeResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n       
             ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    
body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            
object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def check_api_key(\n        self, *, request_options: typing.Optional[RequestOptions] = None\n    ) -> AsyncHttpResponse[CheckApiKeyResponse]:\n        \"\"\"\n        Checks that the api key in the Authorization header is valid and active\n\n        Parameters\n        ----------\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[CheckApiKeyResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v1/check-api-key\",\n            method=\"POST\",\n            request_options=request_options,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    CheckApiKeyResponse,\n                    construct_type(\n                        
type_=CheckApiKeyResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n     
       if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n             
           typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n        
    )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n"
  },
  {
    "path": "src/cohere/sagemaker_client.py",
    "content": "import typing\n\nfrom .aws_client import AwsClient, AwsClientV2\nfrom .manually_maintained.cohere_aws.client import Client\nfrom .manually_maintained.cohere_aws.mode import Mode\n\n\nclass SagemakerClient(AwsClient):\n    sagemaker_finetuning: Client\n\n    def __init__(\n            self,\n            *,\n            aws_access_key: typing.Optional[str] = None,\n            aws_secret_key: typing.Optional[str] = None,\n            aws_session_token: typing.Optional[str] = None,\n            aws_region: typing.Optional[str] = None,\n            timeout: typing.Optional[float] = None,\n    ):\n        AwsClient.__init__(\n            self,\n            service=\"sagemaker\",\n            aws_access_key=aws_access_key,\n            aws_secret_key=aws_secret_key,\n            aws_session_token=aws_session_token,\n            aws_region=aws_region,\n            timeout=timeout,\n        )\n        try:\n            self.sagemaker_finetuning = Client(aws_region=aws_region)\n        except Exception:\n            pass\n\n\nclass SagemakerClientV2(AwsClientV2):\n    sagemaker_finetuning: Client\n\n    def __init__(\n            self,\n            *,\n            aws_access_key: typing.Optional[str] = None,\n            aws_secret_key: typing.Optional[str] = None,\n            aws_session_token: typing.Optional[str] = None,\n            aws_region: typing.Optional[str] = None,\n            timeout: typing.Optional[float] = None,\n    ):\n        AwsClientV2.__init__(\n            self,\n            service=\"sagemaker\",\n            aws_access_key=aws_access_key,\n            aws_secret_key=aws_secret_key,\n            aws_session_token=aws_session_token,\n            aws_region=aws_region,\n            timeout=timeout,\n        )\n        try:\n            self.sagemaker_finetuning = Client(aws_region=aws_region)\n        except Exception:\n            pass"
  },
  {
    "path": "src/cohere/types/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .api_meta import ApiMeta\n    from .api_meta_api_version import ApiMetaApiVersion\n    from .api_meta_billed_units import ApiMetaBilledUnits\n    from .api_meta_tokens import ApiMetaTokens\n    from .assistant_message import AssistantMessage\n    from .assistant_message_response import AssistantMessageResponse\n    from .assistant_message_response_content_item import (\n        AssistantMessageResponseContentItem,\n        TextAssistantMessageResponseContentItem,\n        ThinkingAssistantMessageResponseContentItem,\n    )\n    from .assistant_message_v2content import AssistantMessageV2Content\n    from .assistant_message_v2content_one_item import (\n        AssistantMessageV2ContentOneItem,\n        TextAssistantMessageV2ContentOneItem,\n        ThinkingAssistantMessageV2ContentOneItem,\n    )\n    from .auth_token_type import AuthTokenType\n    from .chat_citation import ChatCitation\n    from .chat_citation_generation_event import ChatCitationGenerationEvent\n    from .chat_citation_type import ChatCitationType\n    from .chat_connector import ChatConnector\n    from .chat_content_delta_event import ChatContentDeltaEvent\n    from .chat_content_delta_event_delta import ChatContentDeltaEventDelta\n    from .chat_content_delta_event_delta_message import ChatContentDeltaEventDeltaMessage\n    from .chat_content_delta_event_delta_message_content import ChatContentDeltaEventDeltaMessageContent\n    from .chat_content_end_event import ChatContentEndEvent\n    from .chat_content_start_event import ChatContentStartEvent\n    from .chat_content_start_event_delta import ChatContentStartEventDelta\n    from .chat_content_start_event_delta_message import ChatContentStartEventDeltaMessage\n    from .chat_content_start_event_delta_message_content import 
ChatContentStartEventDeltaMessageContent\n    from .chat_content_start_event_delta_message_content_type import ChatContentStartEventDeltaMessageContentType\n    from .chat_data_metrics import ChatDataMetrics\n    from .chat_debug_event import ChatDebugEvent\n    from .chat_document import ChatDocument\n    from .chat_document_source import ChatDocumentSource\n    from .chat_finish_reason import ChatFinishReason\n    from .chat_message import ChatMessage\n    from .chat_message_end_event import ChatMessageEndEvent\n    from .chat_message_end_event_delta import ChatMessageEndEventDelta\n    from .chat_message_start_event import ChatMessageStartEvent\n    from .chat_message_start_event_delta import ChatMessageStartEventDelta\n    from .chat_message_start_event_delta_message import ChatMessageStartEventDeltaMessage\n    from .chat_message_v2 import (\n        AssistantChatMessageV2,\n        ChatMessageV2,\n        SystemChatMessageV2,\n        ToolChatMessageV2,\n        UserChatMessageV2,\n    )\n    from .chat_messages import ChatMessages\n    from .chat_request_citation_quality import ChatRequestCitationQuality\n    from .chat_request_prompt_truncation import ChatRequestPromptTruncation\n    from .chat_request_safety_mode import ChatRequestSafetyMode\n    from .chat_search_queries_generation_event import ChatSearchQueriesGenerationEvent\n    from .chat_search_query import ChatSearchQuery\n    from .chat_search_result import ChatSearchResult\n    from .chat_search_result_connector import ChatSearchResultConnector\n    from .chat_search_results_event import ChatSearchResultsEvent\n    from .chat_stream_end_event import ChatStreamEndEvent\n    from .chat_stream_end_event_finish_reason import ChatStreamEndEventFinishReason\n    from .chat_stream_event import ChatStreamEvent\n    from .chat_stream_event_type import ChatStreamEventType\n    from .chat_stream_request_citation_quality import ChatStreamRequestCitationQuality\n    from .chat_stream_request_prompt_truncation 
import ChatStreamRequestPromptTruncation\n    from .chat_stream_request_safety_mode import ChatStreamRequestSafetyMode\n    from .chat_stream_start_event import ChatStreamStartEvent\n    from .chat_text_content import ChatTextContent\n    from .chat_text_generation_event import ChatTextGenerationEvent\n    from .chat_text_response_format import ChatTextResponseFormat\n    from .chat_text_response_format_v2 import ChatTextResponseFormatV2\n    from .chat_thinking_content import ChatThinkingContent\n    from .chat_tool_call_delta_event import ChatToolCallDeltaEvent\n    from .chat_tool_call_delta_event_delta import ChatToolCallDeltaEventDelta\n    from .chat_tool_call_delta_event_delta_message import ChatToolCallDeltaEventDeltaMessage\n    from .chat_tool_call_delta_event_delta_message_tool_calls import ChatToolCallDeltaEventDeltaMessageToolCalls\n    from .chat_tool_call_delta_event_delta_message_tool_calls_function import (\n        ChatToolCallDeltaEventDeltaMessageToolCallsFunction,\n    )\n    from .chat_tool_call_end_event import ChatToolCallEndEvent\n    from .chat_tool_call_start_event import ChatToolCallStartEvent\n    from .chat_tool_call_start_event_delta import ChatToolCallStartEventDelta\n    from .chat_tool_call_start_event_delta_message import ChatToolCallStartEventDeltaMessage\n    from .chat_tool_calls_chunk_event import ChatToolCallsChunkEvent\n    from .chat_tool_calls_generation_event import ChatToolCallsGenerationEvent\n    from .chat_tool_message import ChatToolMessage\n    from .chat_tool_plan_delta_event import ChatToolPlanDeltaEvent\n    from .chat_tool_plan_delta_event_delta import ChatToolPlanDeltaEventDelta\n    from .chat_tool_plan_delta_event_delta_message import ChatToolPlanDeltaEventDeltaMessage\n    from .chat_tool_source import ChatToolSource\n    from .check_api_key_response import CheckApiKeyResponse\n    from .citation import Citation\n    from .citation_end_event import CitationEndEvent\n    from .citation_options import 
CitationOptions\n    from .citation_options_mode import CitationOptionsMode\n    from .citation_start_event import CitationStartEvent\n    from .citation_start_event_delta import CitationStartEventDelta\n    from .citation_start_event_delta_message import CitationStartEventDeltaMessage\n    from .citation_type import CitationType\n    from .classify_data_metrics import ClassifyDataMetrics\n    from .classify_example import ClassifyExample\n    from .classify_request_truncate import ClassifyRequestTruncate\n    from .classify_response import ClassifyResponse\n    from .classify_response_classifications_item import ClassifyResponseClassificationsItem\n    from .classify_response_classifications_item_classification_type import (\n        ClassifyResponseClassificationsItemClassificationType,\n    )\n    from .classify_response_classifications_item_labels_value import ClassifyResponseClassificationsItemLabelsValue\n    from .compatible_endpoint import CompatibleEndpoint\n    from .connector import Connector\n    from .connector_auth_status import ConnectorAuthStatus\n    from .connector_o_auth import ConnectorOAuth\n    from .content import Content, ImageUrlContent, TextContent\n    from .create_connector_o_auth import CreateConnectorOAuth\n    from .create_connector_response import CreateConnectorResponse\n    from .create_connector_service_auth import CreateConnectorServiceAuth\n    from .create_embed_job_response import CreateEmbedJobResponse\n    from .dataset import Dataset\n    from .dataset_part import DatasetPart\n    from .dataset_type import DatasetType\n    from .dataset_validation_status import DatasetValidationStatus\n    from .delete_connector_response import DeleteConnectorResponse\n    from .detokenize_response import DetokenizeResponse\n    from .document import Document\n    from .document_content import DocumentContent\n    from .embed_by_type_response import EmbedByTypeResponse\n    from .embed_by_type_response_embeddings import 
EmbedByTypeResponseEmbeddings\n    from .embed_by_type_response_response_type import EmbedByTypeResponseResponseType\n    from .embed_content import EmbedContent, ImageUrlEmbedContent, TextEmbedContent\n    from .embed_floats_response import EmbedFloatsResponse\n    from .embed_image import EmbedImage\n    from .embed_image_url import EmbedImageUrl\n    from .embed_input import EmbedInput\n    from .embed_input_type import EmbedInputType\n    from .embed_job import EmbedJob\n    from .embed_job_status import EmbedJobStatus\n    from .embed_job_truncate import EmbedJobTruncate\n    from .embed_request_truncate import EmbedRequestTruncate\n    from .embed_response import EmbedResponse, EmbeddingsByTypeEmbedResponse, EmbeddingsFloatsEmbedResponse\n    from .embed_text import EmbedText\n    from .embedding_type import EmbeddingType\n    from .finetune_dataset_metrics import FinetuneDatasetMetrics\n    from .finish_reason import FinishReason\n    from .generate_request_return_likelihoods import GenerateRequestReturnLikelihoods\n    from .generate_request_truncate import GenerateRequestTruncate\n    from .generate_stream_end import GenerateStreamEnd\n    from .generate_stream_end_response import GenerateStreamEndResponse\n    from .generate_stream_error import GenerateStreamError\n    from .generate_stream_event import GenerateStreamEvent\n    from .generate_stream_request_return_likelihoods import GenerateStreamRequestReturnLikelihoods\n    from .generate_stream_request_truncate import GenerateStreamRequestTruncate\n    from .generate_stream_text import GenerateStreamText\n    from .generate_streamed_response import (\n        GenerateStreamedResponse,\n        StreamEndGenerateStreamedResponse,\n        StreamErrorGenerateStreamedResponse,\n        TextGenerationGenerateStreamedResponse,\n    )\n    from .generation import Generation\n    from .get_connector_response import GetConnectorResponse\n    from .get_model_response import GetModelResponse\n    from 
.get_model_response_sampling_defaults import GetModelResponseSamplingDefaults\n    from .image import Image\n    from .image_content import ImageContent\n    from .image_url import ImageUrl\n    from .image_url_detail import ImageUrlDetail\n    from .json_response_format import JsonResponseFormat\n    from .json_response_format_v2 import JsonResponseFormatV2\n    from .label_metric import LabelMetric\n    from .list_connectors_response import ListConnectorsResponse\n    from .list_embed_job_response import ListEmbedJobResponse\n    from .list_models_response import ListModelsResponse\n    from .logprob_item import LogprobItem\n    from .message import ChatbotMessage, Message, SystemMessage, ToolMessage, UserMessage\n    from .metrics import Metrics\n    from .non_streamed_chat_response import NonStreamedChatResponse\n    from .o_auth_authorize_response import OAuthAuthorizeResponse\n    from .parse_info import ParseInfo\n    from .rerank_document import RerankDocument\n    from .rerank_request_documents_item import RerankRequestDocumentsItem\n    from .rerank_response import RerankResponse\n    from .rerank_response_results_item import RerankResponseResultsItem\n    from .rerank_response_results_item_document import RerankResponseResultsItemDocument\n    from .reranker_data_metrics import RerankerDataMetrics\n    from .response_format import JsonObjectResponseFormat, ResponseFormat, TextResponseFormat\n    from .response_format_v2 import JsonObjectResponseFormatV2, ResponseFormatV2, TextResponseFormatV2\n    from .single_generation import SingleGeneration\n    from .single_generation_in_stream import SingleGenerationInStream\n    from .single_generation_token_likelihoods_item import SingleGenerationTokenLikelihoodsItem\n    from .source import DocumentSource, Source, ToolSource\n    from .streamed_chat_response import (\n        CitationGenerationStreamedChatResponse,\n        DebugStreamedChatResponse,\n        SearchQueriesGenerationStreamedChatResponse,\n        
SearchResultsStreamedChatResponse,\n        StreamEndStreamedChatResponse,\n        StreamStartStreamedChatResponse,\n        StreamedChatResponse,\n        TextGenerationStreamedChatResponse,\n        ToolCallsChunkStreamedChatResponse,\n        ToolCallsGenerationStreamedChatResponse,\n    )\n    from .summarize_request_extractiveness import SummarizeRequestExtractiveness\n    from .summarize_request_format import SummarizeRequestFormat\n    from .summarize_request_length import SummarizeRequestLength\n    from .summarize_response import SummarizeResponse\n    from .system_message_v2 import SystemMessageV2\n    from .system_message_v2content import SystemMessageV2Content\n    from .system_message_v2content_one_item import SystemMessageV2ContentOneItem, TextSystemMessageV2ContentOneItem\n    from .thinking import Thinking\n    from .thinking_type import ThinkingType\n    from .tokenize_response import TokenizeResponse\n    from .tool import Tool\n    from .tool_call import ToolCall\n    from .tool_call_delta import ToolCallDelta\n    from .tool_call_v2 import ToolCallV2\n    from .tool_call_v2function import ToolCallV2Function\n    from .tool_content import DocumentToolContent, TextToolContent, ToolContent\n    from .tool_message_v2 import ToolMessageV2\n    from .tool_message_v2content import ToolMessageV2Content\n    from .tool_parameter_definitions_value import ToolParameterDefinitionsValue\n    from .tool_result import ToolResult\n    from .tool_v2 import ToolV2\n    from .tool_v2function import ToolV2Function\n    from .update_connector_response import UpdateConnectorResponse\n    from .usage import Usage\n    from .usage_billed_units import UsageBilledUnits\n    from .usage_tokens import UsageTokens\n    from .user_message_v2 import UserMessageV2\n    from .user_message_v2content import UserMessageV2Content\n_dynamic_imports: typing.Dict[str, str] = {\n    \"ApiMeta\": \".api_meta\",\n    \"ApiMetaApiVersion\": \".api_meta_api_version\",\n    
\"ApiMetaBilledUnits\": \".api_meta_billed_units\",\n    \"ApiMetaTokens\": \".api_meta_tokens\",\n    \"AssistantChatMessageV2\": \".chat_message_v2\",\n    \"AssistantMessage\": \".assistant_message\",\n    \"AssistantMessageResponse\": \".assistant_message_response\",\n    \"AssistantMessageResponseContentItem\": \".assistant_message_response_content_item\",\n    \"AssistantMessageV2Content\": \".assistant_message_v2content\",\n    \"AssistantMessageV2ContentOneItem\": \".assistant_message_v2content_one_item\",\n    \"AuthTokenType\": \".auth_token_type\",\n    \"ChatCitation\": \".chat_citation\",\n    \"ChatCitationGenerationEvent\": \".chat_citation_generation_event\",\n    \"ChatCitationType\": \".chat_citation_type\",\n    \"ChatConnector\": \".chat_connector\",\n    \"ChatContentDeltaEvent\": \".chat_content_delta_event\",\n    \"ChatContentDeltaEventDelta\": \".chat_content_delta_event_delta\",\n    \"ChatContentDeltaEventDeltaMessage\": \".chat_content_delta_event_delta_message\",\n    \"ChatContentDeltaEventDeltaMessageContent\": \".chat_content_delta_event_delta_message_content\",\n    \"ChatContentEndEvent\": \".chat_content_end_event\",\n    \"ChatContentStartEvent\": \".chat_content_start_event\",\n    \"ChatContentStartEventDelta\": \".chat_content_start_event_delta\",\n    \"ChatContentStartEventDeltaMessage\": \".chat_content_start_event_delta_message\",\n    \"ChatContentStartEventDeltaMessageContent\": \".chat_content_start_event_delta_message_content\",\n    \"ChatContentStartEventDeltaMessageContentType\": \".chat_content_start_event_delta_message_content_type\",\n    \"ChatDataMetrics\": \".chat_data_metrics\",\n    \"ChatDebugEvent\": \".chat_debug_event\",\n    \"ChatDocument\": \".chat_document\",\n    \"ChatDocumentSource\": \".chat_document_source\",\n    \"ChatFinishReason\": \".chat_finish_reason\",\n    \"ChatMessage\": \".chat_message\",\n    \"ChatMessageEndEvent\": \".chat_message_end_event\",\n    \"ChatMessageEndEventDelta\": 
\".chat_message_end_event_delta\",\n    \"ChatMessageStartEvent\": \".chat_message_start_event\",\n    \"ChatMessageStartEventDelta\": \".chat_message_start_event_delta\",\n    \"ChatMessageStartEventDeltaMessage\": \".chat_message_start_event_delta_message\",\n    \"ChatMessageV2\": \".chat_message_v2\",\n    \"ChatMessages\": \".chat_messages\",\n    \"ChatRequestCitationQuality\": \".chat_request_citation_quality\",\n    \"ChatRequestPromptTruncation\": \".chat_request_prompt_truncation\",\n    \"ChatRequestSafetyMode\": \".chat_request_safety_mode\",\n    \"ChatSearchQueriesGenerationEvent\": \".chat_search_queries_generation_event\",\n    \"ChatSearchQuery\": \".chat_search_query\",\n    \"ChatSearchResult\": \".chat_search_result\",\n    \"ChatSearchResultConnector\": \".chat_search_result_connector\",\n    \"ChatSearchResultsEvent\": \".chat_search_results_event\",\n    \"ChatStreamEndEvent\": \".chat_stream_end_event\",\n    \"ChatStreamEndEventFinishReason\": \".chat_stream_end_event_finish_reason\",\n    \"ChatStreamEvent\": \".chat_stream_event\",\n    \"ChatStreamEventType\": \".chat_stream_event_type\",\n    \"ChatStreamRequestCitationQuality\": \".chat_stream_request_citation_quality\",\n    \"ChatStreamRequestPromptTruncation\": \".chat_stream_request_prompt_truncation\",\n    \"ChatStreamRequestSafetyMode\": \".chat_stream_request_safety_mode\",\n    \"ChatStreamStartEvent\": \".chat_stream_start_event\",\n    \"ChatTextContent\": \".chat_text_content\",\n    \"ChatTextGenerationEvent\": \".chat_text_generation_event\",\n    \"ChatTextResponseFormat\": \".chat_text_response_format\",\n    \"ChatTextResponseFormatV2\": \".chat_text_response_format_v2\",\n    \"ChatThinkingContent\": \".chat_thinking_content\",\n    \"ChatToolCallDeltaEvent\": \".chat_tool_call_delta_event\",\n    \"ChatToolCallDeltaEventDelta\": \".chat_tool_call_delta_event_delta\",\n    \"ChatToolCallDeltaEventDeltaMessage\": \".chat_tool_call_delta_event_delta_message\",\n    
\"ChatToolCallDeltaEventDeltaMessageToolCalls\": \".chat_tool_call_delta_event_delta_message_tool_calls\",\n    \"ChatToolCallDeltaEventDeltaMessageToolCallsFunction\": \".chat_tool_call_delta_event_delta_message_tool_calls_function\",\n    \"ChatToolCallEndEvent\": \".chat_tool_call_end_event\",\n    \"ChatToolCallStartEvent\": \".chat_tool_call_start_event\",\n    \"ChatToolCallStartEventDelta\": \".chat_tool_call_start_event_delta\",\n    \"ChatToolCallStartEventDeltaMessage\": \".chat_tool_call_start_event_delta_message\",\n    \"ChatToolCallsChunkEvent\": \".chat_tool_calls_chunk_event\",\n    \"ChatToolCallsGenerationEvent\": \".chat_tool_calls_generation_event\",\n    \"ChatToolMessage\": \".chat_tool_message\",\n    \"ChatToolPlanDeltaEvent\": \".chat_tool_plan_delta_event\",\n    \"ChatToolPlanDeltaEventDelta\": \".chat_tool_plan_delta_event_delta\",\n    \"ChatToolPlanDeltaEventDeltaMessage\": \".chat_tool_plan_delta_event_delta_message\",\n    \"ChatToolSource\": \".chat_tool_source\",\n    \"ChatbotMessage\": \".message\",\n    \"CheckApiKeyResponse\": \".check_api_key_response\",\n    \"Citation\": \".citation\",\n    \"CitationEndEvent\": \".citation_end_event\",\n    \"CitationGenerationStreamedChatResponse\": \".streamed_chat_response\",\n    \"CitationOptions\": \".citation_options\",\n    \"CitationOptionsMode\": \".citation_options_mode\",\n    \"CitationStartEvent\": \".citation_start_event\",\n    \"CitationStartEventDelta\": \".citation_start_event_delta\",\n    \"CitationStartEventDeltaMessage\": \".citation_start_event_delta_message\",\n    \"CitationType\": \".citation_type\",\n    \"ClassifyDataMetrics\": \".classify_data_metrics\",\n    \"ClassifyExample\": \".classify_example\",\n    \"ClassifyRequestTruncate\": \".classify_request_truncate\",\n    \"ClassifyResponse\": \".classify_response\",\n    \"ClassifyResponseClassificationsItem\": \".classify_response_classifications_item\",\n    
\"ClassifyResponseClassificationsItemClassificationType\": \".classify_response_classifications_item_classification_type\",\n    \"ClassifyResponseClassificationsItemLabelsValue\": \".classify_response_classifications_item_labels_value\",\n    \"CompatibleEndpoint\": \".compatible_endpoint\",\n    \"Connector\": \".connector\",\n    \"ConnectorAuthStatus\": \".connector_auth_status\",\n    \"ConnectorOAuth\": \".connector_o_auth\",\n    \"Content\": \".content\",\n    \"CreateConnectorOAuth\": \".create_connector_o_auth\",\n    \"CreateConnectorResponse\": \".create_connector_response\",\n    \"CreateConnectorServiceAuth\": \".create_connector_service_auth\",\n    \"CreateEmbedJobResponse\": \".create_embed_job_response\",\n    \"Dataset\": \".dataset\",\n    \"DatasetPart\": \".dataset_part\",\n    \"DatasetType\": \".dataset_type\",\n    \"DatasetValidationStatus\": \".dataset_validation_status\",\n    \"DebugStreamedChatResponse\": \".streamed_chat_response\",\n    \"DeleteConnectorResponse\": \".delete_connector_response\",\n    \"DetokenizeResponse\": \".detokenize_response\",\n    \"Document\": \".document\",\n    \"DocumentContent\": \".document_content\",\n    \"DocumentSource\": \".source\",\n    \"DocumentToolContent\": \".tool_content\",\n    \"EmbedByTypeResponse\": \".embed_by_type_response\",\n    \"EmbedByTypeResponseEmbeddings\": \".embed_by_type_response_embeddings\",\n    \"EmbedByTypeResponseResponseType\": \".embed_by_type_response_response_type\",\n    \"EmbedContent\": \".embed_content\",\n    \"EmbedFloatsResponse\": \".embed_floats_response\",\n    \"EmbedImage\": \".embed_image\",\n    \"EmbedImageUrl\": \".embed_image_url\",\n    \"EmbedInput\": \".embed_input\",\n    \"EmbedInputType\": \".embed_input_type\",\n    \"EmbedJob\": \".embed_job\",\n    \"EmbedJobStatus\": \".embed_job_status\",\n    \"EmbedJobTruncate\": \".embed_job_truncate\",\n    \"EmbedRequestTruncate\": \".embed_request_truncate\",\n    \"EmbedResponse\": 
\".embed_response\",\n    \"EmbedText\": \".embed_text\",\n    \"EmbeddingType\": \".embedding_type\",\n    \"EmbeddingsByTypeEmbedResponse\": \".embed_response\",\n    \"EmbeddingsFloatsEmbedResponse\": \".embed_response\",\n    \"FinetuneDatasetMetrics\": \".finetune_dataset_metrics\",\n    \"FinishReason\": \".finish_reason\",\n    \"GenerateRequestReturnLikelihoods\": \".generate_request_return_likelihoods\",\n    \"GenerateRequestTruncate\": \".generate_request_truncate\",\n    \"GenerateStreamEnd\": \".generate_stream_end\",\n    \"GenerateStreamEndResponse\": \".generate_stream_end_response\",\n    \"GenerateStreamError\": \".generate_stream_error\",\n    \"GenerateStreamEvent\": \".generate_stream_event\",\n    \"GenerateStreamRequestReturnLikelihoods\": \".generate_stream_request_return_likelihoods\",\n    \"GenerateStreamRequestTruncate\": \".generate_stream_request_truncate\",\n    \"GenerateStreamText\": \".generate_stream_text\",\n    \"GenerateStreamedResponse\": \".generate_streamed_response\",\n    \"Generation\": \".generation\",\n    \"GetConnectorResponse\": \".get_connector_response\",\n    \"GetModelResponse\": \".get_model_response\",\n    \"GetModelResponseSamplingDefaults\": \".get_model_response_sampling_defaults\",\n    \"Image\": \".image\",\n    \"ImageContent\": \".image_content\",\n    \"ImageUrl\": \".image_url\",\n    \"ImageUrlContent\": \".content\",\n    \"ImageUrlDetail\": \".image_url_detail\",\n    \"ImageUrlEmbedContent\": \".embed_content\",\n    \"JsonObjectResponseFormat\": \".response_format\",\n    \"JsonObjectResponseFormatV2\": \".response_format_v2\",\n    \"JsonResponseFormat\": \".json_response_format\",\n    \"JsonResponseFormatV2\": \".json_response_format_v2\",\n    \"LabelMetric\": \".label_metric\",\n    \"ListConnectorsResponse\": \".list_connectors_response\",\n    \"ListEmbedJobResponse\": \".list_embed_job_response\",\n    \"ListModelsResponse\": \".list_models_response\",\n    \"LogprobItem\": 
\".logprob_item\",\n    \"Message\": \".message\",\n    \"Metrics\": \".metrics\",\n    \"NonStreamedChatResponse\": \".non_streamed_chat_response\",\n    \"OAuthAuthorizeResponse\": \".o_auth_authorize_response\",\n    \"ParseInfo\": \".parse_info\",\n    \"RerankDocument\": \".rerank_document\",\n    \"RerankRequestDocumentsItem\": \".rerank_request_documents_item\",\n    \"RerankResponse\": \".rerank_response\",\n    \"RerankResponseResultsItem\": \".rerank_response_results_item\",\n    \"RerankResponseResultsItemDocument\": \".rerank_response_results_item_document\",\n    \"RerankerDataMetrics\": \".reranker_data_metrics\",\n    \"ResponseFormat\": \".response_format\",\n    \"ResponseFormatV2\": \".response_format_v2\",\n    \"SearchQueriesGenerationStreamedChatResponse\": \".streamed_chat_response\",\n    \"SearchResultsStreamedChatResponse\": \".streamed_chat_response\",\n    \"SingleGeneration\": \".single_generation\",\n    \"SingleGenerationInStream\": \".single_generation_in_stream\",\n    \"SingleGenerationTokenLikelihoodsItem\": \".single_generation_token_likelihoods_item\",\n    \"Source\": \".source\",\n    \"StreamEndGenerateStreamedResponse\": \".generate_streamed_response\",\n    \"StreamEndStreamedChatResponse\": \".streamed_chat_response\",\n    \"StreamErrorGenerateStreamedResponse\": \".generate_streamed_response\",\n    \"StreamStartStreamedChatResponse\": \".streamed_chat_response\",\n    \"StreamedChatResponse\": \".streamed_chat_response\",\n    \"SummarizeRequestExtractiveness\": \".summarize_request_extractiveness\",\n    \"SummarizeRequestFormat\": \".summarize_request_format\",\n    \"SummarizeRequestLength\": \".summarize_request_length\",\n    \"SummarizeResponse\": \".summarize_response\",\n    \"SystemChatMessageV2\": \".chat_message_v2\",\n    \"SystemMessage\": \".message\",\n    \"SystemMessageV2\": \".system_message_v2\",\n    \"SystemMessageV2Content\": \".system_message_v2content\",\n    \"SystemMessageV2ContentOneItem\": 
\".system_message_v2content_one_item\",\n    \"TextAssistantMessageResponseContentItem\": \".assistant_message_response_content_item\",\n    \"TextAssistantMessageV2ContentOneItem\": \".assistant_message_v2content_one_item\",\n    \"TextContent\": \".content\",\n    \"TextEmbedContent\": \".embed_content\",\n    \"TextGenerationGenerateStreamedResponse\": \".generate_streamed_response\",\n    \"TextGenerationStreamedChatResponse\": \".streamed_chat_response\",\n    \"TextResponseFormat\": \".response_format\",\n    \"TextResponseFormatV2\": \".response_format_v2\",\n    \"TextSystemMessageV2ContentOneItem\": \".system_message_v2content_one_item\",\n    \"TextToolContent\": \".tool_content\",\n    \"Thinking\": \".thinking\",\n    \"ThinkingAssistantMessageResponseContentItem\": \".assistant_message_response_content_item\",\n    \"ThinkingAssistantMessageV2ContentOneItem\": \".assistant_message_v2content_one_item\",\n    \"ThinkingType\": \".thinking_type\",\n    \"TokenizeResponse\": \".tokenize_response\",\n    \"Tool\": \".tool\",\n    \"ToolCall\": \".tool_call\",\n    \"ToolCallDelta\": \".tool_call_delta\",\n    \"ToolCallV2\": \".tool_call_v2\",\n    \"ToolCallV2Function\": \".tool_call_v2function\",\n    \"ToolCallsChunkStreamedChatResponse\": \".streamed_chat_response\",\n    \"ToolCallsGenerationStreamedChatResponse\": \".streamed_chat_response\",\n    \"ToolChatMessageV2\": \".chat_message_v2\",\n    \"ToolContent\": \".tool_content\",\n    \"ToolMessage\": \".message\",\n    \"ToolMessageV2\": \".tool_message_v2\",\n    \"ToolMessageV2Content\": \".tool_message_v2content\",\n    \"ToolParameterDefinitionsValue\": \".tool_parameter_definitions_value\",\n    \"ToolResult\": \".tool_result\",\n    \"ToolSource\": \".source\",\n    \"ToolV2\": \".tool_v2\",\n    \"ToolV2Function\": \".tool_v2function\",\n    \"UpdateConnectorResponse\": \".update_connector_response\",\n    \"Usage\": \".usage\",\n    \"UsageBilledUnits\": \".usage_billed_units\",\n    
\"UsageTokens\": \".usage_tokens\",\n    \"UserChatMessageV2\": \".chat_message_v2\",\n    \"UserMessage\": \".message\",\n    \"UserMessageV2\": \".user_message_v2\",\n    \"UserMessageV2Content\": \".user_message_v2content\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\n    \"ApiMeta\",\n    \"ApiMetaApiVersion\",\n    \"ApiMetaBilledUnits\",\n    \"ApiMetaTokens\",\n    \"AssistantChatMessageV2\",\n    \"AssistantMessage\",\n    \"AssistantMessageResponse\",\n    \"AssistantMessageResponseContentItem\",\n    \"AssistantMessageV2Content\",\n    \"AssistantMessageV2ContentOneItem\",\n    \"AuthTokenType\",\n    \"ChatCitation\",\n    \"ChatCitationGenerationEvent\",\n    \"ChatCitationType\",\n    \"ChatConnector\",\n    \"ChatContentDeltaEvent\",\n    \"ChatContentDeltaEventDelta\",\n    \"ChatContentDeltaEventDeltaMessage\",\n    \"ChatContentDeltaEventDeltaMessageContent\",\n    \"ChatContentEndEvent\",\n    \"ChatContentStartEvent\",\n    \"ChatContentStartEventDelta\",\n    \"ChatContentStartEventDeltaMessage\",\n    \"ChatContentStartEventDeltaMessageContent\",\n    \"ChatContentStartEventDeltaMessageContentType\",\n    \"ChatDataMetrics\",\n    \"ChatDebugEvent\",\n    \"ChatDocument\",\n    
\"ChatDocumentSource\",\n    \"ChatFinishReason\",\n    \"ChatMessage\",\n    \"ChatMessageEndEvent\",\n    \"ChatMessageEndEventDelta\",\n    \"ChatMessageStartEvent\",\n    \"ChatMessageStartEventDelta\",\n    \"ChatMessageStartEventDeltaMessage\",\n    \"ChatMessageV2\",\n    \"ChatMessages\",\n    \"ChatRequestCitationQuality\",\n    \"ChatRequestPromptTruncation\",\n    \"ChatRequestSafetyMode\",\n    \"ChatSearchQueriesGenerationEvent\",\n    \"ChatSearchQuery\",\n    \"ChatSearchResult\",\n    \"ChatSearchResultConnector\",\n    \"ChatSearchResultsEvent\",\n    \"ChatStreamEndEvent\",\n    \"ChatStreamEndEventFinishReason\",\n    \"ChatStreamEvent\",\n    \"ChatStreamEventType\",\n    \"ChatStreamRequestCitationQuality\",\n    \"ChatStreamRequestPromptTruncation\",\n    \"ChatStreamRequestSafetyMode\",\n    \"ChatStreamStartEvent\",\n    \"ChatTextContent\",\n    \"ChatTextGenerationEvent\",\n    \"ChatTextResponseFormat\",\n    \"ChatTextResponseFormatV2\",\n    \"ChatThinkingContent\",\n    \"ChatToolCallDeltaEvent\",\n    \"ChatToolCallDeltaEventDelta\",\n    \"ChatToolCallDeltaEventDeltaMessage\",\n    \"ChatToolCallDeltaEventDeltaMessageToolCalls\",\n    \"ChatToolCallDeltaEventDeltaMessageToolCallsFunction\",\n    \"ChatToolCallEndEvent\",\n    \"ChatToolCallStartEvent\",\n    \"ChatToolCallStartEventDelta\",\n    \"ChatToolCallStartEventDeltaMessage\",\n    \"ChatToolCallsChunkEvent\",\n    \"ChatToolCallsGenerationEvent\",\n    \"ChatToolMessage\",\n    \"ChatToolPlanDeltaEvent\",\n    \"ChatToolPlanDeltaEventDelta\",\n    \"ChatToolPlanDeltaEventDeltaMessage\",\n    \"ChatToolSource\",\n    \"ChatbotMessage\",\n    \"CheckApiKeyResponse\",\n    \"Citation\",\n    \"CitationEndEvent\",\n    \"CitationGenerationStreamedChatResponse\",\n    \"CitationOptions\",\n    \"CitationOptionsMode\",\n    \"CitationStartEvent\",\n    \"CitationStartEventDelta\",\n    \"CitationStartEventDeltaMessage\",\n    \"CitationType\",\n    \"ClassifyDataMetrics\",\n    
\"ClassifyExample\",\n    \"ClassifyRequestTruncate\",\n    \"ClassifyResponse\",\n    \"ClassifyResponseClassificationsItem\",\n    \"ClassifyResponseClassificationsItemClassificationType\",\n    \"ClassifyResponseClassificationsItemLabelsValue\",\n    \"CompatibleEndpoint\",\n    \"Connector\",\n    \"ConnectorAuthStatus\",\n    \"ConnectorOAuth\",\n    \"Content\",\n    \"CreateConnectorOAuth\",\n    \"CreateConnectorResponse\",\n    \"CreateConnectorServiceAuth\",\n    \"CreateEmbedJobResponse\",\n    \"Dataset\",\n    \"DatasetPart\",\n    \"DatasetType\",\n    \"DatasetValidationStatus\",\n    \"DebugStreamedChatResponse\",\n    \"DeleteConnectorResponse\",\n    \"DetokenizeResponse\",\n    \"Document\",\n    \"DocumentContent\",\n    \"DocumentSource\",\n    \"DocumentToolContent\",\n    \"EmbedByTypeResponse\",\n    \"EmbedByTypeResponseEmbeddings\",\n    \"EmbedByTypeResponseResponseType\",\n    \"EmbedContent\",\n    \"EmbedFloatsResponse\",\n    \"EmbedImage\",\n    \"EmbedImageUrl\",\n    \"EmbedInput\",\n    \"EmbedInputType\",\n    \"EmbedJob\",\n    \"EmbedJobStatus\",\n    \"EmbedJobTruncate\",\n    \"EmbedRequestTruncate\",\n    \"EmbedResponse\",\n    \"EmbedText\",\n    \"EmbeddingType\",\n    \"EmbeddingsByTypeEmbedResponse\",\n    \"EmbeddingsFloatsEmbedResponse\",\n    \"FinetuneDatasetMetrics\",\n    \"FinishReason\",\n    \"GenerateRequestReturnLikelihoods\",\n    \"GenerateRequestTruncate\",\n    \"GenerateStreamEnd\",\n    \"GenerateStreamEndResponse\",\n    \"GenerateStreamError\",\n    \"GenerateStreamEvent\",\n    \"GenerateStreamRequestReturnLikelihoods\",\n    \"GenerateStreamRequestTruncate\",\n    \"GenerateStreamText\",\n    \"GenerateStreamedResponse\",\n    \"Generation\",\n    \"GetConnectorResponse\",\n    \"GetModelResponse\",\n    \"GetModelResponseSamplingDefaults\",\n    \"Image\",\n    \"ImageContent\",\n    \"ImageUrl\",\n    \"ImageUrlContent\",\n    \"ImageUrlDetail\",\n    \"ImageUrlEmbedContent\",\n    
\"JsonObjectResponseFormat\",\n    \"JsonObjectResponseFormatV2\",\n    \"JsonResponseFormat\",\n    \"JsonResponseFormatV2\",\n    \"LabelMetric\",\n    \"ListConnectorsResponse\",\n    \"ListEmbedJobResponse\",\n    \"ListModelsResponse\",\n    \"LogprobItem\",\n    \"Message\",\n    \"Metrics\",\n    \"NonStreamedChatResponse\",\n    \"OAuthAuthorizeResponse\",\n    \"ParseInfo\",\n    \"RerankDocument\",\n    \"RerankRequestDocumentsItem\",\n    \"RerankResponse\",\n    \"RerankResponseResultsItem\",\n    \"RerankResponseResultsItemDocument\",\n    \"RerankerDataMetrics\",\n    \"ResponseFormat\",\n    \"ResponseFormatV2\",\n    \"SearchQueriesGenerationStreamedChatResponse\",\n    \"SearchResultsStreamedChatResponse\",\n    \"SingleGeneration\",\n    \"SingleGenerationInStream\",\n    \"SingleGenerationTokenLikelihoodsItem\",\n    \"Source\",\n    \"StreamEndGenerateStreamedResponse\",\n    \"StreamEndStreamedChatResponse\",\n    \"StreamErrorGenerateStreamedResponse\",\n    \"StreamStartStreamedChatResponse\",\n    \"StreamedChatResponse\",\n    \"SummarizeRequestExtractiveness\",\n    \"SummarizeRequestFormat\",\n    \"SummarizeRequestLength\",\n    \"SummarizeResponse\",\n    \"SystemChatMessageV2\",\n    \"SystemMessage\",\n    \"SystemMessageV2\",\n    \"SystemMessageV2Content\",\n    \"SystemMessageV2ContentOneItem\",\n    \"TextAssistantMessageResponseContentItem\",\n    \"TextAssistantMessageV2ContentOneItem\",\n    \"TextContent\",\n    \"TextEmbedContent\",\n    \"TextGenerationGenerateStreamedResponse\",\n    \"TextGenerationStreamedChatResponse\",\n    \"TextResponseFormat\",\n    \"TextResponseFormatV2\",\n    \"TextSystemMessageV2ContentOneItem\",\n    \"TextToolContent\",\n    \"Thinking\",\n    \"ThinkingAssistantMessageResponseContentItem\",\n    \"ThinkingAssistantMessageV2ContentOneItem\",\n    \"ThinkingType\",\n    \"TokenizeResponse\",\n    \"Tool\",\n    \"ToolCall\",\n    \"ToolCallDelta\",\n    \"ToolCallV2\",\n    
\"ToolCallV2Function\",\n    \"ToolCallsChunkStreamedChatResponse\",\n    \"ToolCallsGenerationStreamedChatResponse\",\n    \"ToolChatMessageV2\",\n    \"ToolContent\",\n    \"ToolMessage\",\n    \"ToolMessageV2\",\n    \"ToolMessageV2Content\",\n    \"ToolParameterDefinitionsValue\",\n    \"ToolResult\",\n    \"ToolSource\",\n    \"ToolV2\",\n    \"ToolV2Function\",\n    \"UpdateConnectorResponse\",\n    \"Usage\",\n    \"UsageBilledUnits\",\n    \"UsageTokens\",\n    \"UserChatMessageV2\",\n    \"UserMessage\",\n    \"UserMessageV2\",\n    \"UserMessageV2Content\",\n]\n"
  },
  {
    "path": "src/cohere/types/api_meta.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .api_meta_api_version import ApiMetaApiVersion\nfrom .api_meta_billed_units import ApiMetaBilledUnits\nfrom .api_meta_tokens import ApiMetaTokens\n\n\nclass ApiMeta(UncheckedBaseModel):\n    api_version: typing.Optional[ApiMetaApiVersion] = None\n    billed_units: typing.Optional[ApiMetaBilledUnits] = None\n    tokens: typing.Optional[ApiMetaTokens] = None\n    cached_tokens: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of prompt tokens that hit the inference cache.\n    \"\"\"\n\n    warnings: typing.Optional[typing.List[str]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/api_meta_api_version.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ApiMetaApiVersion(UncheckedBaseModel):\n    version: str\n    is_deprecated: typing.Optional[bool] = None\n    is_experimental: typing.Optional[bool] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/api_meta_billed_units.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ApiMetaBilledUnits(UncheckedBaseModel):\n    images: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of billed images.\n    \"\"\"\n\n    input_tokens: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of billed input tokens.\n    \"\"\"\n\n    image_tokens: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of billed image tokens.\n    \"\"\"\n\n    output_tokens: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of billed output tokens.\n    \"\"\"\n\n    search_units: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of billed search units.\n    \"\"\"\n\n    classifications: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of billed classifications units.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/api_meta_tokens.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ApiMetaTokens(UncheckedBaseModel):\n    input_tokens: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of tokens used as input to the model.\n    \"\"\"\n\n    output_tokens: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of tokens produced by the model.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/assistant_message.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .assistant_message_v2content import AssistantMessageV2Content\nfrom .citation import Citation\nfrom .tool_call_v2 import ToolCallV2\n\n\nclass AssistantMessage(UncheckedBaseModel):\n    \"\"\"\n    A message from the assistant role can contain text and tool call information.\n    \"\"\"\n\n    tool_calls: typing.Optional[typing.List[ToolCallV2]] = None\n    tool_plan: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    A chain-of-thought style reflection and plan that the model generates when working with Tools.\n    \"\"\"\n\n    content: typing.Optional[AssistantMessageV2Content] = None\n    citations: typing.Optional[typing.List[Citation]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/assistant_message_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .assistant_message_response_content_item import AssistantMessageResponseContentItem\nfrom .citation import Citation\nfrom .tool_call_v2 import ToolCallV2\n\n\nclass AssistantMessageResponse(UncheckedBaseModel):\n    \"\"\"\n    A message from the assistant role can contain text and tool call information.\n    \"\"\"\n\n    role: typing.Literal[\"assistant\"] = \"assistant\"\n    tool_calls: typing.Optional[typing.List[ToolCallV2]] = None\n    tool_plan: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    A chain-of-thought style reflection and plan that the model generates when working with Tools.\n    \"\"\"\n\n    content: typing.Optional[typing.List[AssistantMessageResponseContentItem]] = None\n    citations: typing.Optional[typing.List[Citation]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/assistant_message_response_content_item.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\n\n\nclass TextAssistantMessageResponseContentItem(UncheckedBaseModel):\n    type: typing.Literal[\"text\"] = \"text\"\n    text: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ThinkingAssistantMessageResponseContentItem(UncheckedBaseModel):\n    type: typing.Literal[\"thinking\"] = \"thinking\"\n    thinking: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nAssistantMessageResponseContentItem = typing_extensions.Annotated[\n    typing.Union[TextAssistantMessageResponseContentItem, ThinkingAssistantMessageResponseContentItem],\n    UnionMetadata(discriminant=\"type\"),\n]\n"
  },
  {
    "path": "src/cohere/types/assistant_message_v2content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom .assistant_message_v2content_one_item import AssistantMessageV2ContentOneItem\n\nAssistantMessageV2Content = typing.Union[str, typing.List[AssistantMessageV2ContentOneItem]]\n"
  },
  {
    "path": "src/cohere/types/assistant_message_v2content_one_item.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\n\n\nclass TextAssistantMessageV2ContentOneItem(UncheckedBaseModel):\n    type: typing.Literal[\"text\"] = \"text\"\n    text: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ThinkingAssistantMessageV2ContentOneItem(UncheckedBaseModel):\n    type: typing.Literal[\"thinking\"] = \"thinking\"\n    thinking: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nAssistantMessageV2ContentOneItem = typing_extensions.Annotated[\n    typing.Union[TextAssistantMessageV2ContentOneItem, ThinkingAssistantMessageV2ContentOneItem],\n    UnionMetadata(discriminant=\"type\"),\n]\n"
  },
  {
    "path": "src/cohere/types/auth_token_type.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nAuthTokenType = typing.Union[typing.Literal[\"bearer\", \"basic\", \"noscheme\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/chat_citation.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_citation_type import ChatCitationType\n\n\nclass ChatCitation(UncheckedBaseModel):\n    \"\"\"\n    A section of the generated reply which cites external knowledge.\n    \"\"\"\n\n    start: int = pydantic.Field()\n    \"\"\"\n    The index of text that the citation starts at, counting from zero. For example, a generation of `Hello, world!` with a citation on `world` would have a start value of `7`. This is because the citation starts at `w`, which is the seventh character.\n    \"\"\"\n\n    end: int = pydantic.Field()\n    \"\"\"\n    The index of text that the citation ends after, counting from zero. For example, a generation of `Hello, world!` with a citation on `world` would have an end value of `11`. This is because the citation ends after `d`, which is the eleventh character.\n    \"\"\"\n\n    text: str = pydantic.Field()\n    \"\"\"\n    The text of the citation. For example, a generation of `Hello, world!` with a citation of `world` would have a text value of `world`.\n    \"\"\"\n\n    document_ids: typing.List[str] = pydantic.Field()\n    \"\"\"\n    Identifiers of documents cited by this section of the generated reply.\n    \"\"\"\n\n    type: typing.Optional[ChatCitationType] = pydantic.Field(default=None)\n    \"\"\"\n    The type of citation which indicates what part of the response the citation is for.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_citation_generation_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_citation import ChatCitation\nfrom .chat_stream_event import ChatStreamEvent\n\n\nclass ChatCitationGenerationEvent(ChatStreamEvent):\n    citations: typing.List[ChatCitation] = pydantic.Field()\n    \"\"\"\n    Citations for the generated reply.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_citation_type.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nChatCitationType = typing.Union[typing.Literal[\"TEXT_CONTENT\", \"PLAN\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/chat_connector.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatConnector(UncheckedBaseModel):\n    \"\"\"\n    The connector used for fetching documents.\n    \"\"\"\n\n    id: str = pydantic.Field()\n    \"\"\"\n    The identifier of the connector.\n    \"\"\"\n\n    user_access_token: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    When specified, this user access token will be passed to the connector in the Authorization header instead of the Cohere generated one.\n    \"\"\"\n\n    continue_on_failure: typing.Optional[bool] = pydantic.Field(default=None)\n    \"\"\"\n    Defaults to `false`.\n    \n    When `true`, the request will continue if this connector returned an error.\n    \"\"\"\n\n    options: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None)\n    \"\"\"\n    Provides the connector with different settings at request time. The key/value pairs of this object are specific to each connector.\n    \n    For example, the connector `web-search` supports the `site` option, which limits search results to the specified domain.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_content_delta_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_content_delta_event_delta import ChatContentDeltaEventDelta\nfrom .chat_stream_event_type import ChatStreamEventType\nfrom .logprob_item import LogprobItem\n\n\nclass ChatContentDeltaEvent(ChatStreamEventType):\n    \"\"\"\n    A streamed delta event which contains a delta of chat text content.\n    \"\"\"\n\n    index: typing.Optional[int] = None\n    delta: typing.Optional[ChatContentDeltaEventDelta] = None\n    logprobs: typing.Optional[LogprobItem] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_content_delta_event_delta.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_content_delta_event_delta_message import ChatContentDeltaEventDeltaMessage\n\n\nclass ChatContentDeltaEventDelta(UncheckedBaseModel):\n    message: typing.Optional[ChatContentDeltaEventDeltaMessage] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_content_delta_event_delta_message.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_content_delta_event_delta_message_content import ChatContentDeltaEventDeltaMessageContent\n\n\nclass ChatContentDeltaEventDeltaMessage(UncheckedBaseModel):\n    content: typing.Optional[ChatContentDeltaEventDeltaMessageContent] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_content_delta_event_delta_message_content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatContentDeltaEventDeltaMessageContent(UncheckedBaseModel):\n    thinking: typing.Optional[str] = None\n    text: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_content_end_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_event_type import ChatStreamEventType\n\n\nclass ChatContentEndEvent(ChatStreamEventType):\n    \"\"\"\n    A streamed delta event which signifies that the content block has ended.\n    \"\"\"\n\n    index: typing.Optional[int] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_content_start_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_content_start_event_delta import ChatContentStartEventDelta\nfrom .chat_stream_event_type import ChatStreamEventType\n\n\nclass ChatContentStartEvent(ChatStreamEventType):\n    \"\"\"\n    A streamed delta event which signifies that a new content block has started.\n    \"\"\"\n\n    index: typing.Optional[int] = None\n    delta: typing.Optional[ChatContentStartEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_content_start_event_delta.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_content_start_event_delta_message import ChatContentStartEventDeltaMessage\n\n\nclass ChatContentStartEventDelta(UncheckedBaseModel):\n    message: typing.Optional[ChatContentStartEventDeltaMessage] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_content_start_event_delta_message.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_content_start_event_delta_message_content import ChatContentStartEventDeltaMessageContent\n\n\nclass ChatContentStartEventDeltaMessage(UncheckedBaseModel):\n    content: typing.Optional[ChatContentStartEventDeltaMessageContent] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_content_start_event_delta_message_content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_content_start_event_delta_message_content_type import ChatContentStartEventDeltaMessageContentType\n\n\nclass ChatContentStartEventDeltaMessageContent(UncheckedBaseModel):\n    thinking: typing.Optional[str] = None\n    text: typing.Optional[str] = None\n    type: typing.Optional[ChatContentStartEventDeltaMessageContentType] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_content_start_event_delta_message_content_type.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nChatContentStartEventDeltaMessageContentType = typing.Union[typing.Literal[\"text\", \"thinking\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/chat_data_metrics.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatDataMetrics(UncheckedBaseModel):\n    num_train_turns: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The sum of all turns of valid train examples.\n    \"\"\"\n\n    num_eval_turns: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The sum of all turns of valid eval examples.\n    \"\"\"\n\n    preamble: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The preamble of this dataset.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_debug_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_event import ChatStreamEvent\n\n\nclass ChatDebugEvent(ChatStreamEvent):\n    prompt: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_document.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nChatDocument = typing.Dict[str, str]\n\"\"\"\nRelevant information that could be used by the model to generate a more accurate reply.\nThe contents of each document are generally short (under 300 words), and are passed in the form of a\ndictionary of strings. Some suggested keys are \"text\", \"author\", \"date\". Both the key name and the value will be\npassed to the model.\n\"\"\"\n"
  },
  {
    "path": "src/cohere/types/chat_document_source.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatDocumentSource(UncheckedBaseModel):\n    \"\"\"\n    A document source object containing the unique identifier of the document and the document itself.\n    \"\"\"\n\n    id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The unique identifier of the document\n    \"\"\"\n\n    document: typing.Optional[typing.Dict[str, typing.Any]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_finish_reason.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nChatFinishReason = typing.Union[\n    typing.Literal[\"COMPLETE\", \"STOP_SEQUENCE\", \"MAX_TOKENS\", \"TOOL_CALL\", \"ERROR\", \"TIMEOUT\"], typing.Any\n]\n"
  },
  {
    "path": "src/cohere/types/chat_message.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .tool_call import ToolCall\n\n\nclass ChatMessage(UncheckedBaseModel):\n    \"\"\"\n    Represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.\n\n    The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.\n    \"\"\"\n\n    message: str = pydantic.Field()\n    \"\"\"\n    Contents of the chat message.\n    \"\"\"\n\n    tool_calls: typing.Optional[typing.List[ToolCall]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_message_end_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_message_end_event_delta import ChatMessageEndEventDelta\nfrom .chat_stream_event_type import ChatStreamEventType\n\n\nclass ChatMessageEndEvent(ChatStreamEventType):\n    \"\"\"\n    A streamed event which signifies that the chat message has ended.\n    \"\"\"\n\n    id: typing.Optional[str] = None\n    delta: typing.Optional[ChatMessageEndEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_message_end_event_delta.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_finish_reason import ChatFinishReason\nfrom .usage import Usage\n\n\nclass ChatMessageEndEventDelta(UncheckedBaseModel):\n    error: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    An error message if an error occurred during the generation.\n    \"\"\"\n\n    finish_reason: typing.Optional[ChatFinishReason] = None\n    usage: typing.Optional[Usage] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_message_start_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_message_start_event_delta import ChatMessageStartEventDelta\nfrom .chat_stream_event_type import ChatStreamEventType\n\n\nclass ChatMessageStartEvent(ChatStreamEventType):\n    \"\"\"\n    A streamed event which signifies that a stream has started.\n    \"\"\"\n\n    id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Unique identifier for the generated reply.\n    \"\"\"\n\n    delta: typing.Optional[ChatMessageStartEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_message_start_event_delta.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_message_start_event_delta_message import ChatMessageStartEventDeltaMessage\n\n\nclass ChatMessageStartEventDelta(UncheckedBaseModel):\n    message: typing.Optional[ChatMessageStartEventDeltaMessage] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_message_start_event_delta_message.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatMessageStartEventDeltaMessage(UncheckedBaseModel):\n    role: typing.Optional[typing.Literal[\"assistant\"]] = pydantic.Field(default=None)\n    \"\"\"\n    The role of the message.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_message_v2.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\nfrom .assistant_message_v2content import AssistantMessageV2Content\nfrom .citation import Citation\nfrom .system_message_v2content import SystemMessageV2Content\nfrom .tool_call_v2 import ToolCallV2\nfrom .tool_message_v2content import ToolMessageV2Content\nfrom .user_message_v2content import UserMessageV2Content\n\n\nclass UserChatMessageV2(UncheckedBaseModel):\n    \"\"\"\n    Represents a single message in the chat history from a given role.\n    \"\"\"\n\n    role: typing.Literal[\"user\"] = \"user\"\n    content: UserMessageV2Content\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass AssistantChatMessageV2(UncheckedBaseModel):\n    \"\"\"\n    Represents a single message in the chat history from a given role.\n    \"\"\"\n\n    role: typing.Literal[\"assistant\"] = \"assistant\"\n    tool_calls: typing.Optional[typing.List[ToolCallV2]] = None\n    tool_plan: typing.Optional[str] = None\n    content: typing.Optional[AssistantMessageV2Content] = None\n    citations: typing.Optional[typing.List[Citation]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass SystemChatMessageV2(UncheckedBaseModel):\n    \"\"\"\n    Represents a single message in the chat history from a given role.\n    \"\"\"\n\n    role: 
typing.Literal[\"system\"] = \"system\"\n    content: SystemMessageV2Content\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ToolChatMessageV2(UncheckedBaseModel):\n    \"\"\"\n    Represents a single message in the chat history from a given role.\n    \"\"\"\n\n    role: typing.Literal[\"tool\"] = \"tool\"\n    tool_call_id: str\n    content: ToolMessageV2Content\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nChatMessageV2 = typing_extensions.Annotated[\n    typing.Union[UserChatMessageV2, AssistantChatMessageV2, SystemChatMessageV2, ToolChatMessageV2],\n    UnionMetadata(discriminant=\"role\"),\n]\n"
  },
  {
    "path": "src/cohere/types/chat_messages.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom .chat_message_v2 import ChatMessageV2\n\nChatMessages = typing.List[ChatMessageV2]\n\"\"\"\nA list of chat messages in chronological order, representing a conversation between the user and the model.\n\nMessages can be from `User`, `Assistant`, `Tool` and `System` roles. Learn more about messages and roles in [the Chat API guide](https://docs.cohere.com/v2/docs/chat-api).\n\"\"\"\n"
  },
  {
    "path": "src/cohere/types/chat_request_citation_quality.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nChatRequestCitationQuality = typing.Union[typing.Literal[\"ENABLED\", \"DISABLED\", \"FAST\", \"ACCURATE\", \"OFF\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/chat_request_prompt_truncation.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nChatRequestPromptTruncation = typing.Union[typing.Literal[\"OFF\", \"AUTO\", \"AUTO_PRESERVE_ORDER\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/chat_request_safety_mode.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nChatRequestSafetyMode = typing.Union[typing.Literal[\"CONTEXTUAL\", \"STRICT\", \"NONE\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/chat_search_queries_generation_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_search_query import ChatSearchQuery\nfrom .chat_stream_event import ChatStreamEvent\n\n\nclass ChatSearchQueriesGenerationEvent(ChatStreamEvent):\n    search_queries: typing.List[ChatSearchQuery] = pydantic.Field()\n    \"\"\"\n    Generated search queries, meant to be used as part of the RAG flow.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_search_query.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatSearchQuery(UncheckedBaseModel):\n    \"\"\"\n    The generated search query. Contains the text of the query and a unique identifier for the query.\n    \"\"\"\n\n    text: str = pydantic.Field()\n    \"\"\"\n    The text of the search query.\n    \"\"\"\n\n    generation_id: str = pydantic.Field()\n    \"\"\"\n    Unique identifier for the generated search query. Useful for submitting feedback.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_search_result.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_search_query import ChatSearchQuery\nfrom .chat_search_result_connector import ChatSearchResultConnector\n\n\nclass ChatSearchResult(UncheckedBaseModel):\n    search_query: typing.Optional[ChatSearchQuery] = None\n    connector: ChatSearchResultConnector = pydantic.Field()\n    \"\"\"\n    The connector from which this result comes from.\n    \"\"\"\n\n    document_ids: typing.List[str] = pydantic.Field()\n    \"\"\"\n    Identifiers of documents found by this search query.\n    \"\"\"\n\n    error_message: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    An error message if the search failed.\n    \"\"\"\n\n    continue_on_failure: typing.Optional[bool] = pydantic.Field(default=None)\n    \"\"\"\n    Whether a chat request should continue or not if the request to this connector fails.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_search_result_connector.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatSearchResultConnector(UncheckedBaseModel):\n    \"\"\"\n    The connector used for fetching documents.\n    \"\"\"\n\n    id: str = pydantic.Field()\n    \"\"\"\n    The identifier of the connector.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_search_results_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_document import ChatDocument\nfrom .chat_search_result import ChatSearchResult\nfrom .chat_stream_event import ChatStreamEvent\n\n\nclass ChatSearchResultsEvent(ChatStreamEvent):\n    search_results: typing.Optional[typing.List[ChatSearchResult]] = pydantic.Field(default=None)\n    \"\"\"\n    Conducted searches and the ids of documents retrieved from each of them.\n    \"\"\"\n\n    documents: typing.Optional[typing.List[ChatDocument]] = pydantic.Field(default=None)\n    \"\"\"\n    Documents fetched from searches or provided by the user.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_stream_end_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_end_event_finish_reason import ChatStreamEndEventFinishReason\nfrom .chat_stream_event import ChatStreamEvent\nfrom .non_streamed_chat_response import NonStreamedChatResponse\n\n\nclass ChatStreamEndEvent(ChatStreamEvent):\n    finish_reason: ChatStreamEndEventFinishReason = pydantic.Field()\n    \"\"\"\n    - `COMPLETE` - the model sent back a finished reply\n    - `ERROR_LIMIT` - the reply was cut off because the model reached the maximum number of tokens for its context length\n    - `MAX_TOKENS` - the reply was cut off because the model reached the maximum number of tokens specified by the max_tokens parameter\n    - `ERROR` - something went wrong when generating the reply\n    - `ERROR_TOXIC` - the model generated a reply that was deemed toxic\n    \"\"\"\n\n    response: NonStreamedChatResponse = pydantic.Field()\n    \"\"\"\n    The consolidated response from the model. Contains the generated reply and all the other information streamed back in the previous events.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_stream_end_event_finish_reason.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nChatStreamEndEventFinishReason = typing.Union[\n    typing.Literal[\"COMPLETE\", \"ERROR_LIMIT\", \"MAX_TOKENS\", \"ERROR\", \"ERROR_TOXIC\"], typing.Any\n]\n"
  },
  {
    "path": "src/cohere/types/chat_stream_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatStreamEvent(UncheckedBaseModel):\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_stream_event_type.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatStreamEventType(UncheckedBaseModel):\n    \"\"\"\n    The streamed event types\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_stream_request_citation_quality.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nChatStreamRequestCitationQuality = typing.Union[\n    typing.Literal[\"ENABLED\", \"DISABLED\", \"FAST\", \"ACCURATE\", \"OFF\"], typing.Any\n]\n"
  },
  {
    "path": "src/cohere/types/chat_stream_request_prompt_truncation.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nChatStreamRequestPromptTruncation = typing.Union[typing.Literal[\"OFF\", \"AUTO\", \"AUTO_PRESERVE_ORDER\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/chat_stream_request_safety_mode.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nChatStreamRequestSafetyMode = typing.Union[typing.Literal[\"CONTEXTUAL\", \"STRICT\", \"NONE\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/chat_stream_start_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_event import ChatStreamEvent\n\n\nclass ChatStreamStartEvent(ChatStreamEvent):\n    generation_id: str = pydantic.Field()\n    \"\"\"\n    Unique identifier for the generated reply. Useful for submitting feedback.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_text_content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatTextContent(UncheckedBaseModel):\n    \"\"\"\n    Text content of the message.\n    \"\"\"\n\n    text: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_text_generation_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_event import ChatStreamEvent\n\n\nclass ChatTextGenerationEvent(ChatStreamEvent):\n    text: str = pydantic.Field()\n    \"\"\"\n    The next batch of text generated by the model.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_text_response_format.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatTextResponseFormat(UncheckedBaseModel):\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_text_response_format_v2.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatTextResponseFormatV2(UncheckedBaseModel):\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_thinking_content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatThinkingContent(UncheckedBaseModel):\n    \"\"\"\n    Thinking content of the message. This will be present when `thinking` is enabled, and will contain the models internal reasoning.\n    \"\"\"\n\n    thinking: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_call_delta_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_event_type import ChatStreamEventType\nfrom .chat_tool_call_delta_event_delta import ChatToolCallDeltaEventDelta\n\n\nclass ChatToolCallDeltaEvent(ChatStreamEventType):\n    \"\"\"\n    A streamed event delta which signifies a delta in tool call arguments.\n    \"\"\"\n\n    index: typing.Optional[int] = None\n    delta: typing.Optional[ChatToolCallDeltaEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_call_delta_event_delta.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_tool_call_delta_event_delta_message import ChatToolCallDeltaEventDeltaMessage\n\n\nclass ChatToolCallDeltaEventDelta(UncheckedBaseModel):\n    message: typing.Optional[ChatToolCallDeltaEventDeltaMessage] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_call_delta_event_delta_message.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_tool_call_delta_event_delta_message_tool_calls import ChatToolCallDeltaEventDeltaMessageToolCalls\n\n\nclass ChatToolCallDeltaEventDeltaMessage(UncheckedBaseModel):\n    tool_calls: typing.Optional[ChatToolCallDeltaEventDeltaMessageToolCalls] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_call_delta_event_delta_message_tool_calls.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_tool_call_delta_event_delta_message_tool_calls_function import (\n    ChatToolCallDeltaEventDeltaMessageToolCallsFunction,\n)\n\n\nclass ChatToolCallDeltaEventDeltaMessageToolCalls(UncheckedBaseModel):\n    function: typing.Optional[ChatToolCallDeltaEventDeltaMessageToolCallsFunction] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_call_delta_event_delta_message_tool_calls_function.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatToolCallDeltaEventDeltaMessageToolCallsFunction(UncheckedBaseModel):\n    arguments: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_call_end_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_event_type import ChatStreamEventType\n\n\nclass ChatToolCallEndEvent(ChatStreamEventType):\n    \"\"\"\n    A streamed event delta which signifies a tool call has finished streaming.\n    \"\"\"\n\n    index: typing.Optional[int] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_call_start_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_event_type import ChatStreamEventType\nfrom .chat_tool_call_start_event_delta import ChatToolCallStartEventDelta\n\n\nclass ChatToolCallStartEvent(ChatStreamEventType):\n    \"\"\"\n    A streamed event delta which signifies a tool call has started streaming.\n    \"\"\"\n\n    index: typing.Optional[int] = None\n    delta: typing.Optional[ChatToolCallStartEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_call_start_event_delta.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_tool_call_start_event_delta_message import ChatToolCallStartEventDeltaMessage\n\n\nclass ChatToolCallStartEventDelta(UncheckedBaseModel):\n    message: typing.Optional[ChatToolCallStartEventDeltaMessage] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_call_start_event_delta_message.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .tool_call_v2 import ToolCallV2\n\n\nclass ChatToolCallStartEventDeltaMessage(UncheckedBaseModel):\n    tool_calls: typing.Optional[ToolCallV2] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_calls_chunk_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_event import ChatStreamEvent\nfrom .tool_call_delta import ToolCallDelta\n\n\nclass ChatToolCallsChunkEvent(ChatStreamEvent):\n    tool_call_delta: ToolCallDelta\n    text: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_calls_generation_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_event import ChatStreamEvent\nfrom .tool_call import ToolCall\n\n\nclass ChatToolCallsGenerationEvent(ChatStreamEvent):\n    text: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The text generated related to the tool calls generated\n    \"\"\"\n\n    tool_calls: typing.List[ToolCall]\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_message.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .tool_result import ToolResult\n\n\nclass ChatToolMessage(UncheckedBaseModel):\n    \"\"\"\n    Represents tool result in the chat history.\n    \"\"\"\n\n    tool_results: typing.Optional[typing.List[ToolResult]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_plan_delta_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_event_type import ChatStreamEventType\nfrom .chat_tool_plan_delta_event_delta import ChatToolPlanDeltaEventDelta\n\n\nclass ChatToolPlanDeltaEvent(ChatStreamEventType):\n    \"\"\"\n    A streamed event which contains a delta of tool plan text.\n    \"\"\"\n\n    delta: typing.Optional[ChatToolPlanDeltaEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_plan_delta_event_delta.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .chat_tool_plan_delta_event_delta_message import ChatToolPlanDeltaEventDeltaMessage\n\n\nclass ChatToolPlanDeltaEventDelta(UncheckedBaseModel):\n    message: typing.Optional[ChatToolPlanDeltaEventDeltaMessage] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_plan_delta_event_delta_message.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatToolPlanDeltaEventDeltaMessage(UncheckedBaseModel):\n    tool_plan: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/chat_tool_source.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ChatToolSource(UncheckedBaseModel):\n    id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The unique identifier of the document\n    \"\"\"\n\n    tool_output: typing.Optional[typing.Dict[str, typing.Any]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/check_api_key_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass CheckApiKeyResponse(UncheckedBaseModel):\n    valid: bool\n    organization_id: typing.Optional[str] = None\n    owner_id: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/citation.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .citation_type import CitationType\nfrom .source import Source\n\n\nclass Citation(UncheckedBaseModel):\n    \"\"\"\n    Citation information containing sources and the text cited.\n    \"\"\"\n\n    start: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Start index of the cited snippet in the original source text.\n    \"\"\"\n\n    end: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    End index of the cited snippet in the original source text.\n    \"\"\"\n\n    text: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Text snippet that is being cited.\n    \"\"\"\n\n    sources: typing.Optional[typing.List[Source]] = None\n    content_index: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Index of the content block in which this citation appears.\n    \"\"\"\n\n    type: typing.Optional[CitationType] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/citation_end_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_event_type import ChatStreamEventType\n\n\nclass CitationEndEvent(ChatStreamEventType):\n    \"\"\"\n    A streamed event which signifies a citation has finished streaming.\n    \"\"\"\n\n    index: typing.Optional[int] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/citation_options.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .citation_options_mode import CitationOptionsMode\n\n\nclass CitationOptions(UncheckedBaseModel):\n    \"\"\"\n    Options for controlling citation generation.\n    \"\"\"\n\n    mode: typing.Optional[CitationOptionsMode] = pydantic.Field(default=None)\n    \"\"\"\n    Defaults to `\"enabled\"`.\n    Citations are enabled by default for models that support it, but can be turned off by setting `\"type\": \"disabled\"`.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/citation_options_mode.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nCitationOptionsMode = typing.Union[typing.Literal[\"ENABLED\", \"DISABLED\", \"FAST\", \"ACCURATE\", \"OFF\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/citation_start_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .chat_stream_event_type import ChatStreamEventType\nfrom .citation_start_event_delta import CitationStartEventDelta\n\n\nclass CitationStartEvent(ChatStreamEventType):\n    \"\"\"\n    A streamed event which signifies a citation has been created.\n    \"\"\"\n\n    index: typing.Optional[int] = None\n    delta: typing.Optional[CitationStartEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/citation_start_event_delta.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .citation_start_event_delta_message import CitationStartEventDeltaMessage\n\n\nclass CitationStartEventDelta(UncheckedBaseModel):\n    message: typing.Optional[CitationStartEventDeltaMessage] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/citation_start_event_delta_message.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .citation import Citation\n\n\nclass CitationStartEventDeltaMessage(UncheckedBaseModel):\n    citations: typing.Optional[Citation] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/citation_type.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nCitationType = typing.Union[typing.Literal[\"TEXT_CONTENT\", \"THINKING_CONTENT\", \"PLAN\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/classify_data_metrics.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .label_metric import LabelMetric\n\n\nclass ClassifyDataMetrics(UncheckedBaseModel):\n    label_metrics: typing.Optional[typing.List[LabelMetric]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/classify_example.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ClassifyExample(UncheckedBaseModel):\n    text: typing.Optional[str] = None\n    label: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/classify_request_truncate.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nClassifyRequestTruncate = typing.Union[typing.Literal[\"NONE\", \"START\", \"END\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/classify_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .api_meta import ApiMeta\nfrom .classify_response_classifications_item import ClassifyResponseClassificationsItem\n\n\nclass ClassifyResponse(UncheckedBaseModel):\n    id: str\n    classifications: typing.List[ClassifyResponseClassificationsItem]\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/classify_response_classifications_item.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .classify_response_classifications_item_classification_type import (\n    ClassifyResponseClassificationsItemClassificationType,\n)\nfrom .classify_response_classifications_item_labels_value import ClassifyResponseClassificationsItemLabelsValue\n\n\nclass ClassifyResponseClassificationsItem(UncheckedBaseModel):\n    id: str\n    input: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The input text that was classified\n    \"\"\"\n\n    prediction: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The predicted label for the associated query (only filled for single-label models)\n    \"\"\"\n\n    predictions: typing.List[str] = pydantic.Field()\n    \"\"\"\n    An array containing the predicted labels for the associated query (only filled for single-label classification)\n    \"\"\"\n\n    confidence: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The confidence score for the top predicted class (only filled for single-label classification)\n    \"\"\"\n\n    confidences: typing.List[float] = pydantic.Field()\n    \"\"\"\n    An array containing the confidence scores of all the predictions in the same order\n    \"\"\"\n\n    labels: typing.Dict[str, ClassifyResponseClassificationsItemLabelsValue] = pydantic.Field()\n    \"\"\"\n    A map containing each label and its confidence score according to the classifier. All the confidence scores add up to 1 for single-label classification. For multi-label classification the label confidences are independent of each other, so they don't have to sum up to 1.\n    \"\"\"\n\n    classification_type: ClassifyResponseClassificationsItemClassificationType = pydantic.Field()\n    \"\"\"\n    The type of classification performed\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/classify_response_classifications_item_classification_type.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nClassifyResponseClassificationsItemClassificationType = typing.Union[\n    typing.Literal[\"single-label\", \"multi-label\"], typing.Any\n]\n"
  },
  {
    "path": "src/cohere/types/classify_response_classifications_item_labels_value.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ClassifyResponseClassificationsItemLabelsValue(UncheckedBaseModel):\n    confidence: typing.Optional[float] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/compatible_endpoint.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nCompatibleEndpoint = typing.Union[\n    typing.Literal[\"chat\", \"embed\", \"classify\", \"summarize\", \"rerank\", \"rate\", \"generate\"], typing.Any\n]\n"
  },
  {
    "path": "src/cohere/types/connector.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport datetime as dt\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .connector_auth_status import ConnectorAuthStatus\nfrom .connector_o_auth import ConnectorOAuth\n\n\nclass Connector(UncheckedBaseModel):\n    \"\"\"\n    A connector allows you to integrate data sources with the '/chat' endpoint to create grounded generations with citations to the data source.\n    documents to help answer users.\n    \"\"\"\n\n    id: str = pydantic.Field()\n    \"\"\"\n    The unique identifier of the connector (used in both `/connectors` & `/chat` endpoints).\n    This is automatically created from the name of the connector upon registration.\n    \"\"\"\n\n    organization_id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The organization to which this connector belongs. This is automatically set to\n    the organization of the user who created the connector.\n    \"\"\"\n\n    name: str = pydantic.Field()\n    \"\"\"\n    A human-readable name for the connector.\n    \"\"\"\n\n    description: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    A description of the connector.\n    \"\"\"\n\n    url: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The URL of the connector that will be used to search for documents.\n    \"\"\"\n\n    created_at: dt.datetime = pydantic.Field()\n    \"\"\"\n    The UTC time at which the connector was created.\n    \"\"\"\n\n    updated_at: dt.datetime = pydantic.Field()\n    \"\"\"\n    The UTC time at which the connector was last updated.\n    \"\"\"\n\n    excludes: typing.Optional[typing.List[str]] = pydantic.Field(default=None)\n    \"\"\"\n    A list of fields to exclude from the prompt (fields remain in the document).\n    \"\"\"\n\n    auth_type: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The type of authentication/authorization used by the connector. Possible values: [oauth, service_auth]\n    \"\"\"\n\n    oauth: typing.Optional[ConnectorOAuth] = pydantic.Field(default=None)\n    \"\"\"\n    The OAuth 2.0 configuration for the connector.\n    \"\"\"\n\n    auth_status: typing.Optional[ConnectorAuthStatus] = pydantic.Field(default=None)\n    \"\"\"\n    The OAuth status for the user making the request. One of [\"valid\", \"expired\", \"\"]. Empty string (field is omitted) means the user has not authorized the connector yet.\n    \"\"\"\n\n    active: typing.Optional[bool] = pydantic.Field(default=None)\n    \"\"\"\n    Whether the connector is active or not.\n    \"\"\"\n\n    continue_on_failure: typing.Optional[bool] = pydantic.Field(default=None)\n    \"\"\"\n    Whether a chat request should continue or not if the request to this connector fails.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/connector_auth_status.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nConnectorAuthStatus = typing.Union[typing.Literal[\"valid\", \"expired\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/connector_o_auth.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ConnectorOAuth(UncheckedBaseModel):\n    client_id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The OAuth 2.0 client ID. This field is encrypted at rest.\n    \"\"\"\n\n    client_secret: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The OAuth 2.0 client Secret. This field is encrypted at rest and never returned in a response.\n    \"\"\"\n\n    authorize_url: str = pydantic.Field()\n    \"\"\"\n    The OAuth 2.0 /authorize endpoint to use when users authorize the connector.\n    \"\"\"\n\n    token_url: str = pydantic.Field()\n    \"\"\"\n    The OAuth 2.0 /token endpoint to use when users authorize the connector.\n    \"\"\"\n\n    scope: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The OAuth scopes to request when users authorize the connector.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\nfrom .image_url import ImageUrl\n\n\nclass TextContent(UncheckedBaseModel):\n    \"\"\"\n    A Content block which contains information about the content type and the content itself.\n    \"\"\"\n\n    type: typing.Literal[\"text\"] = \"text\"\n    text: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ImageUrlContent(UncheckedBaseModel):\n    \"\"\"\n    A Content block which contains information about the content type and the content itself.\n    \"\"\"\n\n    type: typing.Literal[\"image_url\"] = \"image_url\"\n    image_url: ImageUrl\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nContent = typing_extensions.Annotated[typing.Union[TextContent, ImageUrlContent], UnionMetadata(discriminant=\"type\")]\n"
  },
  {
    "path": "src/cohere/types/create_connector_o_auth.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass CreateConnectorOAuth(UncheckedBaseModel):\n    client_id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The OAuth 2.0 client ID. This field is encrypted at rest.\n    \"\"\"\n\n    client_secret: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The OAuth 2.0 client Secret. This field is encrypted at rest and never returned in a response.\n    \"\"\"\n\n    authorize_url: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The OAuth 2.0 /authorize endpoint to use when users authorize the connector.\n    \"\"\"\n\n    token_url: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The OAuth 2.0 /token endpoint to use when users authorize the connector.\n    \"\"\"\n\n    scope: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The OAuth scopes to request when users authorize the connector.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/create_connector_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .connector import Connector\n\n\nclass CreateConnectorResponse(UncheckedBaseModel):\n    connector: Connector\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/create_connector_service_auth.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .auth_token_type import AuthTokenType\n\n\nclass CreateConnectorServiceAuth(UncheckedBaseModel):\n    type: AuthTokenType\n    token: str = pydantic.Field()\n    \"\"\"\n    The token that will be used in the HTTP Authorization header when making requests to the connector. This field is encrypted at rest and never returned in a response.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/create_embed_job_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .api_meta import ApiMeta\n\n\nclass CreateEmbedJobResponse(UncheckedBaseModel):\n    \"\"\"\n    Response from creating an embed job.\n    \"\"\"\n\n    job_id: str\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/dataset.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport datetime as dt\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.serialization import FieldMetadata\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .dataset_part import DatasetPart\nfrom .dataset_type import DatasetType\nfrom .dataset_validation_status import DatasetValidationStatus\n\n\nclass Dataset(UncheckedBaseModel):\n    id: str = pydantic.Field()\n    \"\"\"\n    The dataset ID\n    \"\"\"\n\n    name: str = pydantic.Field()\n    \"\"\"\n    The name of the dataset\n    \"\"\"\n\n    created_at: dt.datetime = pydantic.Field()\n    \"\"\"\n    The creation date\n    \"\"\"\n\n    updated_at: dt.datetime = pydantic.Field()\n    \"\"\"\n    The last update date\n    \"\"\"\n\n    dataset_type: DatasetType\n    validation_status: DatasetValidationStatus\n    validation_error: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Errors found during validation\n    \"\"\"\n\n    schema_: typing_extensions.Annotated[\n        typing.Optional[str],\n        FieldMetadata(alias=\"schema\"),\n        pydantic.Field(alias=\"schema\", description=\"the avro schema of the dataset\"),\n    ] = None\n    required_fields: typing.Optional[typing.List[str]] = None\n    preserve_fields: typing.Optional[typing.List[str]] = None\n    dataset_parts: typing.Optional[typing.List[DatasetPart]] = pydantic.Field(default=None)\n    \"\"\"\n    the underlying files that make up the dataset\n    \"\"\"\n\n    validation_warnings: typing.Optional[typing.List[str]] = pydantic.Field(default=None)\n    \"\"\"\n    warnings found during validation\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/dataset_part.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass DatasetPart(UncheckedBaseModel):\n    id: str = pydantic.Field()\n    \"\"\"\n    The dataset part ID\n    \"\"\"\n\n    name: str = pydantic.Field()\n    \"\"\"\n    The name of the dataset part\n    \"\"\"\n\n    url: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The download url of the file\n    \"\"\"\n\n    index: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The index of the file\n    \"\"\"\n\n    size_bytes: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The size of the file in bytes\n    \"\"\"\n\n    num_rows: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The number of rows in the file\n    \"\"\"\n\n    original_url: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The download url of the original file\n    \"\"\"\n\n    samples: typing.Optional[typing.List[str]] = pydantic.Field(default=None)\n    \"\"\"\n    The first few rows of the parsed file\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/dataset_type.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nDatasetType = typing.Union[\n    typing.Literal[\n        \"embed-input\",\n        \"embed-result\",\n        \"cluster-result\",\n        \"cluster-outliers\",\n        \"reranker-finetune-input\",\n        \"single-label-classification-finetune-input\",\n        \"chat-finetune-input\",\n        \"multi-label-classification-finetune-input\",\n        \"batch-chat-input\",\n        \"batch-openai-chat-input\",\n        \"batch-embed-v2-input\",\n        \"batch-chat-v2-input\",\n    ],\n    typing.Any,\n]\n"
  },
  {
    "path": "src/cohere/types/dataset_validation_status.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nDatasetValidationStatus = typing.Union[\n    typing.Literal[\"unknown\", \"queued\", \"processing\", \"failed\", \"validated\", \"skipped\"], typing.Any\n]\n"
  },
  {
    "path": "src/cohere/types/delete_connector_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nDeleteConnectorResponse = typing.Dict[str, typing.Any]\n"
  },
  {
    "path": "src/cohere/types/detokenize_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .api_meta import ApiMeta\n\n\nclass DetokenizeResponse(UncheckedBaseModel):\n    text: str = pydantic.Field()\n    \"\"\"\n    A string representing the list of tokens.\n    \"\"\"\n\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/document.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass Document(UncheckedBaseModel):\n    \"\"\"\n    Relevant information that could be used by the model to generate a more accurate reply.\n    The content of each document are generally short (should be under 300 words). Metadata should be used to provide additional information, both the key name and the value will be\n    passed to the model.\n    \"\"\"\n\n    data: typing.Dict[str, typing.Any] = pydantic.Field()\n    \"\"\"\n    A relevant document that the model can cite to generate a more accurate reply. Each document is a string-any dictionary.\n    \"\"\"\n\n    id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Unique identifier for this document which will be referenced in citations. If not provided an ID will be automatically generated.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/document_content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .document import Document\n\n\nclass DocumentContent(UncheckedBaseModel):\n    \"\"\"\n    Document content.\n    \"\"\"\n\n    document: Document\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/embed_by_type_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .api_meta import ApiMeta\nfrom .embed_by_type_response_embeddings import EmbedByTypeResponseEmbeddings\nfrom .embed_by_type_response_response_type import EmbedByTypeResponseResponseType\nfrom .image import Image\n\n\nclass EmbedByTypeResponse(UncheckedBaseModel):\n    response_type: typing.Optional[EmbedByTypeResponseResponseType] = None\n    id: str\n    embeddings: EmbedByTypeResponseEmbeddings = pydantic.Field()\n    \"\"\"\n    An object with different embedding types. The length of each embedding type array will be the same as the length of the original `texts` array.\n    \"\"\"\n\n    texts: typing.Optional[typing.List[str]] = pydantic.Field(default=None)\n    \"\"\"\n    The text entries for which embeddings were returned.\n    \"\"\"\n\n    images: typing.Optional[typing.List[Image]] = pydantic.Field(default=None)\n    \"\"\"\n    The image entries for which embeddings were returned.\n    \"\"\"\n\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/embed_by_type_response_embeddings.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.serialization import FieldMetadata\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass EmbedByTypeResponseEmbeddings(UncheckedBaseModel):\n    \"\"\"\n    An object with different embedding types. The length of each embedding type array will be the same as the length of the original `texts` array.\n    \"\"\"\n\n    float_: typing_extensions.Annotated[\n        typing.Optional[typing.List[typing.List[float]]],\n        FieldMetadata(alias=\"float\"),\n        pydantic.Field(alias=\"float\", description=\"An array of float embeddings.\"),\n    ] = None\n    int8: typing.Optional[typing.List[typing.List[int]]] = pydantic.Field(default=None)\n    \"\"\"\n    An array of signed int8 embeddings. Each value is between -128 and 127.\n    \"\"\"\n\n    uint8: typing.Optional[typing.List[typing.List[int]]] = pydantic.Field(default=None)\n    \"\"\"\n    An array of unsigned int8 embeddings. Each value is between 0 and 255.\n    \"\"\"\n\n    binary: typing.Optional[typing.List[typing.List[int]]] = pydantic.Field(default=None)\n    \"\"\"\n    An array of packed signed binary embeddings. The length of each binary embedding is 1/8 the length of the float embeddings of the provided model. Each value is between -128 and 127.\n    \"\"\"\n\n    ubinary: typing.Optional[typing.List[typing.List[int]]] = pydantic.Field(default=None)\n    \"\"\"\n    An array of packed unsigned binary embeddings. The length of each binary embedding is 1/8 the length of the float embeddings of the provided model. Each value is between 0 and 255.\n    \"\"\"\n\n    base64: typing.Optional[typing.List[str]] = pydantic.Field(default=None)\n    \"\"\"\n    An array of base64 embeddings. Each string is the result of appending the float embedding bytes together and base64 encoding that.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/embed_by_type_response_response_type.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nEmbedByTypeResponseResponseType = typing.Union[typing.Literal[\"embeddings_floats\", \"embeddings_by_type\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/embed_content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\nfrom .embed_image_url import EmbedImageUrl\n\n\nclass ImageUrlEmbedContent(UncheckedBaseModel):\n    type: typing.Literal[\"image_url\"] = \"image_url\"\n    image_url: typing.Optional[EmbedImageUrl] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass TextEmbedContent(UncheckedBaseModel):\n    type: typing.Literal[\"text\"] = \"text\"\n    text: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nEmbedContent = typing_extensions.Annotated[\n    typing.Union[ImageUrlEmbedContent, TextEmbedContent], UnionMetadata(discriminant=\"type\")\n]\n"
  },
  {
    "path": "src/cohere/types/embed_floats_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .api_meta import ApiMeta\nfrom .image import Image\n\n\nclass EmbedFloatsResponse(UncheckedBaseModel):\n    id: str\n    embeddings: typing.List[typing.List[float]] = pydantic.Field()\n    \"\"\"\n    An array of embeddings, where each embedding is an array of floats. The length of the `embeddings` array will be the same as the length of the original `texts` array.\n    \"\"\"\n\n    texts: typing.List[str] = pydantic.Field()\n    \"\"\"\n    The text entries for which embeddings were returned.\n    \"\"\"\n\n    images: typing.Optional[typing.List[Image]] = pydantic.Field(default=None)\n    \"\"\"\n    The image entries for which embeddings were returned.\n    \"\"\"\n\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/embed_image.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .embed_image_url import EmbedImageUrl\n\n\nclass EmbedImage(UncheckedBaseModel):\n    \"\"\"\n    Image content of the input. Supported with Embed v3.0 and newer models.\n    \"\"\"\n\n    image_url: typing.Optional[EmbedImageUrl] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/embed_image_url.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass EmbedImageUrl(UncheckedBaseModel):\n    \"\"\"\n    Base64 url of image.\n    \"\"\"\n\n    url: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/embed_input.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .embed_content import EmbedContent\n\n\nclass EmbedInput(UncheckedBaseModel):\n    content: typing.List[EmbedContent] = pydantic.Field()\n    \"\"\"\n    An array of objects containing the input data for the model to embed.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/embed_input_type.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nEmbedInputType = typing.Union[\n    typing.Literal[\"search_document\", \"search_query\", \"classification\", \"clustering\", \"image\"], typing.Any\n]\n"
  },
  {
    "path": "src/cohere/types/embed_job.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport datetime as dt\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .api_meta import ApiMeta\nfrom .embed_job_status import EmbedJobStatus\nfrom .embed_job_truncate import EmbedJobTruncate\n\n\nclass EmbedJob(UncheckedBaseModel):\n    job_id: str = pydantic.Field()\n    \"\"\"\n    ID of the embed job\n    \"\"\"\n\n    name: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The name of the embed job\n    \"\"\"\n\n    status: EmbedJobStatus = pydantic.Field()\n    \"\"\"\n    The status of the embed job\n    \"\"\"\n\n    created_at: dt.datetime = pydantic.Field()\n    \"\"\"\n    The creation date of the embed job\n    \"\"\"\n\n    input_dataset_id: str = pydantic.Field()\n    \"\"\"\n    ID of the input dataset\n    \"\"\"\n\n    output_dataset_id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    ID of the resulting output dataset\n    \"\"\"\n\n    model: str = pydantic.Field()\n    \"\"\"\n    ID of the model used to embed\n    \"\"\"\n\n    truncate: EmbedJobTruncate = pydantic.Field()\n    \"\"\"\n    The truncation option used\n    \"\"\"\n\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/embed_job_status.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nEmbedJobStatus = typing.Union[typing.Literal[\"processing\", \"complete\", \"cancelling\", \"cancelled\", \"failed\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/embed_job_truncate.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nEmbedJobTruncate = typing.Union[typing.Literal[\"START\", \"END\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/embed_request_truncate.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nEmbedRequestTruncate = typing.Union[typing.Literal[\"NONE\", \"START\", \"END\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/embed_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\nfrom .api_meta import ApiMeta\nfrom .embed_by_type_response_embeddings import EmbedByTypeResponseEmbeddings\nfrom .image import Image\n\n\nclass EmbeddingsFloatsEmbedResponse(UncheckedBaseModel):\n    response_type: typing.Literal[\"embeddings_floats\"] = \"embeddings_floats\"\n    id: str\n    embeddings: typing.List[typing.List[float]]\n    texts: typing.List[str]\n    images: typing.Optional[typing.List[Image]] = None\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass EmbeddingsByTypeEmbedResponse(UncheckedBaseModel):\n    response_type: typing.Literal[\"embeddings_by_type\"] = \"embeddings_by_type\"\n    id: str\n    embeddings: EmbedByTypeResponseEmbeddings\n    texts: typing.Optional[typing.List[str]] = None\n    images: typing.Optional[typing.List[Image]] = None\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nEmbedResponse = typing_extensions.Annotated[\n    typing.Union[EmbeddingsFloatsEmbedResponse, EmbeddingsByTypeEmbedResponse],\n    UnionMetadata(discriminant=\"response_type\"),\n]\n"
  },
  {
    "path": "src/cohere/types/embed_text.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass EmbedText(UncheckedBaseModel):\n    \"\"\"\n    Text content of the input.\n    \"\"\"\n\n    text: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/embedding_type.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nEmbeddingType = typing.Union[typing.Literal[\"float\", \"int8\", \"uint8\", \"binary\", \"ubinary\", \"base64\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/finetune_dataset_metrics.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass FinetuneDatasetMetrics(UncheckedBaseModel):\n    trainable_token_count: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The number of tokens of valid examples that can be used for training.\n    \"\"\"\n\n    total_examples: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The overall number of examples.\n    \"\"\"\n\n    train_examples: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The number of training examples.\n    \"\"\"\n\n    train_size_bytes: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The size in bytes of all training examples.\n    \"\"\"\n\n    eval_examples: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Number of evaluation examples.\n    \"\"\"\n\n    eval_size_bytes: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The size in bytes of all eval examples.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/finish_reason.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nFinishReason = typing.Union[\n    typing.Literal[\n        \"COMPLETE\", \"STOP_SEQUENCE\", \"ERROR\", \"ERROR_TOXIC\", \"ERROR_LIMIT\", \"USER_CANCEL\", \"MAX_TOKENS\", \"TIMEOUT\"\n    ],\n    typing.Any,\n]\n"
  },
  {
    "path": "src/cohere/types/generate_request_return_likelihoods.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nGenerateRequestReturnLikelihoods = typing.Union[typing.Literal[\"GENERATION\", \"ALL\", \"NONE\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/generate_request_truncate.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nGenerateRequestTruncate = typing.Union[typing.Literal[\"NONE\", \"START\", \"END\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/generate_stream_end.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .finish_reason import FinishReason\nfrom .generate_stream_end_response import GenerateStreamEndResponse\nfrom .generate_stream_event import GenerateStreamEvent\n\n\nclass GenerateStreamEnd(GenerateStreamEvent):\n    is_finished: bool\n    finish_reason: typing.Optional[FinishReason] = None\n    response: GenerateStreamEndResponse\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/generate_stream_end_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .single_generation_in_stream import SingleGenerationInStream\n\n\nclass GenerateStreamEndResponse(UncheckedBaseModel):\n    id: str\n    prompt: typing.Optional[str] = None\n    generations: typing.Optional[typing.List[SingleGenerationInStream]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/generate_stream_error.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .finish_reason import FinishReason\nfrom .generate_stream_event import GenerateStreamEvent\n\n\nclass GenerateStreamError(GenerateStreamEvent):\n    index: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Refers to the nth generation. Only present when `num_generations` is greater than zero.\n    \"\"\"\n\n    is_finished: bool\n    finish_reason: FinishReason\n    err: str = pydantic.Field()\n    \"\"\"\n    Error message\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/generate_stream_event.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass GenerateStreamEvent(UncheckedBaseModel):\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/generate_stream_request_return_likelihoods.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nGenerateStreamRequestReturnLikelihoods = typing.Union[typing.Literal[\"GENERATION\", \"ALL\", \"NONE\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/generate_stream_request_truncate.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nGenerateStreamRequestTruncate = typing.Union[typing.Literal[\"NONE\", \"START\", \"END\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/generate_stream_text.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom .generate_stream_event import GenerateStreamEvent\n\n\nclass GenerateStreamText(GenerateStreamEvent):\n    text: str = pydantic.Field()\n    \"\"\"\n    A segment of text of the generation.\n    \"\"\"\n\n    index: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Refers to the nth generation. Only present when `num_generations` is greater than zero, and only when text responses are being streamed.\n    \"\"\"\n\n    is_finished: bool\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/generate_streamed_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\nfrom .finish_reason import FinishReason\nfrom .generate_stream_end_response import GenerateStreamEndResponse\n\n\nclass TextGenerationGenerateStreamedResponse(UncheckedBaseModel):\n    \"\"\"\n    Response in content type stream when `stream` is `true` in the request parameters. Generation tokens are streamed with the GenerationStream response. The final response is of type GenerationFinalResponse.\n    \"\"\"\n\n    event_type: typing.Literal[\"text-generation\"] = \"text-generation\"\n    text: str\n    index: typing.Optional[int] = None\n    is_finished: bool\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass StreamEndGenerateStreamedResponse(UncheckedBaseModel):\n    \"\"\"\n    Response in content type stream when `stream` is `true` in the request parameters. Generation tokens are streamed with the GenerationStream response. 
The final response is of type GenerationFinalResponse.\n    \"\"\"\n\n    event_type: typing.Literal[\"stream-end\"] = \"stream-end\"\n    is_finished: bool\n    finish_reason: typing.Optional[FinishReason] = None\n    response: GenerateStreamEndResponse\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass StreamErrorGenerateStreamedResponse(UncheckedBaseModel):\n    \"\"\"\n    Response in content type stream when `stream` is `true` in the request parameters. Generation tokens are streamed with the GenerationStream response. The final response is of type GenerationFinalResponse.\n    \"\"\"\n\n    event_type: typing.Literal[\"stream-error\"] = \"stream-error\"\n    index: typing.Optional[int] = None\n    is_finished: bool\n    finish_reason: FinishReason\n    err: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nGenerateStreamedResponse = typing_extensions.Annotated[\n    typing.Union[\n        TextGenerationGenerateStreamedResponse, StreamEndGenerateStreamedResponse, StreamErrorGenerateStreamedResponse\n    ],\n    UnionMetadata(discriminant=\"event_type\"),\n]\n"
  },
  {
    "path": "src/cohere/types/generation.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .api_meta import ApiMeta\nfrom .single_generation import SingleGeneration\n\n\nclass Generation(UncheckedBaseModel):\n    id: str\n    prompt: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Prompt used for generations.\n    \"\"\"\n\n    generations: typing.List[SingleGeneration] = pydantic.Field()\n    \"\"\"\n    List of generated results\n    \"\"\"\n\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/get_connector_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .connector import Connector\n\n\nclass GetConnectorResponse(UncheckedBaseModel):\n    connector: Connector\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/get_model_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .compatible_endpoint import CompatibleEndpoint\nfrom .get_model_response_sampling_defaults import GetModelResponseSamplingDefaults\n\n\nclass GetModelResponse(UncheckedBaseModel):\n    \"\"\"\n    Contains information about the model and which API endpoints it can be used with.\n    \"\"\"\n\n    name: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Specify this name in the `model` parameter of API requests to use your chosen model.\n    \"\"\"\n\n    is_deprecated: typing.Optional[bool] = pydantic.Field(default=None)\n    \"\"\"\n    Whether the model is deprecated or not.\n    \"\"\"\n\n    endpoints: typing.Optional[typing.List[CompatibleEndpoint]] = pydantic.Field(default=None)\n    \"\"\"\n    The API endpoints that the model is compatible with.\n    \"\"\"\n\n    finetuned: typing.Optional[bool] = pydantic.Field(default=None)\n    \"\"\"\n    Whether the model has been fine-tuned or not.\n    \"\"\"\n\n    context_length: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The maximum number of tokens that the model can process in a single request. 
Note that not all of these tokens are always available due to special tokens and preambles that Cohere has added by default.\n    \"\"\"\n\n    tokenizer_url: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Public URL to the tokenizer's configuration file.\n    \"\"\"\n\n    default_endpoints: typing.Optional[typing.List[CompatibleEndpoint]] = pydantic.Field(default=None)\n    \"\"\"\n    The API endpoints that the model is default to.\n    \"\"\"\n\n    features: typing.Optional[typing.List[str]] = pydantic.Field(default=None)\n    \"\"\"\n    The features that the model supports.\n    \"\"\"\n\n    sampling_defaults: typing.Optional[GetModelResponseSamplingDefaults] = pydantic.Field(default=None)\n    \"\"\"\n    Default sampling parameters for this model when omitted from API requests.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/get_model_response_sampling_defaults.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass GetModelResponseSamplingDefaults(UncheckedBaseModel):\n    \"\"\"\n    Default sampling parameters for this model when omitted from API requests.\n    \"\"\"\n\n    temperature: typing.Optional[float] = None\n    k: typing.Optional[int] = None\n    p: typing.Optional[float] = None\n    frequency_penalty: typing.Optional[float] = None\n    presence_penalty: typing.Optional[float] = None\n    max_tokens_per_doc: typing.Optional[int] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/image.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass Image(UncheckedBaseModel):\n    width: int = pydantic.Field()\n    \"\"\"\n    Width of the image in pixels\n    \"\"\"\n\n    height: int = pydantic.Field()\n    \"\"\"\n    Height of the image in pixels\n    \"\"\"\n\n    format: str = pydantic.Field()\n    \"\"\"\n    Format of the image\n    \"\"\"\n\n    bit_depth: int = pydantic.Field()\n    \"\"\"\n    Bit depth of the image\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/image_content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .image_url import ImageUrl\n\n\nclass ImageContent(UncheckedBaseModel):\n    \"\"\"\n    Image content of the message.\n    \"\"\"\n\n    image_url: ImageUrl\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/image_url.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .image_url_detail import ImageUrlDetail\n\n\nclass ImageUrl(UncheckedBaseModel):\n    url: str = pydantic.Field()\n    \"\"\"\n    URL of an image. Can be either a base64 data URI or a web URL.\n    \"\"\"\n\n    detail: typing.Optional[ImageUrlDetail] = pydantic.Field(default=None)\n    \"\"\"\n    Controls the level of detail in image processing. `\"auto\"` is the default and lets the system choose, `\"low\"` is faster but less detailed, and `\"high\"` preserves maximum detail. You can save tokens and speed up responses by using detail: `\"low\"`.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/image_url_detail.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nImageUrlDetail = typing.Union[typing.Literal[\"auto\", \"low\", \"high\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/json_response_format.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.serialization import FieldMetadata\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass JsonResponseFormat(UncheckedBaseModel):\n    schema_: typing_extensions.Annotated[\n        typing.Optional[typing.Dict[str, typing.Any]],\n        FieldMetadata(alias=\"schema\"),\n        pydantic.Field(\n            alias=\"schema\",\n            description='A JSON schema object that the output will adhere to. There are some restrictions we have on the schema, refer to [our guide](https://docs.cohere.com/docs/structured-outputs-json#schema-constraints) for more information.\\nExample (required name and age object):\\n```json\\n{\\n  \"type\": \"object\",\\n  \"properties\": {\\n    \"name\": {\"type\": \"string\"},\\n    \"age\": {\"type\": \"integer\"}\\n  },\\n  \"required\": [\"name\", \"age\"]\\n}\\n```\\n\\n**Note**: This field must not be specified when the `type` is set to `\"text\"`.',\n        ),\n    ] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/json_response_format_v2.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass JsonResponseFormatV2(UncheckedBaseModel):\n    json_schema: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None)\n    \"\"\"\n    A [JSON schema](https://json-schema.org/overview/what-is-jsonschema) object that the output will adhere to. There are some restrictions we have on the schema, refer to [our guide](https://docs.cohere.com/docs/structured-outputs-json#schema-constraints) for more information.\n    Example (required name and age object):\n    ```json\n    {\n      \"type\": \"object\",\n      \"properties\": {\n        \"name\": {\"type\": \"string\"},\n        \"age\": {\"type\": \"integer\"}\n      },\n      \"required\": [\"name\", \"age\"]\n    }\n    ```\n    \n    **Note**: This field must not be specified when the `type` is set to `\"text\"`.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/label_metric.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass LabelMetric(UncheckedBaseModel):\n    total_examples: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Total number of examples for this label\n    \"\"\"\n\n    label: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    value of the label\n    \"\"\"\n\n    samples: typing.Optional[typing.List[str]] = pydantic.Field(default=None)\n    \"\"\"\n    samples for this label\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/list_connectors_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .connector import Connector\n\n\nclass ListConnectorsResponse(UncheckedBaseModel):\n    connectors: typing.List[Connector]\n    total_count: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    Total number of connectors.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/list_embed_job_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .embed_job import EmbedJob\n\n\nclass ListEmbedJobResponse(UncheckedBaseModel):\n    embed_jobs: typing.Optional[typing.List[EmbedJob]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/list_models_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .get_model_response import GetModelResponse\n\n\nclass ListModelsResponse(UncheckedBaseModel):\n    models: typing.List[GetModelResponse]\n    next_page_token: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    A token to retrieve the next page of results. Provide in the page_token parameter of the next request.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/logprob_item.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass LogprobItem(UncheckedBaseModel):\n    text: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The text chunk for which the log probabilities was calculated.\n    \"\"\"\n\n    token_ids: typing.List[int] = pydantic.Field()\n    \"\"\"\n    The token ids of each token used to construct the text chunk.\n    \"\"\"\n\n    logprobs: typing.Optional[typing.List[float]] = pydantic.Field(default=None)\n    \"\"\"\n    The log probability of each token used to construct the text chunk.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/message.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\nfrom .tool_call import ToolCall\nfrom .tool_result import ToolResult\n\n\nclass ChatbotMessage(UncheckedBaseModel):\n    role: typing.Literal[\"CHATBOT\"] = \"CHATBOT\"\n    message: str\n    tool_calls: typing.Optional[typing.List[ToolCall]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass SystemMessage(UncheckedBaseModel):\n    role: typing.Literal[\"SYSTEM\"] = \"SYSTEM\"\n    message: str\n    tool_calls: typing.Optional[typing.List[ToolCall]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass UserMessage(UncheckedBaseModel):\n    role: typing.Literal[\"USER\"] = \"USER\"\n    message: str\n    tool_calls: typing.Optional[typing.List[ToolCall]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ToolMessage(UncheckedBaseModel):\n    role: typing.Literal[\"TOOL\"] = \"TOOL\"\n    tool_results: typing.Optional[typing.List[ToolResult]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: 
ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nMessage = typing_extensions.Annotated[\n    typing.Union[ChatbotMessage, SystemMessage, UserMessage, ToolMessage], UnionMetadata(discriminant=\"role\")\n]\n"
  },
  {
    "path": "src/cohere/types/metrics.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .finetune_dataset_metrics import FinetuneDatasetMetrics\n\n\nclass Metrics(UncheckedBaseModel):\n    finetune_dataset_metrics: typing.Optional[FinetuneDatasetMetrics] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/non_streamed_chat_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .api_meta import ApiMeta\nfrom .chat_citation import ChatCitation\nfrom .chat_document import ChatDocument\nfrom .chat_search_query import ChatSearchQuery\nfrom .chat_search_result import ChatSearchResult\nfrom .finish_reason import FinishReason\nfrom .message import Message\nfrom .tool_call import ToolCall\n\n\nclass NonStreamedChatResponse(UncheckedBaseModel):\n    text: str = pydantic.Field()\n    \"\"\"\n    Contents of the reply generated by the model.\n    \"\"\"\n\n    generation_id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Unique identifier for the generated reply. Useful for submitting feedback.\n    \"\"\"\n\n    response_id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Unique identifier for the response.\n    \"\"\"\n\n    citations: typing.Optional[typing.List[ChatCitation]] = pydantic.Field(default=None)\n    \"\"\"\n    Inline citations for the generated reply.\n    \"\"\"\n\n    documents: typing.Optional[typing.List[ChatDocument]] = pydantic.Field(default=None)\n    \"\"\"\n    Documents seen by the model when generating the reply.\n    \"\"\"\n\n    is_search_required: typing.Optional[bool] = pydantic.Field(default=None)\n    \"\"\"\n    Denotes that a search for documents is required during the RAG flow.\n    \"\"\"\n\n    search_queries: typing.Optional[typing.List[ChatSearchQuery]] = pydantic.Field(default=None)\n    \"\"\"\n    Generated search queries, meant to be used as part of the RAG flow.\n    \"\"\"\n\n    search_results: typing.Optional[typing.List[ChatSearchResult]] = pydantic.Field(default=None)\n    \"\"\"\n    Documents retrieved from each of the conducted searches.\n    \"\"\"\n\n    finish_reason: typing.Optional[FinishReason] = None\n    
tool_calls: typing.Optional[typing.List[ToolCall]] = None\n    chat_history: typing.Optional[typing.List[Message]] = pydantic.Field(default=None)\n    \"\"\"\n    A list of previous messages between the user and the model, meant to give the model conversational context for responding to the user's `message`.\n    \"\"\"\n\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/o_auth_authorize_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass OAuthAuthorizeResponse(UncheckedBaseModel):\n    redirect_url: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The OAuth 2.0 redirect url. Redirect the user to this url to authorize the connector.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/parse_info.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ParseInfo(UncheckedBaseModel):\n    separator: typing.Optional[str] = None\n    delimiter: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/rerank_document.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nRerankDocument = typing.Dict[str, str]\n"
  },
  {
    "path": "src/cohere/types/rerank_request_documents_item.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom .rerank_document import RerankDocument\n\nRerankRequestDocumentsItem = typing.Union[str, RerankDocument]\n"
  },
  {
    "path": "src/cohere/types/rerank_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .api_meta import ApiMeta\nfrom .rerank_response_results_item import RerankResponseResultsItem\n\n\nclass RerankResponse(UncheckedBaseModel):\n    id: typing.Optional[str] = None\n    results: typing.List[RerankResponseResultsItem] = pydantic.Field()\n    \"\"\"\n    An ordered list of ranked documents\n    \"\"\"\n\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/rerank_response_results_item.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .rerank_response_results_item_document import RerankResponseResultsItemDocument\n\n\nclass RerankResponseResultsItem(UncheckedBaseModel):\n    document: typing.Optional[RerankResponseResultsItemDocument] = pydantic.Field(default=None)\n    \"\"\"\n    If `return_documents` is set as `false` this will return none, if `true` it will return the documents passed in\n    \"\"\"\n\n    index: int = pydantic.Field()\n    \"\"\"\n    Corresponds to the index in the original list of documents to which the ranked document belongs. (i.e. if the first value in the `results` object has an `index` value of 3, it means in the list of documents passed in, the document at `index=3` had the highest relevance)\n    \"\"\"\n\n    relevance_score: float = pydantic.Field()\n    \"\"\"\n    Relevance scores are normalized to be in the range `[0, 1]`. Scores close to `1` indicate a high relevance to the query, and scores closer to `0` indicate low relevance. It is not accurate to assume a score of 0.9 means the document is 2x more relevant than a document with a score of 0.45\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/rerank_response_results_item_document.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass RerankResponseResultsItemDocument(UncheckedBaseModel):\n    \"\"\"\n    If `return_documents` is set as `false` this will return none, if `true` it will return the documents passed in\n    \"\"\"\n\n    text: str = pydantic.Field()\n    \"\"\"\n    The text of the document to rerank\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/reranker_data_metrics.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass RerankerDataMetrics(UncheckedBaseModel):\n    num_train_queries: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The number of training queries.\n    \"\"\"\n\n    num_train_relevant_passages: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The sum of all relevant passages of valid training examples.\n    \"\"\"\n\n    num_train_hard_negatives: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The sum of all hard negatives of valid training examples.\n    \"\"\"\n\n    num_eval_queries: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The number of evaluation queries.\n    \"\"\"\n\n    num_eval_relevant_passages: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The sum of all relevant passages of valid eval examples.\n    \"\"\"\n\n    num_eval_hard_negatives: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The sum of all hard negatives of valid eval examples.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/response_format.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.serialization import FieldMetadata\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\n\n\nclass TextResponseFormat(UncheckedBaseModel):\n    \"\"\"\n    Configuration for forcing the model output to adhere to the specified format. Supported on [Command R 03-2024](https://docs.cohere.com/docs/command-r), [Command R+ 04-2024](https://docs.cohere.com/docs/command-r-plus) and newer models.\n\n    The model can be forced into outputting JSON objects (with up to 5 levels of nesting) by setting `{ \"type\": \"json_object\" }`.\n\n    A [JSON Schema](https://json-schema.org/) can optionally be provided, to ensure a specific structure.\n\n    **Note**: When using  `{ \"type\": \"json_object\" }` your `message` should always explicitly instruct the model to generate a JSON (eg: _\"Generate a JSON ...\"_) . Otherwise the model may end up getting stuck generating an infinite stream of characters and eventually run out of context length.\n    **Limitation**: The parameter is not supported in RAG mode (when any of `connectors`, `documents`, `tools`, `tool_results` are provided).\n    \"\"\"\n\n    type: typing.Literal[\"text\"] = \"text\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass JsonObjectResponseFormat(UncheckedBaseModel):\n    \"\"\"\n    Configuration for forcing the model output to adhere to the specified format. 
Supported on [Command R 03-2024](https://docs.cohere.com/docs/command-r), [Command R+ 04-2024](https://docs.cohere.com/docs/command-r-plus) and newer models.\n\n    The model can be forced into outputting JSON objects (with up to 5 levels of nesting) by setting `{ \"type\": \"json_object\" }`.\n\n    A [JSON Schema](https://json-schema.org/) can optionally be provided, to ensure a specific structure.\n\n    **Note**: When using  `{ \"type\": \"json_object\" }` your `message` should always explicitly instruct the model to generate a JSON (eg: _\"Generate a JSON ...\"_) . Otherwise the model may end up getting stuck generating an infinite stream of characters and eventually run out of context length.\n    **Limitation**: The parameter is not supported in RAG mode (when any of `connectors`, `documents`, `tools`, `tool_results` are provided).\n    \"\"\"\n\n    type: typing.Literal[\"json_object\"] = \"json_object\"\n    schema_: typing_extensions.Annotated[\n        typing.Optional[typing.Dict[str, typing.Any]], FieldMetadata(alias=\"schema\"), pydantic.Field(alias=\"schema\")\n    ] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nResponseFormat = typing_extensions.Annotated[\n    typing.Union[TextResponseFormat, JsonObjectResponseFormat], UnionMetadata(discriminant=\"type\")\n]\n"
  },
  {
    "path": "src/cohere/types/response_format_v2.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\n\n\nclass TextResponseFormatV2(UncheckedBaseModel):\n    \"\"\"\n    Configuration for forcing the model output to adhere to the specified format. Supported on [Command R](https://docs.cohere.com/v2/docs/command-r), [Command R+](https://docs.cohere.com/v2/docs/command-r-plus) and newer models.\n\n    The model can be forced into outputting JSON objects by setting `{ \"type\": \"json_object\" }`.\n\n    A [JSON Schema](https://json-schema.org/) can optionally be provided, to ensure a specific structure.\n\n    **Note**: When using  `{ \"type\": \"json_object\" }` your `message` should always explicitly instruct the model to generate a JSON (eg: _\"Generate a JSON ...\"_) . Otherwise the model may end up getting stuck generating an infinite stream of characters and eventually run out of context length.\n\n    **Note**: When `json_schema` is not specified, the generated object can have up to 5 layers of nesting.\n\n    **Limitation**: The parameter is not supported when used in combinations with the `documents` or `tools` parameters.\n    \"\"\"\n\n    type: typing.Literal[\"text\"] = \"text\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass JsonObjectResponseFormatV2(UncheckedBaseModel):\n    \"\"\"\n    Configuration for forcing the model output to adhere to the specified format. 
Supported on [Command R](https://docs.cohere.com/v2/docs/command-r), [Command R+](https://docs.cohere.com/v2/docs/command-r-plus) and newer models.\n\n    The model can be forced into outputting JSON objects by setting `{ \"type\": \"json_object\" }`.\n\n    A [JSON Schema](https://json-schema.org/) can optionally be provided, to ensure a specific structure.\n\n    **Note**: When using  `{ \"type\": \"json_object\" }` your `message` should always explicitly instruct the model to generate a JSON (eg: _\"Generate a JSON ...\"_) . Otherwise the model may end up getting stuck generating an infinite stream of characters and eventually run out of context length.\n\n    **Note**: When `json_schema` is not specified, the generated object can have up to 5 layers of nesting.\n\n    **Limitation**: The parameter is not supported when used in combinations with the `documents` or `tools` parameters.\n    \"\"\"\n\n    type: typing.Literal[\"json_object\"] = \"json_object\"\n    json_schema: typing.Optional[typing.Dict[str, typing.Any]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nResponseFormatV2 = typing_extensions.Annotated[\n    typing.Union[TextResponseFormatV2, JsonObjectResponseFormatV2], UnionMetadata(discriminant=\"type\")\n]\n"
  },
  {
    "path": "src/cohere/types/single_generation.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .single_generation_token_likelihoods_item import SingleGenerationTokenLikelihoodsItem\n\n\nclass SingleGeneration(UncheckedBaseModel):\n    id: str\n    text: str\n    index: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Refers to the nth generation. Only present when `num_generations` is greater than zero.\n    \"\"\"\n\n    likelihood: typing.Optional[float] = None\n    token_likelihoods: typing.Optional[typing.List[SingleGenerationTokenLikelihoodsItem]] = pydantic.Field(default=None)\n    \"\"\"\n    Only returned if `return_likelihoods` is set to `GENERATION` or `ALL`. The likelihood refers to the average log-likelihood of the entire specified string, which is useful for [evaluating the performance of your model](likelihood-eval), especially if you've created a [custom model](https://docs.cohere.com/docs/training-custom-models). Individual token likelihoods provide the log-likelihood of each token. The first token will not have a likelihood.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/single_generation_in_stream.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .finish_reason import FinishReason\n\n\nclass SingleGenerationInStream(UncheckedBaseModel):\n    id: str\n    text: str = pydantic.Field()\n    \"\"\"\n    Full text of the generation.\n    \"\"\"\n\n    index: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    Refers to the nth generation. Only present when `num_generations` is greater than zero.\n    \"\"\"\n\n    finish_reason: FinishReason\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/single_generation_token_likelihoods_item.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass SingleGenerationTokenLikelihoodsItem(UncheckedBaseModel):\n    token: str\n    likelihood: float\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/source.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\n\n\nclass ToolSource(UncheckedBaseModel):\n    \"\"\"\n    A source object containing information about the source of the data cited.\n    \"\"\"\n\n    type: typing.Literal[\"tool\"] = \"tool\"\n    id: typing.Optional[str] = None\n    tool_output: typing.Optional[typing.Dict[str, typing.Any]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass DocumentSource(UncheckedBaseModel):\n    \"\"\"\n    A source object containing information about the source of the data cited.\n    \"\"\"\n\n    type: typing.Literal[\"document\"] = \"document\"\n    id: typing.Optional[str] = None\n    document: typing.Optional[typing.Dict[str, typing.Any]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nSource = typing_extensions.Annotated[typing.Union[ToolSource, DocumentSource], UnionMetadata(discriminant=\"type\")]\n"
  },
  {
    "path": "src/cohere/types/streamed_chat_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\nfrom .chat_citation import ChatCitation\nfrom .chat_document import ChatDocument\nfrom .chat_search_query import ChatSearchQuery\nfrom .chat_search_result import ChatSearchResult\nfrom .chat_stream_end_event_finish_reason import ChatStreamEndEventFinishReason\nfrom .non_streamed_chat_response import NonStreamedChatResponse\nfrom .tool_call import ToolCall\nfrom .tool_call_delta import ToolCallDelta\n\n\nclass StreamStartStreamedChatResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    event_type: typing.Literal[\"stream-start\"] = \"stream-start\"\n    generation_id: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass SearchQueriesGenerationStreamedChatResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    event_type: typing.Literal[\"search-queries-generation\"] = \"search-queries-generation\"\n    search_queries: typing.List[ChatSearchQuery]\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass SearchResultsStreamedChatResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in 
streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    event_type: typing.Literal[\"search-results\"] = \"search-results\"\n    search_results: typing.Optional[typing.List[ChatSearchResult]] = None\n    documents: typing.Optional[typing.List[ChatDocument]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass TextGenerationStreamedChatResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    event_type: typing.Literal[\"text-generation\"] = \"text-generation\"\n    text: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass CitationGenerationStreamedChatResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    event_type: typing.Literal[\"citation-generation\"] = \"citation-generation\"\n    citations: typing.List[ChatCitation]\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ToolCallsGenerationStreamedChatResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    event_type: typing.Literal[\"tool-calls-generation\"] = \"tool-calls-generation\"\n    text: 
typing.Optional[str] = None\n    tool_calls: typing.List[ToolCall]\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass StreamEndStreamedChatResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    event_type: typing.Literal[\"stream-end\"] = \"stream-end\"\n    finish_reason: ChatStreamEndEventFinishReason\n    response: NonStreamedChatResponse\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ToolCallsChunkStreamedChatResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    event_type: typing.Literal[\"tool-calls-chunk\"] = \"tool-calls-chunk\"\n    tool_call_delta: ToolCallDelta\n    text: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass DebugStreamedChatResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    event_type: typing.Literal[\"debug\"] = \"debug\"\n    prompt: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    
else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nStreamedChatResponse = typing_extensions.Annotated[\n    typing.Union[\n        StreamStartStreamedChatResponse,\n        SearchQueriesGenerationStreamedChatResponse,\n        SearchResultsStreamedChatResponse,\n        TextGenerationStreamedChatResponse,\n        CitationGenerationStreamedChatResponse,\n        ToolCallsGenerationStreamedChatResponse,\n        StreamEndStreamedChatResponse,\n        ToolCallsChunkStreamedChatResponse,\n        DebugStreamedChatResponse,\n    ],\n    UnionMetadata(discriminant=\"event_type\"),\n]\n"
  },
  {
    "path": "src/cohere/types/summarize_request_extractiveness.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nSummarizeRequestExtractiveness = typing.Union[typing.Literal[\"low\", \"medium\", \"high\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/summarize_request_format.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nSummarizeRequestFormat = typing.Union[typing.Literal[\"paragraph\", \"bullets\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/summarize_request_length.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nSummarizeRequestLength = typing.Union[typing.Literal[\"short\", \"medium\", \"long\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/summarize_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .api_meta import ApiMeta\n\n\nclass SummarizeResponse(UncheckedBaseModel):\n    id: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Generated ID for the summary\n    \"\"\"\n\n    summary: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Generated summary for the text\n    \"\"\"\n\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/system_message_v2.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .system_message_v2content import SystemMessageV2Content\n\n\nclass SystemMessageV2(UncheckedBaseModel):\n    \"\"\"\n    A message from the system.\n    \"\"\"\n\n    content: SystemMessageV2Content\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/system_message_v2content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom .system_message_v2content_one_item import SystemMessageV2ContentOneItem\n\nSystemMessageV2Content = typing.Union[str, typing.List[SystemMessageV2ContentOneItem]]\n"
  },
  {
    "path": "src/cohere/types/system_message_v2content_one_item.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass TextSystemMessageV2ContentOneItem(UncheckedBaseModel):\n    type: typing.Literal[\"text\"] = \"text\"\n    text: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nSystemMessageV2ContentOneItem = TextSystemMessageV2ContentOneItem\n"
  },
  {
    "path": "src/cohere/types/thinking.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .thinking_type import ThinkingType\n\n\nclass Thinking(UncheckedBaseModel):\n    \"\"\"\n    Configuration for [reasoning features](https://docs.cohere.com/docs/reasoning).\n    \"\"\"\n\n    type: ThinkingType = pydantic.Field()\n    \"\"\"\n    Reasoning is enabled by default for models that support it, but can be turned off by setting `\"type\": \"disabled\"`.\n    \"\"\"\n\n    token_budget: typing.Optional[int] = pydantic.Field(default=None)\n    \"\"\"\n    The maximum number of tokens the model can use for thinking, which must be set to a positive integer.\n    The model will stop thinking if it reaches the thinking token budget and will proceed with the response.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/thinking_type.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nThinkingType = typing.Union[typing.Literal[\"enabled\", \"disabled\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/types/tokenize_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .api_meta import ApiMeta\n\n\nclass TokenizeResponse(UncheckedBaseModel):\n    tokens: typing.List[int] = pydantic.Field()\n    \"\"\"\n    An array of tokens, where each token is an integer.\n    \"\"\"\n\n    token_strings: typing.List[str]\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/tool.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .tool_parameter_definitions_value import ToolParameterDefinitionsValue\n\n\nclass Tool(UncheckedBaseModel):\n    name: str = pydantic.Field()\n    \"\"\"\n    The name of the tool to be called. Valid names contain only the characters `a-z`, `A-Z`, `0-9`, `_` and must not begin with a digit.\n    \"\"\"\n\n    description: str = pydantic.Field()\n    \"\"\"\n    The description of what the tool does, the model uses the description to choose when and how to call the function.\n    \"\"\"\n\n    parameter_definitions: typing.Optional[typing.Dict[str, ToolParameterDefinitionsValue]] = pydantic.Field(\n        default=None\n    )\n    \"\"\"\n    The input parameters of the tool. Accepts a dictionary where the key is the name of the parameter and the value is the parameter spec. Valid parameter names contain only the characters `a-z`, `A-Z`, `0-9`, `_` and must not begin with a digit.\n    ```\n    {\n      \"my_param\": {\n        \"description\": <string>,\n        \"type\": <string>, // any python data type, such as 'str', 'bool'\n        \"required\": <boolean>\n      }\n    }\n    ```\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/tool_call.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ToolCall(UncheckedBaseModel):\n    \"\"\"\n    Contains the tool calls generated by the model. Use it to invoke your tools.\n    \"\"\"\n\n    name: str = pydantic.Field()\n    \"\"\"\n    Name of the tool to call.\n    \"\"\"\n\n    parameters: typing.Dict[str, typing.Any] = pydantic.Field()\n    \"\"\"\n    The name and value of the parameters to use when invoking a tool.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/tool_call_delta.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ToolCallDelta(UncheckedBaseModel):\n    \"\"\"\n    Contains the chunk of the tool call generation in the stream.\n    \"\"\"\n\n    name: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Name of the tool call\n    \"\"\"\n\n    index: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    Index of the tool call generated\n    \"\"\"\n\n    parameters: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Chunk of the tool parameters\n    \"\"\"\n\n    text: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    Chunk of the tool plan text\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/tool_call_v2.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .tool_call_v2function import ToolCallV2Function\n\n\nclass ToolCallV2(UncheckedBaseModel):\n    \"\"\"\n    An array of tool calls to be made.\n    \"\"\"\n\n    id: str\n    type: typing.Literal[\"function\"] = \"function\"\n    function: typing.Optional[ToolCallV2Function] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/tool_call_v2function.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ToolCallV2Function(UncheckedBaseModel):\n    name: typing.Optional[str] = None\n    arguments: typing.Optional[str] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/tool_content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\nfrom .document import Document\n\n\nclass TextToolContent(UncheckedBaseModel):\n    \"\"\"\n    A content block which contains information about the content of a tool result\n    \"\"\"\n\n    type: typing.Literal[\"text\"] = \"text\"\n    text: str\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass DocumentToolContent(UncheckedBaseModel):\n    \"\"\"\n    A content block which contains information about the content of a tool result\n    \"\"\"\n\n    type: typing.Literal[\"document\"] = \"document\"\n    document: Document\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nToolContent = typing_extensions.Annotated[\n    typing.Union[TextToolContent, DocumentToolContent], UnionMetadata(discriminant=\"type\")\n]\n"
  },
  {
    "path": "src/cohere/types/tool_message_v2.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .tool_message_v2content import ToolMessageV2Content\n\n\nclass ToolMessageV2(UncheckedBaseModel):\n    \"\"\"\n    A message with Tool outputs.\n    \"\"\"\n\n    tool_call_id: str = pydantic.Field()\n    \"\"\"\n    The id of the associated tool call that has provided the given content\n    \"\"\"\n\n    content: ToolMessageV2Content = pydantic.Field()\n    \"\"\"\n    Outputs from a tool. The content should formatted as a JSON object string, or a list of tool content blocks\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/tool_message_v2content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom .tool_content import ToolContent\n\nToolMessageV2Content = typing.Union[str, typing.List[ToolContent]]\n"
  },
  {
    "path": "src/cohere/types/tool_parameter_definitions_value.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ToolParameterDefinitionsValue(UncheckedBaseModel):\n    description: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The description of the parameter.\n    \"\"\"\n\n    type: str = pydantic.Field()\n    \"\"\"\n    The type of the parameter. Must be a valid Python type.\n    \"\"\"\n\n    required: typing.Optional[bool] = pydantic.Field(default=None)\n    \"\"\"\n    Denotes whether the parameter is always present (required) or not. Defaults to not required.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/tool_result.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .tool_call import ToolCall\n\n\nclass ToolResult(UncheckedBaseModel):\n    call: ToolCall\n    outputs: typing.List[typing.Dict[str, typing.Any]]\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/tool_v2.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .tool_v2function import ToolV2Function\n\n\nclass ToolV2(UncheckedBaseModel):\n    type: typing.Literal[\"function\"] = \"function\"\n    function: typing.Optional[ToolV2Function] = pydantic.Field(default=None)\n    \"\"\"\n    The function to be executed.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/tool_v2function.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass ToolV2Function(UncheckedBaseModel):\n    \"\"\"\n    The function to be executed.\n    \"\"\"\n\n    name: str = pydantic.Field()\n    \"\"\"\n    The name of the function.\n    \"\"\"\n\n    description: typing.Optional[str] = pydantic.Field(default=None)\n    \"\"\"\n    The description of the function.\n    \"\"\"\n\n    parameters: typing.Dict[str, typing.Any] = pydantic.Field()\n    \"\"\"\n    The parameters of the function as a JSON schema.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/update_connector_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .connector import Connector\n\n\nclass UpdateConnectorResponse(UncheckedBaseModel):\n    connector: Connector\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/usage.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .usage_billed_units import UsageBilledUnits\nfrom .usage_tokens import UsageTokens\n\n\nclass Usage(UncheckedBaseModel):\n    billed_units: typing.Optional[UsageBilledUnits] = None\n    tokens: typing.Optional[UsageTokens] = None\n    cached_tokens: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of prompt tokens that hit the inference cache.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/usage_billed_units.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass UsageBilledUnits(UncheckedBaseModel):\n    input_tokens: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of billed input tokens.\n    \"\"\"\n\n    output_tokens: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of billed output tokens.\n    \"\"\"\n\n    search_units: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of billed search units.\n    \"\"\"\n\n    classifications: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of billed classifications units.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/usage_tokens.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\n\n\nclass UsageTokens(UncheckedBaseModel):\n    input_tokens: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of tokens used as input to the model.\n    \"\"\"\n\n    output_tokens: typing.Optional[float] = pydantic.Field(default=None)\n    \"\"\"\n    The number of tokens produced by the model.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/user_message_v2.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ..core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ..core.unchecked_base_model import UncheckedBaseModel\nfrom .user_message_v2content import UserMessageV2Content\n\n\nclass UserMessageV2(UncheckedBaseModel):\n    \"\"\"\n    A message from the user.\n    \"\"\"\n\n    content: UserMessageV2Content = pydantic.Field()\n    \"\"\"\n    The content of the message. This can be a string or a list of content blocks.\n    If a string is provided, it will be treated as a text content block.\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/types/user_message_v2content.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom .content import Content\n\nUserMessageV2Content = typing.Union[str, typing.List[Content]]\n"
  },
  {
    "path": "src/cohere/utils.py",
    "content": "import asyncio\nimport csv\nimport json\nimport time\nimport typing\nfrom typing import Optional\n\nimport requests\nfrom fastavro import parse_schema, reader, writer\n\nfrom . import EmbedResponse, EmbeddingsFloatsEmbedResponse, EmbeddingsByTypeEmbedResponse, ApiMeta, \\\n    EmbedByTypeResponseEmbeddings, ApiMetaBilledUnits, EmbedJob, CreateEmbedJobResponse, Dataset\nfrom .datasets import DatasetsCreateResponse, DatasetsGetResponse\nfrom .overrides import get_fields\n\n# Note: utils.py does NOT call run_overrides() itself - that's done in client.py\n# which imports utils.py. This ensures overrides are applied when client is used.\n\n\ndef get_terminal_states():\n    return get_success_states() | get_failed_states()\n\n\ndef get_success_states():\n    return {\"complete\", \"validated\"}\n\n\ndef get_failed_states():\n    return {\"unknown\", \"failed\", \"skipped\", \"cancelled\", \"failed\"}\n\n\ndef get_id(\n        awaitable: typing.Union[CreateEmbedJobResponse, DatasetsCreateResponse, EmbedJob, DatasetsGetResponse]):\n    return getattr(awaitable, \"job_id\", None) or getattr(awaitable, \"id\", None) or getattr(\n        getattr(awaitable, \"dataset\", None), \"id\", None)\n\n\ndef get_validation_status(awaitable: typing.Union[EmbedJob, DatasetsGetResponse]):\n    return getattr(awaitable, \"status\", None) or getattr(getattr(awaitable, \"dataset\", None), \"validation_status\", None)\n\n\ndef get_job(cohere: typing.Any,\n            awaitable: typing.Union[CreateEmbedJobResponse, DatasetsCreateResponse, EmbedJob, DatasetsGetResponse]) -> \\\n        typing.Union[\n            EmbedJob, DatasetsGetResponse]:\n    if awaitable.__class__.__name__ == \"EmbedJob\" or awaitable.__class__.__name__ == \"CreateEmbedJobResponse\":\n        return cohere.embed_jobs.get(id=get_id(awaitable))\n    elif awaitable.__class__.__name__ == \"DatasetsGetResponse\" or awaitable.__class__.__name__ == \"DatasetsCreateResponse\":\n        return 
cohere.datasets.get(id=get_id(awaitable))\n    else:\n        raise ValueError(f\"Unexpected awaitable type {awaitable}\")\n\n\nasync def async_get_job(cohere: typing.Any, awaitable: typing.Union[CreateEmbedJobResponse, DatasetsCreateResponse]) -> \\\n        typing.Union[\n            EmbedJob, DatasetsGetResponse]:\n    if awaitable.__class__.__name__ == \"EmbedJob\" or awaitable.__class__.__name__ == \"CreateEmbedJobResponse\":\n        return await cohere.embed_jobs.get(id=get_id(awaitable))\n    elif awaitable.__class__.__name__ == \"DatasetsGetResponse\" or awaitable.__class__.__name__ == \"DatasetsCreateResponse\":\n        return await cohere.datasets.get(id=get_id(awaitable))\n    else:\n        raise ValueError(f\"Unexpected awaitable type {awaitable}\")\n\n\ndef get_failure_reason(job: typing.Union[EmbedJob, DatasetsGetResponse]) -> Optional[str]:\n    if isinstance(job, EmbedJob):\n        return f\"Embed job {job.job_id} failed with status {job.status}\"\n    elif isinstance(job, DatasetsGetResponse):\n        return f\"Dataset creation failed with status {job.dataset.validation_status} and error : {job.dataset.validation_error}\"\n    return None\n\n\n@typing.overload\ndef wait(\n        cohere: typing.Any,\n        awaitable: CreateEmbedJobResponse,\n        timeout: Optional[float] = None,\n        interval: float = 10,\n) -> EmbedJob:\n    ...\n\n\n@typing.overload\ndef wait(\n        cohere: typing.Any,\n        awaitable: DatasetsCreateResponse,\n        timeout: Optional[float] = None,\n        interval: float = 10,\n) -> DatasetsGetResponse:\n    ...\n\n\ndef wait(\n        cohere: typing.Any,\n        awaitable: typing.Union[CreateEmbedJobResponse, DatasetsCreateResponse],\n        timeout: Optional[float] = None,\n        interval: float = 2,\n) -> typing.Union[EmbedJob, DatasetsGetResponse]:\n    start_time = time.time()\n    terminal_states = get_terminal_states()\n    failed_states = get_failed_states()\n\n    job = get_job(cohere, 
awaitable)\n    while get_validation_status(job) not in terminal_states:\n        if timeout is not None and time.time() - start_time > timeout:\n            raise TimeoutError(f\"wait timed out after {timeout} seconds\")\n\n        time.sleep(interval)\n        print(\"...\")\n\n        job = get_job(cohere, awaitable)\n\n    if get_validation_status(job) in failed_states:\n        raise Exception(get_failure_reason(job))\n\n    return job\n\n\n@typing.overload\nasync def async_wait(\n        cohere: typing.Any,\n        awaitable: CreateEmbedJobResponse,\n        timeout: Optional[float] = None,\n        interval: float = 10,\n) -> EmbedJob:\n    ...\n\n\n@typing.overload\nasync def async_wait(\n        cohere: typing.Any,\n        awaitable: DatasetsCreateResponse,\n        timeout: Optional[float] = None,\n        interval: float = 10,\n) -> DatasetsGetResponse:\n    ...\n\n\nasync def async_wait(\n        cohere: typing.Any,\n        awaitable: typing.Union[CreateEmbedJobResponse, DatasetsCreateResponse],\n        timeout: Optional[float] = None,\n        interval: float = 10,\n) -> typing.Union[EmbedJob, DatasetsGetResponse]:\n    start_time = time.time()\n    terminal_states = get_terminal_states()\n    failed_states = get_failed_states()\n\n    job = await async_get_job(cohere, awaitable)\n    while get_validation_status(job) not in terminal_states:\n        if timeout is not None and time.time() - start_time > timeout:\n            raise TimeoutError(f\"wait timed out after {timeout} seconds\")\n\n        await asyncio.sleep(interval)\n        print(\"...\")\n\n        job = await async_get_job(cohere, awaitable)\n\n    if get_validation_status(job) in failed_states:\n        raise Exception(get_failure_reason(job))\n\n    return job\n\n\ndef sum_fields_if_not_none(obj: typing.Any, field: str) -> Optional[int]:\n    non_none = [getattr(obj, field) for obj in obj if getattr(obj, field) is not None]\n    return sum(non_none) if non_none else None\n\n\ndef 
merge_meta_field(metas: typing.List[ApiMeta]) -> ApiMeta:\n    api_version = metas[0].api_version if metas else None\n    billed_units = [meta.billed_units for meta in metas]\n    input_tokens = sum_fields_if_not_none(billed_units, \"input_tokens\")\n    output_tokens = sum_fields_if_not_none(billed_units, \"output_tokens\")\n    search_units = sum_fields_if_not_none(billed_units, \"search_units\")\n    classifications = sum_fields_if_not_none(billed_units, \"classifications\")\n    warnings = {warning for meta in metas if meta.warnings for warning in meta.warnings}\n    return ApiMeta(\n        api_version=api_version,\n        billed_units=ApiMetaBilledUnits(\n            input_tokens=input_tokens,\n            output_tokens=output_tokens,\n            search_units=search_units,\n            classifications=classifications\n        ),\n        warnings=list(warnings)\n    )\n\n\ndef merge_embed_responses(responses: typing.List[EmbedResponse]) -> EmbedResponse:\n    meta = merge_meta_field([response.meta for response in responses if response.meta])\n    response_id = \", \".join(response.id for response in responses)\n    texts = [\n        text\n        for response in responses\n        if response.texts is not None\n        for text in response.texts\n    ]\n\n    if responses[0].response_type == \"embeddings_floats\":\n        embeddings_floats = typing.cast(typing.List[EmbeddingsFloatsEmbedResponse], responses)\n\n        embeddings = [\n            embedding\n            for embeddings_floats in embeddings_floats\n            for embedding in embeddings_floats.embeddings\n        ]\n\n        return EmbeddingsFloatsEmbedResponse(\n            response_type=\"embeddings_floats\",\n            id=response_id,\n            texts=texts,\n            embeddings=embeddings,\n            meta=meta\n        )\n    else:\n        embeddings_type = typing.cast(typing.List[EmbeddingsByTypeEmbedResponse], responses)\n\n        embeddings_by_type = [\n            
response.embeddings\n            for response in embeddings_type\n        ]\n\n        # only get set keys from the pydantic model (i.e. exclude fields that are set to 'None')\n        fields = [x for x in get_fields(embeddings_type[0].embeddings) if getattr(embeddings_type[0].embeddings, x) is not None]\n\n        merged_dicts = {\n            field: [\n                embedding\n                for embedding_by_type in embeddings_by_type\n                for embedding in getattr(embedding_by_type, field)\n            ]\n            for field in fields\n        }\n\n        embeddings_by_type_merged = EmbedByTypeResponseEmbeddings.parse_obj(merged_dicts)\n\n        return EmbeddingsByTypeEmbedResponse(\n            response_type=\"embeddings_by_type\",\n            id=response_id,\n            embeddings=embeddings_by_type_merged,\n            texts=texts,\n            meta=meta\n        )\n\n\nsupported_formats = [\"jsonl\", \"csv\", \"avro\"]\n\n\ndef save_avro(dataset: Dataset, filepath: str):\n    if not dataset.schema_:\n        raise ValueError(\"Dataset does not have a schema\")\n    schema = parse_schema(json.loads(dataset.schema_))\n    with open(filepath, \"wb\") as outfile:\n        writer(outfile, schema, dataset_generator(dataset))\n\n\ndef save_jsonl(dataset: Dataset, filepath: str):\n    with open(filepath, \"w\") as outfile:\n        for data in dataset_generator(dataset):\n            json.dump(data, outfile)\n            outfile.write(\"\\n\")\n\n\ndef save_csv(dataset: Dataset, filepath: str):\n    with open(filepath, \"w\") as outfile:\n        for i, data in enumerate(dataset_generator(dataset)):\n            if i == 0:\n                writer = csv.DictWriter(outfile, fieldnames=list(data.keys()))\n                writer.writeheader()\n            writer.writerow(data)\n\n\ndef dataset_generator(dataset: Dataset):\n    if not dataset.dataset_parts:\n        raise ValueError(\"Dataset does not have dataset_parts\")\n    for part in 
dataset.dataset_parts:\n        if not part.url:\n            raise ValueError(\"Dataset part does not have a url\")\n        resp = requests.get(part.url, stream=True)\n        for record in reader(resp.raw): # type: ignore\n            yield record\n\n\nclass SdkUtils:\n\n    @staticmethod\n    def save_dataset(dataset: Dataset, filepath: str, format: typing.Literal[\"jsonl\", \"csv\", \"avro\"] = \"jsonl\"):\n        if format == \"jsonl\":\n            return save_jsonl(dataset, filepath)\n        if format == \"csv\":\n            return save_csv(dataset, filepath)\n        if format == \"avro\":\n            return save_avro(dataset, filepath)\n        raise Exception(f\"unsupported format must be one of : {supported_formats}\")\n\n\nclass SyncSdkUtils(SdkUtils):\n    pass\n\n\nclass AsyncSdkUtils(SdkUtils):\n    pass\n"
  },
  {
    "path": "src/cohere/v2/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .types import (\n        CitationEndV2ChatStreamResponse,\n        CitationStartV2ChatStreamResponse,\n        ContentDeltaV2ChatStreamResponse,\n        ContentEndV2ChatStreamResponse,\n        ContentStartV2ChatStreamResponse,\n        DebugV2ChatStreamResponse,\n        MessageEndV2ChatStreamResponse,\n        MessageStartV2ChatStreamResponse,\n        ToolCallDeltaV2ChatStreamResponse,\n        ToolCallEndV2ChatStreamResponse,\n        ToolCallStartV2ChatStreamResponse,\n        ToolPlanDeltaV2ChatStreamResponse,\n        V2ChatRequestDocumentsItem,\n        V2ChatRequestSafetyMode,\n        V2ChatRequestToolChoice,\n        V2ChatResponse,\n        V2ChatStreamRequestDocumentsItem,\n        V2ChatStreamRequestSafetyMode,\n        V2ChatStreamRequestToolChoice,\n        V2ChatStreamResponse,\n        V2EmbedRequestTruncate,\n        V2RerankResponse,\n        V2RerankResponseResultsItem,\n    )\n_dynamic_imports: typing.Dict[str, str] = {\n    \"CitationEndV2ChatStreamResponse\": \".types\",\n    \"CitationStartV2ChatStreamResponse\": \".types\",\n    \"ContentDeltaV2ChatStreamResponse\": \".types\",\n    \"ContentEndV2ChatStreamResponse\": \".types\",\n    \"ContentStartV2ChatStreamResponse\": \".types\",\n    \"DebugV2ChatStreamResponse\": \".types\",\n    \"MessageEndV2ChatStreamResponse\": \".types\",\n    \"MessageStartV2ChatStreamResponse\": \".types\",\n    \"ToolCallDeltaV2ChatStreamResponse\": \".types\",\n    \"ToolCallEndV2ChatStreamResponse\": \".types\",\n    \"ToolCallStartV2ChatStreamResponse\": \".types\",\n    \"ToolPlanDeltaV2ChatStreamResponse\": \".types\",\n    \"V2ChatRequestDocumentsItem\": \".types\",\n    \"V2ChatRequestSafetyMode\": \".types\",\n    \"V2ChatRequestToolChoice\": \".types\",\n    \"V2ChatResponse\": \".types\",\n    
\"V2ChatStreamRequestDocumentsItem\": \".types\",\n    \"V2ChatStreamRequestSafetyMode\": \".types\",\n    \"V2ChatStreamRequestToolChoice\": \".types\",\n    \"V2ChatStreamResponse\": \".types\",\n    \"V2EmbedRequestTruncate\": \".types\",\n    \"V2RerankResponse\": \".types\",\n    \"V2RerankResponseResultsItem\": \".types\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\n    \"CitationEndV2ChatStreamResponse\",\n    \"CitationStartV2ChatStreamResponse\",\n    \"ContentDeltaV2ChatStreamResponse\",\n    \"ContentEndV2ChatStreamResponse\",\n    \"ContentStartV2ChatStreamResponse\",\n    \"DebugV2ChatStreamResponse\",\n    \"MessageEndV2ChatStreamResponse\",\n    \"MessageStartV2ChatStreamResponse\",\n    \"ToolCallDeltaV2ChatStreamResponse\",\n    \"ToolCallEndV2ChatStreamResponse\",\n    \"ToolCallStartV2ChatStreamResponse\",\n    \"ToolPlanDeltaV2ChatStreamResponse\",\n    \"V2ChatRequestDocumentsItem\",\n    \"V2ChatRequestSafetyMode\",\n    \"V2ChatRequestToolChoice\",\n    \"V2ChatResponse\",\n    \"V2ChatStreamRequestDocumentsItem\",\n    \"V2ChatStreamRequestSafetyMode\",\n    \"V2ChatStreamRequestToolChoice\",\n    \"V2ChatStreamResponse\",\n    \"V2EmbedRequestTruncate\",\n    \"V2RerankResponse\",\n    
\"V2RerankResponseResultsItem\",\n]\n"
  },
  {
    "path": "src/cohere/v2/client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.request_options import RequestOptions\nfrom ..types.chat_messages import ChatMessages\nfrom ..types.citation_options import CitationOptions\nfrom ..types.embed_by_type_response import EmbedByTypeResponse\nfrom ..types.embed_input import EmbedInput\nfrom ..types.embed_input_type import EmbedInputType\nfrom ..types.embedding_type import EmbeddingType\nfrom ..types.response_format_v2 import ResponseFormatV2\nfrom ..types.thinking import Thinking\nfrom ..types.tool_v2 import ToolV2\nfrom .raw_client import AsyncRawV2Client, RawV2Client\nfrom .types.v2chat_request_documents_item import V2ChatRequestDocumentsItem\nfrom .types.v2chat_request_safety_mode import V2ChatRequestSafetyMode\nfrom .types.v2chat_request_tool_choice import V2ChatRequestToolChoice\nfrom .types.v2chat_response import V2ChatResponse\nfrom .types.v2chat_stream_request_documents_item import V2ChatStreamRequestDocumentsItem\nfrom .types.v2chat_stream_request_safety_mode import V2ChatStreamRequestSafetyMode\nfrom .types.v2chat_stream_request_tool_choice import V2ChatStreamRequestToolChoice\nfrom .types.v2chat_stream_response import V2ChatStreamResponse\nfrom .types.v2embed_request_truncate import V2EmbedRequestTruncate\nfrom .types.v2rerank_response import V2RerankResponse\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass V2Client:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._raw_client = RawV2Client(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> RawV2Client:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        RawV2Client\n        \"\"\"\n        return self._raw_client\n\n    def chat_stream(\n        
self,\n        *,\n        model: str,\n        messages: ChatMessages,\n        tools: typing.Optional[typing.Sequence[ToolV2]] = OMIT,\n        strict_tools: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]] = OMIT,\n        citation_options: typing.Optional[CitationOptions] = OMIT,\n        response_format: typing.Optional[ResponseFormatV2] = OMIT,\n        safety_mode: typing.Optional[V2ChatStreamRequestSafetyMode] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        logprobs: typing.Optional[bool] = OMIT,\n        tool_choice: typing.Optional[V2ChatStreamRequestToolChoice] = OMIT,\n        thinking: typing.Optional[Thinking] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> typing.Iterator[V2ChatStreamResponse]:\n        \"\"\"\n        Generates a text response to a user message. To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).\n\n        Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.\n\n        Parameters\n        ----------\n        model : str\n            The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).\n\n        messages : ChatMessages\n\n        tools : typing.Optional[typing.Sequence[ToolV2]]\n            A list of tools (functions) available to the model. 
The model response may contain 'tool_calls' to the specified tools.\n\n            Learn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).\n\n        strict_tools : typing.Optional[bool]\n            When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).\n\n            **Note**: The first few requests with a new set of tools will take longer to process.\n\n        documents : typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata.\n\n        citation_options : typing.Optional[CitationOptions]\n\n        response_format : typing.Optional[ResponseFormatV2]\n\n        safety_mode : typing.Optional[V2ChatStreamRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.\n            When `OFF` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools` and `documents` parameters.\n\n            **Note**: This parameter is only compatible newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. 
You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).\n\n            **Note**: Setting a low value may result in incomplete generations. In such cases, the `finish_reason` field in the response will be set to `\"MAX_TOKENS\"`.\n\n            **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. 
The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n        k : typing.Optional[int]\n            Ensures that only the top `k` most likely tokens are considered for generation at each step. When `k` is set to `0`, k-sampling is disabled.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        logprobs : typing.Optional[bool]\n            Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.\n\n        tool_choice : typing.Optional[V2ChatStreamRequestToolChoice]\n            Used to control whether or not the model will be forced to use a tool when answering. 
When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.\n            When `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.\n            If tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.\n\n            **Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.\n\n        thinking : typing.Optional[Thinking]\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Yields\n        ------\n        typing.Iterator[V2ChatStreamResponse]\n\n\n        Examples\n        --------\n        from cohere import Client, UserChatMessageV2\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        response = client.v2.chat_stream(\n            model=\"command-a-03-2025\",\n            messages=[\n                UserChatMessageV2(\n                    content=\"Tell me about LLMs\",\n                )\n            ],\n        )\n        for chunk in response:\n            yield chunk\n        \"\"\"\n        with self._raw_client.chat_stream(\n            model=model,\n            messages=messages,\n            tools=tools,\n            strict_tools=strict_tools,\n            documents=documents,\n            citation_options=citation_options,\n            response_format=response_format,\n            safety_mode=safety_mode,\n            max_tokens=max_tokens,\n            
stop_sequences=stop_sequences,\n            temperature=temperature,\n            seed=seed,\n            frequency_penalty=frequency_penalty,\n            presence_penalty=presence_penalty,\n            k=k,\n            p=p,\n            logprobs=logprobs,\n            tool_choice=tool_choice,\n            thinking=thinking,\n            priority=priority,\n            request_options=request_options,\n        ) as r:\n            yield from r.data\n\n    def chat(\n        self,\n        *,\n        model: str,\n        messages: ChatMessages,\n        tools: typing.Optional[typing.Sequence[ToolV2]] = OMIT,\n        strict_tools: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]] = OMIT,\n        citation_options: typing.Optional[CitationOptions] = OMIT,\n        response_format: typing.Optional[ResponseFormatV2] = OMIT,\n        safety_mode: typing.Optional[V2ChatRequestSafetyMode] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        logprobs: typing.Optional[bool] = OMIT,\n        tool_choice: typing.Optional[V2ChatRequestToolChoice] = OMIT,\n        thinking: typing.Optional[Thinking] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> V2ChatResponse:\n        \"\"\"\n        Generates a text response to a user message and streams it down, token by token. 
To learn how to use the Chat API with streaming follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).\n\n        Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.\n\n        Parameters\n        ----------\n        model : str\n            The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).\n\n        messages : ChatMessages\n\n        tools : typing.Optional[typing.Sequence[ToolV2]]\n            A list of tools (functions) available to the model. The model response may contain 'tool_calls' to the specified tools.\n\n            Learn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).\n\n        strict_tools : typing.Optional[bool]\n            When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).\n\n            **Note**: The first few requests with a new set of tools will take longer to process.\n\n        documents : typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata.\n\n        citation_options : typing.Optional[CitationOptions]\n\n        response_format : typing.Optional[ResponseFormatV2]\n\n        safety_mode : typing.Optional[V2ChatRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. 
Defaults to `CONTEXTUAL`.\n            When `OFF` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools` and `documents` parameters.\n\n            **Note**: This parameter is only compatible newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).\n\n            **Note**: Setting a low value may result in incomplete generations. In such cases, the `finish_reason` field in the response will be set to `\"MAX_TOKENS\"`.\n\n            **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. 
Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n        k : typing.Optional[int]\n            Ensures that only the top `k` most likely tokens are considered for generation at each step. When `k` is set to `0`, k-sampling is disabled.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        logprobs : typing.Optional[bool]\n            Defaults to `false`. 
When set to `true`, the log probabilities of the generated tokens will be included in the response.\n\n        tool_choice : typing.Optional[V2ChatRequestToolChoice]\n            Used to control whether or not the model will be forced to use a tool when answering. When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.\n            When `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.\n            If tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.\n\n            **Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.\n\n        thinking : typing.Optional[Thinking]\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). 
When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        V2ChatResponse\n\n\n        Examples\n        --------\n        from cohere import Client, UserChatMessageV2\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.v2.chat(\n            model=\"command-a-03-2025\",\n            messages=[\n                UserChatMessageV2(\n                    content=\"Tell me about LLMs\",\n                )\n            ],\n        )\n        \"\"\"\n        _response = self._raw_client.chat(\n            model=model,\n            messages=messages,\n            tools=tools,\n            strict_tools=strict_tools,\n            documents=documents,\n            citation_options=citation_options,\n            response_format=response_format,\n            safety_mode=safety_mode,\n            max_tokens=max_tokens,\n            stop_sequences=stop_sequences,\n            temperature=temperature,\n            seed=seed,\n            frequency_penalty=frequency_penalty,\n            presence_penalty=presence_penalty,\n            k=k,\n            p=p,\n            logprobs=logprobs,\n            tool_choice=tool_choice,\n            thinking=thinking,\n            priority=priority,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def embed(\n        self,\n        *,\n        model: str,\n        input_type: EmbedInputType,\n        texts: typing.Optional[typing.Sequence[str]] = OMIT,\n        images: typing.Optional[typing.Sequence[str]] = OMIT,\n        inputs: typing.Optional[typing.Sequence[EmbedInput]] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        output_dimension: typing.Optional[int] = OMIT,\n        
embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[V2EmbedRequestTruncate] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> EmbedByTypeResponse:\n        \"\"\"\n        This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.\n\n        Embeddings can be used to create text classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page.\n\n        If you want to learn more how to use the embedding model, have a look at the [Semantic Search Guide](https://docs.cohere.com/docs/semantic-search).\n\n        Parameters\n        ----------\n        model : str\n            ID of one of the available [Embedding models](https://docs.cohere.com/docs/cohere-embed).\n\n        input_type : EmbedInputType\n\n        texts : typing.Optional[typing.Sequence[str]]\n            An array of strings for the model to embed. Maximum number of texts per call is `96`.\n\n        images : typing.Optional[typing.Sequence[str]]\n            An array of image data URIs for the model to embed. Maximum number of images per call is `1`.\n\n            The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.\n\n            Image embeddings are supported with Embed v3.0 and newer models.\n\n        inputs : typing.Optional[typing.Sequence[EmbedInput]]\n            An array of inputs for the model to embed. Maximum number of inputs per call is `96`. An input can contain a mix of text and image components.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens to embed per input. 
If the input text is longer than this, it will be truncated according to the `truncate` parameter.\n\n        output_dimension : typing.Optional[int]\n            The number of dimensions of the output embedding. This is only available for `embed-v4` and newer models.\n            Possible values are `256`, `512`, `1024`, and `1536`. The default is `1536`.\n\n        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]\n            Specifies the types of embeddings you want to get back. Can be one or more of the following types.\n\n            * `\"float\"`: Use this when you want to get back the default float embeddings. Supported with all Embed models.\n            * `\"int8\"`: Use this when you want to get back signed int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"binary\"`: Use this when you want to get back signed binary embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"base64\"`: Use this when you want to get back base64 embeddings. Supported with Embed v3.0 and newer Embed models.\n\n        truncate : typing.Optional[V2EmbedRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. 
Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        EmbedByTypeResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.v2.embed(\n            texts=[\"hello\", \"goodbye\"],\n            model=\"embed-v4.0\",\n            input_type=\"classification\",\n            embedding_types=[\"float\"],\n        )\n        \"\"\"\n        _response = self._raw_client.embed(\n            model=model,\n            input_type=input_type,\n            texts=texts,\n            images=images,\n            inputs=inputs,\n            max_tokens=max_tokens,\n            output_dimension=output_dimension,\n            embedding_types=embedding_types,\n            truncate=truncate,\n            priority=priority,\n            request_options=request_options,\n        )\n        return _response.data\n\n    def rerank(\n        self,\n        *,\n        model: str,\n        query: str,\n        documents: typing.Sequence[str],\n        top_n: typing.Optional[int] = OMIT,\n        max_tokens_per_doc: typing.Optional[int] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> V2RerankResponse:\n        \"\"\"\n        This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.\n\n        Parameters\n        ----------\n        model : str\n            The identifier of the model to use, eg `rerank-v3.5`.\n\n        query : str\n            The search query\n\n        documents : 
typing.Sequence[str]\n            A list of texts that will be compared to the `query`.\n            For optimal performance we recommend against sending more than 1,000 documents in a single request.\n\n            **Note**: long documents will automatically be truncated to the value of `max_tokens_per_doc`.\n\n            **Note**: structured data should be formatted as YAML strings for best performance.\n\n        top_n : typing.Optional[int]\n            Limits the number of returned rerank results to the specified value. If not passed, all the rerank results will be returned.\n\n        max_tokens_per_doc : typing.Optional[int]\n            Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        V2RerankResponse\n            OK\n\n        Examples\n        --------\n        from cohere import Client\n\n        client = Client(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n        client.v2.rerank(\n            documents=[\n                \"Carson City is the capital city of the American state of Nevada.\",\n                \"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.\",\n                \"Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.\",\n                \"Washington, D.C. 
(also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.\",\n                \"Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.\",\n            ],\n            query=\"What is the capital of the United States?\",\n            top_n=3,\n            model=\"rerank-v4.0-pro\",\n        )\n        \"\"\"\n        _response = self._raw_client.rerank(\n            model=model,\n            query=query,\n            documents=documents,\n            top_n=top_n,\n            max_tokens_per_doc=max_tokens_per_doc,\n            priority=priority,\n            request_options=request_options,\n        )\n        return _response.data\n\n\nclass AsyncV2Client:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._raw_client = AsyncRawV2Client(client_wrapper=client_wrapper)\n\n    @property\n    def with_raw_response(self) -> AsyncRawV2Client:\n        \"\"\"\n        Retrieves a raw implementation of this client that returns raw responses.\n\n        Returns\n        -------\n        AsyncRawV2Client\n        \"\"\"\n        return self._raw_client\n\n    async def chat_stream(\n        self,\n        *,\n        model: str,\n        messages: ChatMessages,\n        tools: typing.Optional[typing.Sequence[ToolV2]] = OMIT,\n        strict_tools: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]] = OMIT,\n        citation_options: typing.Optional[CitationOptions] = OMIT,\n        response_format: typing.Optional[ResponseFormatV2] = OMIT,\n        safety_mode: typing.Optional[V2ChatStreamRequestSafetyMode] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n     
   seed: typing.Optional[int] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        logprobs: typing.Optional[bool] = OMIT,\n        tool_choice: typing.Optional[V2ChatStreamRequestToolChoice] = OMIT,\n        thinking: typing.Optional[Thinking] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> typing.AsyncIterator[V2ChatStreamResponse]:\n        \"\"\"\n        Generates a text response to a user message. To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).\n\n        Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.\n\n        Parameters\n        ----------\n        model : str\n            The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).\n\n        messages : ChatMessages\n\n        tools : typing.Optional[typing.Sequence[ToolV2]]\n            A list of tools (functions) available to the model. The model response may contain 'tool_calls' to the specified tools.\n\n            Learn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).\n\n        strict_tools : typing.Optional[bool]\n            When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).\n\n            **Note**: The first few requests with a new set of tools will take longer to process.\n\n        documents : typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. 
Each document is either a string or document object with content and metadata.\n\n        citation_options : typing.Optional[CitationOptions]\n\n        response_format : typing.Optional[ResponseFormatV2]\n\n        safety_mode : typing.Optional[V2ChatStreamRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.\n            When `OFF` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools` and `documents` parameters.\n\n            **Note**: This parameter is only compatible newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).\n\n            **Note**: Setting a low value may result in incomplete generations. In such cases, the `finish_reason` field in the response will be set to `\"MAX_TOKENS\"`.\n\n            **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. 
If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n        k : typing.Optional[int]\n            Ensures that only the top `k` most likely tokens are considered for generation at each step. 
When `k` is set to `0`, k-sampling is disabled.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        logprobs : typing.Optional[bool]\n            Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.\n\n        tool_choice : typing.Optional[V2ChatStreamRequestToolChoice]\n            Used to control whether or not the model will be forced to use a tool when answering. When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.\n            When `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.\n            If tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.\n\n            **Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.\n\n        thinking : typing.Optional[Thinking]\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). 
When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Yields\n        ------\n        typing.AsyncIterator[V2ChatStreamResponse]\n\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient, UserChatMessageV2\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            response = await client.v2.chat_stream(\n                model=\"command-a-03-2025\",\n                messages=[\n                    UserChatMessageV2(\n                        content=\"Tell me about LLMs\",\n                    )\n                ],\n            )\n            async for chunk in response:\n                yield chunk\n\n\n        asyncio.run(main())\n        \"\"\"\n        async with self._raw_client.chat_stream(\n            model=model,\n            messages=messages,\n            tools=tools,\n            strict_tools=strict_tools,\n            documents=documents,\n            citation_options=citation_options,\n            response_format=response_format,\n            safety_mode=safety_mode,\n            max_tokens=max_tokens,\n            stop_sequences=stop_sequences,\n            temperature=temperature,\n            seed=seed,\n            frequency_penalty=frequency_penalty,\n            presence_penalty=presence_penalty,\n            k=k,\n            p=p,\n            logprobs=logprobs,\n            tool_choice=tool_choice,\n            thinking=thinking,\n            priority=priority,\n            request_options=request_options,\n        ) as r:\n            async for _chunk in r.data:\n                yield _chunk\n\n    async def chat(\n        self,\n        *,\n        model: str,\n        messages: ChatMessages,\n        
tools: typing.Optional[typing.Sequence[ToolV2]] = OMIT,\n        strict_tools: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]] = OMIT,\n        citation_options: typing.Optional[CitationOptions] = OMIT,\n        response_format: typing.Optional[ResponseFormatV2] = OMIT,\n        safety_mode: typing.Optional[V2ChatRequestSafetyMode] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        logprobs: typing.Optional[bool] = OMIT,\n        tool_choice: typing.Optional[V2ChatRequestToolChoice] = OMIT,\n        thinking: typing.Optional[Thinking] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> V2ChatResponse:\n        \"\"\"\n        Generates a text response to a user message and streams it down, token by token. To learn how to use the Chat API with streaming follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).\n\n        Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.\n\n        Parameters\n        ----------\n        model : str\n            The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).\n\n        messages : ChatMessages\n\n        tools : typing.Optional[typing.Sequence[ToolV2]]\n            A list of tools (functions) available to the model. 
The model response may contain 'tool_calls' to the specified tools.\n\n            Learn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).\n\n        strict_tools : typing.Optional[bool]\n            When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).\n\n            **Note**: The first few requests with a new set of tools will take longer to process.\n\n        documents : typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata.\n\n        citation_options : typing.Optional[CitationOptions]\n\n        response_format : typing.Optional[ResponseFormatV2]\n\n        safety_mode : typing.Optional[V2ChatRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.\n            When `OFF` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools` and `documents` parameters.\n\n            **Note**: This parameter is only compatible newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. 
You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).\n\n            **Note**: Setting a low value may result in incomplete generations. In such cases, the `finish_reason` field in the response will be set to `\"MAX_TOKENS\"`.\n\n            **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. 
The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n        k : typing.Optional[int]\n            Ensures that only the top `k` most likely tokens are considered for generation at each step. When `k` is set to `0`, k-sampling is disabled.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        logprobs : typing.Optional[bool]\n            Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.\n\n        tool_choice : typing.Optional[V2ChatRequestToolChoice]\n            Used to control whether or not the model will be forced to use a tool when answering. 
When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.\n            When `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.\n            If tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.\n\n            **Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.\n\n        thinking : typing.Optional[Thinking]\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        V2ChatResponse\n\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient, UserChatMessageV2\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.v2.chat(\n                model=\"command-a-03-2025\",\n                messages=[\n                    UserChatMessageV2(\n                        content=\"Tell me about LLMs\",\n                    )\n                ],\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.chat(\n            model=model,\n            messages=messages,\n            tools=tools,\n            strict_tools=strict_tools,\n            documents=documents,\n            citation_options=citation_options,\n            response_format=response_format,\n            safety_mode=safety_mode,\n     
       max_tokens=max_tokens,\n            stop_sequences=stop_sequences,\n            temperature=temperature,\n            seed=seed,\n            frequency_penalty=frequency_penalty,\n            presence_penalty=presence_penalty,\n            k=k,\n            p=p,\n            logprobs=logprobs,\n            tool_choice=tool_choice,\n            thinking=thinking,\n            priority=priority,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def embed(\n        self,\n        *,\n        model: str,\n        input_type: EmbedInputType,\n        texts: typing.Optional[typing.Sequence[str]] = OMIT,\n        images: typing.Optional[typing.Sequence[str]] = OMIT,\n        inputs: typing.Optional[typing.Sequence[EmbedInput]] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        output_dimension: typing.Optional[int] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[V2EmbedRequestTruncate] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> EmbedByTypeResponse:\n        \"\"\"\n        This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.\n\n        Embeddings can be used to create text classifiers as well as empower semantic search. 
To learn more about embeddings, see the embedding page.\n\n        If you want to learn more how to use the embedding model, have a look at the [Semantic Search Guide](https://docs.cohere.com/docs/semantic-search).\n\n        Parameters\n        ----------\n        model : str\n            ID of one of the available [Embedding models](https://docs.cohere.com/docs/cohere-embed).\n\n        input_type : EmbedInputType\n\n        texts : typing.Optional[typing.Sequence[str]]\n            An array of strings for the model to embed. Maximum number of texts per call is `96`.\n\n        images : typing.Optional[typing.Sequence[str]]\n            An array of image data URIs for the model to embed. Maximum number of images per call is `1`.\n\n            The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.\n\n            Image embeddings are supported with Embed v3.0 and newer models.\n\n        inputs : typing.Optional[typing.Sequence[EmbedInput]]\n            An array of inputs for the model to embed. Maximum number of inputs per call is `96`. An input can contain a mix of text and image components.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens to embed per input. If the input text is longer than this, it will be truncated according to the `truncate` parameter.\n\n        output_dimension : typing.Optional[int]\n            The number of dimensions of the output embedding. This is only available for `embed-v4` and newer models.\n            Possible values are `256`, `512`, `1024`, and `1536`. The default is `1536`.\n\n        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]\n            Specifies the types of embeddings you want to get back. 
Can be one or more of the following types.\n\n            * `\"float\"`: Use this when you want to get back the default float embeddings. Supported with all Embed models.\n            * `\"int8\"`: Use this when you want to get back signed int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"binary\"`: Use this when you want to get back signed binary embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"base64\"`: Use this when you want to get back base64 embeddings. Supported with Embed v3.0 and newer Embed models.\n\n        truncate : typing.Optional[V2EmbedRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). 
When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        EmbedByTypeResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.v2.embed(\n                texts=[\"hello\", \"goodbye\"],\n                model=\"embed-v4.0\",\n                input_type=\"classification\",\n                embedding_types=[\"float\"],\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.embed(\n            model=model,\n            input_type=input_type,\n            texts=texts,\n            images=images,\n            inputs=inputs,\n            max_tokens=max_tokens,\n            output_dimension=output_dimension,\n            embedding_types=embedding_types,\n            truncate=truncate,\n            priority=priority,\n            request_options=request_options,\n        )\n        return _response.data\n\n    async def rerank(\n        self,\n        *,\n        model: str,\n        query: str,\n        documents: typing.Sequence[str],\n        top_n: typing.Optional[int] = OMIT,\n        max_tokens_per_doc: typing.Optional[int] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> V2RerankResponse:\n        \"\"\"\n        This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.\n\n        Parameters\n        ----------\n        model : str\n            The identifier of the model to use, eg `rerank-v3.5`.\n\n      
  query : str\n            The search query\n\n        documents : typing.Sequence[str]\n            A list of texts that will be compared to the `query`.\n            For optimal performance we recommend against sending more than 1,000 documents in a single request.\n\n            **Note**: long documents will automatically be truncated to the value of `max_tokens_per_doc`.\n\n            **Note**: structured data should be formatted as YAML strings for best performance.\n\n        top_n : typing.Optional[int]\n            Limits the number of returned rerank results to the specified value. If not passed, all the rerank results will be returned.\n\n        max_tokens_per_doc : typing.Optional[int]\n            Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        V2RerankResponse\n            OK\n\n        Examples\n        --------\n        import asyncio\n\n        from cohere import AsyncClient\n\n        client = AsyncClient(\n            client_name=\"YOUR_CLIENT_NAME\",\n            token=\"YOUR_TOKEN\",\n        )\n\n\n        async def main() -> None:\n            await client.v2.rerank(\n                documents=[\n                    \"Carson City is the capital city of the American state of Nevada.\",\n                    \"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.\",\n                    \"Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. 
English usage varies from capitalization in other languages.\",\n                    \"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.\",\n                    \"Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.\",\n                ],\n                query=\"What is the capital of the United States?\",\n                top_n=3,\n                model=\"rerank-v4.0-pro\",\n            )\n\n\n        asyncio.run(main())\n        \"\"\"\n        _response = await self._raw_client.rerank(\n            model=model,\n            query=query,\n            documents=documents,\n            top_n=top_n,\n            max_tokens_per_doc=max_tokens_per_doc,\n            priority=priority,\n            request_options=request_options,\n        )\n        return _response.data\n"
  },
  {
    "path": "src/cohere/v2/raw_client.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport contextlib\nimport typing\nfrom json.decoder import JSONDecodeError\nfrom logging import error, warning\n\nfrom ..core.api_error import ApiError\nfrom ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper\nfrom ..core.http_response import AsyncHttpResponse, HttpResponse\nfrom ..core.http_sse._api import EventSource\nfrom ..core.parse_error import ParsingError\nfrom ..core.pydantic_utilities import parse_sse_obj\nfrom ..core.request_options import RequestOptions\nfrom ..core.serialization import convert_and_respect_annotation_metadata\nfrom ..core.unchecked_base_model import construct_type\nfrom ..errors.bad_request_error import BadRequestError\nfrom ..errors.client_closed_request_error import ClientClosedRequestError\nfrom ..errors.forbidden_error import ForbiddenError\nfrom ..errors.gateway_timeout_error import GatewayTimeoutError\nfrom ..errors.internal_server_error import InternalServerError\nfrom ..errors.invalid_token_error import InvalidTokenError\nfrom ..errors.not_found_error import NotFoundError\nfrom ..errors.not_implemented_error import NotImplementedError\nfrom ..errors.service_unavailable_error import ServiceUnavailableError\nfrom ..errors.too_many_requests_error import TooManyRequestsError\nfrom ..errors.unauthorized_error import UnauthorizedError\nfrom ..errors.unprocessable_entity_error import UnprocessableEntityError\nfrom ..types.chat_messages import ChatMessages\nfrom ..types.citation_options import CitationOptions\nfrom ..types.embed_by_type_response import EmbedByTypeResponse\nfrom ..types.embed_input import EmbedInput\nfrom ..types.embed_input_type import EmbedInputType\nfrom ..types.embedding_type import EmbeddingType\nfrom ..types.response_format_v2 import ResponseFormatV2\nfrom ..types.thinking import Thinking\nfrom ..types.tool_v2 import ToolV2\nfrom .types.v2chat_request_documents_item import V2ChatRequestDocumentsItem\nfrom 
.types.v2chat_request_safety_mode import V2ChatRequestSafetyMode\nfrom .types.v2chat_request_tool_choice import V2ChatRequestToolChoice\nfrom .types.v2chat_response import V2ChatResponse\nfrom .types.v2chat_stream_request_documents_item import V2ChatStreamRequestDocumentsItem\nfrom .types.v2chat_stream_request_safety_mode import V2ChatStreamRequestSafetyMode\nfrom .types.v2chat_stream_request_tool_choice import V2ChatStreamRequestToolChoice\nfrom .types.v2chat_stream_response import V2ChatStreamResponse\nfrom .types.v2embed_request_truncate import V2EmbedRequestTruncate\nfrom .types.v2rerank_response import V2RerankResponse\nfrom pydantic import ValidationError\n\n# this is used as the default value for optional parameters\nOMIT = typing.cast(typing.Any, ...)\n\n\nclass RawV2Client:\n    def __init__(self, *, client_wrapper: SyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    @contextlib.contextmanager\n    def chat_stream(\n        self,\n        *,\n        model: str,\n        messages: ChatMessages,\n        tools: typing.Optional[typing.Sequence[ToolV2]] = OMIT,\n        strict_tools: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]] = OMIT,\n        citation_options: typing.Optional[CitationOptions] = OMIT,\n        response_format: typing.Optional[ResponseFormatV2] = OMIT,\n        safety_mode: typing.Optional[V2ChatStreamRequestSafetyMode] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        logprobs: typing.Optional[bool] = OMIT,\n        tool_choice: 
typing.Optional[V2ChatStreamRequestToolChoice] = OMIT,\n        thinking: typing.Optional[Thinking] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> typing.Iterator[HttpResponse[typing.Iterator[V2ChatStreamResponse]]]:\n        \"\"\"\n        Generates a text response to a user message and streams it down, token by token. To learn how to use the Chat API with streaming follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).\n\n        Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.\n\n        Parameters\n        ----------\n        model : str\n            The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).\n\n        messages : ChatMessages\n\n        tools : typing.Optional[typing.Sequence[ToolV2]]\n            A list of tools (functions) available to the model. The model response may contain 'tool_calls' to the specified tools.\n\n            Learn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).\n\n        strict_tools : typing.Optional[bool]\n            When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).\n\n            **Note**: The first few requests with a new set of tools will take longer to process.\n\n        documents : typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. 
Each document is either a string or document object with content and metadata.\n\n        citation_options : typing.Optional[CitationOptions]\n\n        response_format : typing.Optional[ResponseFormatV2]\n\n        safety_mode : typing.Optional[V2ChatStreamRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.\n            When `OFF` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools` and `documents` parameters.\n\n            **Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).\n\n            **Note**: Setting a low value may result in incomplete generations. In such cases, the `finish_reason` field in the response will be set to `\"MAX_TOKENS\"`.\n\n            **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. 
If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n        k : typing.Optional[int]\n            Ensures that only the top `k` most likely tokens are considered for generation at each step. 
When `k` is set to `0`, k-sampling is disabled.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        logprobs : typing.Optional[bool]\n            Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.\n\n        tool_choice : typing.Optional[V2ChatStreamRequestToolChoice]\n            Used to control whether or not the model will be forced to use a tool when answering. When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.\n            When `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.\n            If tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.\n\n            **Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.\n\n        thinking : typing.Optional[Thinking]\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). 
When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Yields\n        ------\n        typing.Iterator[HttpResponse[typing.Iterator[V2ChatStreamResponse]]]\n\n        \"\"\"\n        with self._client_wrapper.httpx_client.stream(\n            \"v2/chat\",\n            method=\"POST\",\n            json={\n                \"model\": model,\n                \"messages\": convert_and_respect_annotation_metadata(\n                    object_=messages, annotation=ChatMessages, direction=\"write\"\n                ),\n                \"tools\": convert_and_respect_annotation_metadata(\n                    object_=tools, annotation=typing.Sequence[ToolV2], direction=\"write\"\n                ),\n                \"strict_tools\": strict_tools,\n                \"documents\": convert_and_respect_annotation_metadata(\n                    object_=documents, annotation=typing.Sequence[V2ChatStreamRequestDocumentsItem], direction=\"write\"\n                ),\n                \"citation_options\": convert_and_respect_annotation_metadata(\n                    object_=citation_options, annotation=CitationOptions, direction=\"write\"\n                ),\n                \"response_format\": convert_and_respect_annotation_metadata(\n                    object_=response_format, annotation=ResponseFormatV2, direction=\"write\"\n                ),\n                \"safety_mode\": safety_mode,\n                \"max_tokens\": max_tokens,\n                \"stop_sequences\": stop_sequences,\n                \"temperature\": temperature,\n                \"seed\": seed,\n                \"frequency_penalty\": frequency_penalty,\n                \"presence_penalty\": presence_penalty,\n                \"k\": k,\n                \"p\": p,\n                \"logprobs\": logprobs,\n                \"tool_choice\": 
tool_choice,\n                \"thinking\": convert_and_respect_annotation_metadata(\n                    object_=thinking, annotation=Thinking, direction=\"write\"\n                ),\n                \"priority\": priority,\n                \"stream\": True,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        ) as _response:\n\n            def _stream() -> HttpResponse[typing.Iterator[V2ChatStreamResponse]]:\n                try:\n                    if 200 <= _response.status_code < 300:\n\n                        def _iter():\n                            _event_source = EventSource(_response)\n                            for _sse in _event_source.iter_sse():\n                                if _sse.data == \"[DONE]\":\n                                    return\n                                try:\n                                    yield typing.cast(\n                                        V2ChatStreamResponse,\n                                        parse_sse_obj(\n                                            sse=_sse,\n                                            type_=V2ChatStreamResponse,  # type: ignore\n                                        ),\n                                    )\n                                except JSONDecodeError as e:\n                                    warning(f\"Skipping SSE event with invalid JSON: {e}, sse: {_sse!r}\")\n                                except (TypeError, ValueError, KeyError, AttributeError) as e:\n                                    warning(\n                                        f\"Skipping SSE event due to model construction error: {type(e).__name__}: {e}, sse: {_sse!r}\"\n                                    )\n                                except Exception as e:\n                                    error(\n                                        f\"Unexpected 
error processing SSE event: {type(e).__name__}: {e}, sse: {_sse!r}\"\n                                    )\n                            return\n\n                        return HttpResponse(response=_response, data=_iter())\n                    _response.read()\n                    if _response.status_code == 400:\n                        raise BadRequestError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 401:\n                        raise UnauthorizedError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 403:\n                        raise ForbiddenError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 404:\n                        raise NotFoundError(\n                            
headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 422:\n                        raise UnprocessableEntityError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 429:\n                        raise TooManyRequestsError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 498:\n                        raise InvalidTokenError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n     
                   )\n                    if _response.status_code == 499:\n                        raise ClientClosedRequestError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 500:\n                        raise InternalServerError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 501:\n                        raise NotImplementedError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 503:\n                        raise ServiceUnavailableError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    
type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 504:\n                        raise GatewayTimeoutError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    _response_json = _response.json()\n                except JSONDecodeError:\n                    raise ApiError(\n                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text\n                    )\n                except ValidationError as e:\n                    raise ParsingError(\n                        status_code=_response.status_code,\n                        headers=dict(_response.headers),\n                        body=_response.json(),\n                        cause=e,\n                    )\n                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n            yield _stream()\n\n    def chat(\n        self,\n        *,\n        model: str,\n        messages: ChatMessages,\n        tools: typing.Optional[typing.Sequence[ToolV2]] = OMIT,\n        strict_tools: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]] = OMIT,\n        citation_options: typing.Optional[CitationOptions] = OMIT,\n        response_format: typing.Optional[ResponseFormatV2] = OMIT,\n        safety_mode: typing.Optional[V2ChatRequestSafetyMode] = OMIT,\n        max_tokens: 
typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        logprobs: typing.Optional[bool] = OMIT,\n        tool_choice: typing.Optional[V2ChatRequestToolChoice] = OMIT,\n        thinking: typing.Optional[Thinking] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[V2ChatResponse]:\n        \"\"\"\n        Generates a text response to a user message. To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).\n\n        Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.\n\n        Parameters\n        ----------\n        model : str\n            The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).\n\n        messages : ChatMessages\n\n        tools : typing.Optional[typing.Sequence[ToolV2]]\n            A list of tools (functions) available to the model. The model response may contain 'tool_calls' to the specified tools.\n\n            Learn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).\n\n        strict_tools : typing.Optional[bool]\n            When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. 
Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).\n\n            **Note**: The first few requests with a new set of tools will take longer to process.\n\n        documents : typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata.\n\n        citation_options : typing.Optional[CitationOptions]\n\n        response_format : typing.Optional[ResponseFormatV2]\n\n        safety_mode : typing.Optional[V2ChatRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.\n            When `OFF` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools` and `documents` parameters.\n\n            **Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).\n\n            **Note**: Setting a low value may result in incomplete generations. 
In such cases, the `finish_reason` field in the response will be set to `\"MAX_TOKENS\"`.\n\n            **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n        k : typing.Optional[int]\n            Ensures that only the top `k` most likely tokens are considered for generation at each step. When `k` is set to `0`, k-sampling is disabled.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        logprobs : typing.Optional[bool]\n            Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.\n\n        tool_choice : typing.Optional[V2ChatRequestToolChoice]\n            Used to control whether or not the model will be forced to use a tool when answering. When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.\n            When `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.\n            If tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.\n\n            **Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.\n\n        thinking : typing.Optional[Thinking]\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). 
When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[V2ChatResponse]\n\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v2/chat\",\n            method=\"POST\",\n            json={\n                \"model\": model,\n                \"messages\": convert_and_respect_annotation_metadata(\n                    object_=messages, annotation=ChatMessages, direction=\"write\"\n                ),\n                \"tools\": convert_and_respect_annotation_metadata(\n                    object_=tools, annotation=typing.Sequence[ToolV2], direction=\"write\"\n                ),\n                \"strict_tools\": strict_tools,\n                \"documents\": convert_and_respect_annotation_metadata(\n                    object_=documents, annotation=typing.Sequence[V2ChatRequestDocumentsItem], direction=\"write\"\n                ),\n                \"citation_options\": convert_and_respect_annotation_metadata(\n                    object_=citation_options, annotation=CitationOptions, direction=\"write\"\n                ),\n                \"response_format\": convert_and_respect_annotation_metadata(\n                    object_=response_format, annotation=ResponseFormatV2, direction=\"write\"\n                ),\n                \"safety_mode\": safety_mode,\n                \"max_tokens\": max_tokens,\n                \"stop_sequences\": stop_sequences,\n                \"temperature\": temperature,\n                \"seed\": seed,\n                \"frequency_penalty\": frequency_penalty,\n                \"presence_penalty\": presence_penalty,\n                \"k\": k,\n                \"p\": p,\n                \"logprobs\": logprobs,\n                \"tool_choice\": tool_choice,\n                
\"thinking\": convert_and_respect_annotation_metadata(\n                    object_=thinking, annotation=Thinking, direction=\"write\"\n                ),\n                \"priority\": priority,\n                \"stream\": False,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    V2ChatResponse,\n                    construct_type(\n                        type_=V2ChatResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                    
        type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                       
 typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def embed(\n        self,\n        *,\n        model: str,\n        input_type: EmbedInputType,\n        texts: typing.Optional[typing.Sequence[str]] = OMIT,\n        images: typing.Optional[typing.Sequence[str]] = OMIT,\n        inputs: typing.Optional[typing.Sequence[EmbedInput]] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        output_dimension: typing.Optional[int] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[V2EmbedRequestTruncate] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[EmbedByTypeResponse]:\n        \"\"\"\n        This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.\n\n        Embeddings can be used to create text classifiers as well as empower semantic search. 
To learn more about embeddings, see the embedding page.\n\n        If you want to learn more how to use the embedding model, have a look at the [Semantic Search Guide](https://docs.cohere.com/docs/semantic-search).\n\n        Parameters\n        ----------\n        model : str\n            ID of one of the available [Embedding models](https://docs.cohere.com/docs/cohere-embed).\n\n        input_type : EmbedInputType\n\n        texts : typing.Optional[typing.Sequence[str]]\n            An array of strings for the model to embed. Maximum number of texts per call is `96`.\n\n        images : typing.Optional[typing.Sequence[str]]\n            An array of image data URIs for the model to embed. Maximum number of images per call is `1`.\n\n            The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.\n\n            Image embeddings are supported with Embed v3.0 and newer models.\n\n        inputs : typing.Optional[typing.Sequence[EmbedInput]]\n            An array of inputs for the model to embed. Maximum number of inputs per call is `96`. An input can contain a mix of text and image components.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens to embed per input. If the input text is longer than this, it will be truncated according to the `truncate` parameter.\n\n        output_dimension : typing.Optional[int]\n            The number of dimensions of the output embedding. This is only available for `embed-v4` and newer models.\n            Possible values are `256`, `512`, `1024`, and `1536`. The default is `1536`.\n\n        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]\n            Specifies the types of embeddings you want to get back. 
Can be one or more of the following types.\n\n            * `\"float\"`: Use this when you want to get back the default float embeddings. Supported with all Embed models.\n            * `\"int8\"`: Use this when you want to get back signed int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"binary\"`: Use this when you want to get back signed binary embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"base64\"`: Use this when you want to get back base64 embeddings. Supported with Embed v3.0 and newer Embed models.\n\n        truncate : typing.Optional[V2EmbedRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). 
When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[EmbedByTypeResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v2/embed\",\n            method=\"POST\",\n            json={\n                \"texts\": texts,\n                \"images\": images,\n                \"model\": model,\n                \"input_type\": input_type,\n                \"inputs\": convert_and_respect_annotation_metadata(\n                    object_=inputs, annotation=typing.Sequence[EmbedInput], direction=\"write\"\n                ),\n                \"max_tokens\": max_tokens,\n                \"output_dimension\": output_dimension,\n                \"embedding_types\": embedding_types,\n                \"truncate\": truncate,\n                \"priority\": priority,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    EmbedByTypeResponse,\n                    construct_type(\n                        type_=EmbedByTypeResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                    
        object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n           
         headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            
type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    def rerank(\n        self,\n        *,\n        model: str,\n        query: str,\n        documents: typing.Sequence[str],\n        top_n: typing.Optional[int] = OMIT,\n        max_tokens_per_doc: typing.Optional[int] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> HttpResponse[V2RerankResponse]:\n        \"\"\"\n        This 
endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.\n\n        Parameters\n        ----------\n        model : str\n            The identifier of the model to use, eg `rerank-v3.5`.\n\n        query : str\n            The search query\n\n        documents : typing.Sequence[str]\n            A list of texts that will be compared to the `query`.\n            For optimal performance we recommend against sending more than 1,000 documents in a single request.\n\n            **Note**: long documents will automatically be truncated to the value of `max_tokens_per_doc`.\n\n            **Note**: structured data should be formatted as YAML strings for best performance.\n\n        top_n : typing.Optional[int]\n            Limits the number of returned rerank results to the specified value. If not passed, all the rerank results will be returned.\n\n        max_tokens_per_doc : typing.Optional[int]\n            Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). 
When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        HttpResponse[V2RerankResponse]\n            OK\n        \"\"\"\n        _response = self._client_wrapper.httpx_client.request(\n            \"v2/rerank\",\n            method=\"POST\",\n            json={\n                \"model\": model,\n                \"query\": query,\n                \"documents\": documents,\n                \"top_n\": top_n,\n                \"max_tokens_per_doc\": max_tokens_per_doc,\n                \"priority\": priority,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    V2RerankResponse,\n                    construct_type(\n                        type_=V2RerankResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return HttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n            
            construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n      
          )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    
body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n\nclass AsyncRawV2Client:\n    def __init__(self, *, client_wrapper: AsyncClientWrapper):\n        self._client_wrapper = client_wrapper\n\n    @contextlib.asynccontextmanager\n    async def chat_stream(\n        self,\n        *,\n        model: str,\n        messages: ChatMessages,\n        tools: typing.Optional[typing.Sequence[ToolV2]] = OMIT,\n        strict_tools: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]] = OMIT,\n        citation_options: typing.Optional[CitationOptions] = OMIT,\n        response_format: typing.Optional[ResponseFormatV2] = OMIT,\n        safety_mode: typing.Optional[V2ChatStreamRequestSafetyMode] = OMIT,\n        max_tokens: 
typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        logprobs: typing.Optional[bool] = OMIT,\n        tool_choice: typing.Optional[V2ChatStreamRequestToolChoice] = OMIT,\n        thinking: typing.Optional[Thinking] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[V2ChatStreamResponse]]]:\n        \"\"\"\n        Generates a text response to a user message. To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).\n\n        Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.\n\n        Parameters\n        ----------\n        model : str\n            The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).\n\n        messages : ChatMessages\n\n        tools : typing.Optional[typing.Sequence[ToolV2]]\n            A list of tools (functions) available to the model. The model response may contain 'tool_calls' to the specified tools.\n\n            Learn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).\n\n        strict_tools : typing.Optional[bool]\n            When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. 
Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).\n\n            **Note**: The first few requests with a new set of tools will take longer to process.\n\n        documents : typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata.\n\n        citation_options : typing.Optional[CitationOptions]\n\n        response_format : typing.Optional[ResponseFormatV2]\n\n        safety_mode : typing.Optional[V2ChatStreamRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.\n            When `OFF` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools` and `documents` parameters.\n\n            **Note**: This parameter is only compatible newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).\n\n            **Note**: Setting a low value may result in incomplete generations. 
In such cases, the `finish_reason` field in the response will be set to `\"MAX_TOKENS\"`.\n\n            **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n        k : typing.Optional[int]\n            Ensures that only the top `k` most likely tokens are considered for generation at each step. When `k` is set to `0`, k-sampling is disabled.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        logprobs : typing.Optional[bool]\n            Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.\n\n        tool_choice : typing.Optional[V2ChatStreamRequestToolChoice]\n            Used to control whether or not the model will be forced to use a tool when answering. When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.\n            When `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.\n            If tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.\n\n            **Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.\n\n        thinking : typing.Optional[Thinking]\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). 
When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Yields\n        ------\n        typing.AsyncIterator[AsyncHttpResponse[typing.AsyncIterator[V2ChatStreamResponse]]]\n\n        \"\"\"\n        async with self._client_wrapper.httpx_client.stream(\n            \"v2/chat\",\n            method=\"POST\",\n            json={\n                \"model\": model,\n                \"messages\": convert_and_respect_annotation_metadata(\n                    object_=messages, annotation=ChatMessages, direction=\"write\"\n                ),\n                \"tools\": convert_and_respect_annotation_metadata(\n                    object_=tools, annotation=typing.Sequence[ToolV2], direction=\"write\"\n                ),\n                \"strict_tools\": strict_tools,\n                \"documents\": convert_and_respect_annotation_metadata(\n                    object_=documents, annotation=typing.Sequence[V2ChatStreamRequestDocumentsItem], direction=\"write\"\n                ),\n                \"citation_options\": convert_and_respect_annotation_metadata(\n                    object_=citation_options, annotation=CitationOptions, direction=\"write\"\n                ),\n                \"response_format\": convert_and_respect_annotation_metadata(\n                    object_=response_format, annotation=ResponseFormatV2, direction=\"write\"\n                ),\n                \"safety_mode\": safety_mode,\n                \"max_tokens\": max_tokens,\n                \"stop_sequences\": stop_sequences,\n                \"temperature\": temperature,\n                \"seed\": seed,\n                \"frequency_penalty\": frequency_penalty,\n                \"presence_penalty\": presence_penalty,\n                \"k\": k,\n                \"p\": p,\n                \"logprobs\": logprobs,\n             
   \"tool_choice\": tool_choice,\n                \"thinking\": convert_and_respect_annotation_metadata(\n                    object_=thinking, annotation=Thinking, direction=\"write\"\n                ),\n                \"priority\": priority,\n                \"stream\": True,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        ) as _response:\n\n            async def _stream() -> AsyncHttpResponse[typing.AsyncIterator[V2ChatStreamResponse]]:\n                try:\n                    if 200 <= _response.status_code < 300:\n\n                        async def _iter():\n                            _event_source = EventSource(_response)\n                            async for _sse in _event_source.aiter_sse():\n                                if _sse.data == \"[DONE]\":\n                                    return\n                                try:\n                                    yield typing.cast(\n                                        V2ChatStreamResponse,\n                                        parse_sse_obj(\n                                            sse=_sse,\n                                            type_=V2ChatStreamResponse,  # type: ignore\n                                        ),\n                                    )\n                                except JSONDecodeError as e:\n                                    warning(f\"Skipping SSE event with invalid JSON: {e}, sse: {_sse!r}\")\n                                except (TypeError, ValueError, KeyError, AttributeError) as e:\n                                    warning(\n                                        f\"Skipping SSE event due to model construction error: {type(e).__name__}: {e}, sse: {_sse!r}\"\n                                    )\n                                except Exception as e:\n                                    error(\n       
                                 f\"Unexpected error processing SSE event: {type(e).__name__}: {e}, sse: {_sse!r}\"\n                                    )\n                            return\n\n                        return AsyncHttpResponse(response=_response, data=_iter())\n                    await _response.aread()\n                    if _response.status_code == 400:\n                        raise BadRequestError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 401:\n                        raise UnauthorizedError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 403:\n                        raise ForbiddenError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 404:\n                    
    raise NotFoundError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 422:\n                        raise UnprocessableEntityError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 429:\n                        raise TooManyRequestsError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 498:\n                        raise InvalidTokenError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                   
             ),\n                            ),\n                        )\n                    if _response.status_code == 499:\n                        raise ClientClosedRequestError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 500:\n                        raise InternalServerError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 501:\n                        raise NotImplementedError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 503:\n                        raise ServiceUnavailableError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                
construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    if _response.status_code == 504:\n                        raise GatewayTimeoutError(\n                            headers=dict(_response.headers),\n                            body=typing.cast(\n                                typing.Any,\n                                construct_type(\n                                    type_=typing.Any,  # type: ignore\n                                    object_=_response.json(),\n                                ),\n                            ),\n                        )\n                    _response_json = _response.json()\n                except JSONDecodeError:\n                    raise ApiError(\n                        status_code=_response.status_code, headers=dict(_response.headers), body=_response.text\n                    )\n                except ValidationError as e:\n                    raise ParsingError(\n                        status_code=_response.status_code,\n                        headers=dict(_response.headers),\n                        body=_response.json(),\n                        cause=e,\n                    )\n                raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n            yield await _stream()\n\n    async def chat(\n        self,\n        *,\n        model: str,\n        messages: ChatMessages,\n        tools: typing.Optional[typing.Sequence[ToolV2]] = OMIT,\n        strict_tools: typing.Optional[bool] = OMIT,\n        documents: typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]] = OMIT,\n        citation_options: typing.Optional[CitationOptions] = OMIT,\n        response_format: typing.Optional[ResponseFormatV2] = OMIT,\n        safety_mode: 
typing.Optional[V2ChatRequestSafetyMode] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,\n        temperature: typing.Optional[float] = OMIT,\n        seed: typing.Optional[int] = OMIT,\n        frequency_penalty: typing.Optional[float] = OMIT,\n        presence_penalty: typing.Optional[float] = OMIT,\n        k: typing.Optional[int] = OMIT,\n        p: typing.Optional[float] = OMIT,\n        logprobs: typing.Optional[bool] = OMIT,\n        tool_choice: typing.Optional[V2ChatRequestToolChoice] = OMIT,\n        thinking: typing.Optional[Thinking] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[V2ChatResponse]:\n        \"\"\"\n        Generates a text response to a user message and streams it down, token by token. To learn how to use the Chat API with streaming follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).\n\n        Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.\n\n        Parameters\n        ----------\n        model : str\n            The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).\n\n        messages : ChatMessages\n\n        tools : typing.Optional[typing.Sequence[ToolV2]]\n            A list of tools (functions) available to the model. The model response may contain 'tool_calls' to the specified tools.\n\n            Learn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).\n\n        strict_tools : typing.Optional[bool]\n            When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. 
Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).\n\n            **Note**: The first few requests with a new set of tools will take longer to process.\n\n        documents : typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]]\n            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata.\n\n        citation_options : typing.Optional[CitationOptions]\n\n        response_format : typing.Optional[ResponseFormatV2]\n\n        safety_mode : typing.Optional[V2ChatRequestSafetyMode]\n            Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.\n            When `OFF` is specified, the safety instruction will be omitted.\n\n            Safety modes are not yet configurable in combination with `tools` and `documents` parameters.\n\n            **Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).\n\n            **Note**: `command-r7b-12-2024` and newer models only support `\"CONTEXTUAL\"` and `\"STRICT\"` modes.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).\n\n            **Note**: Setting a low value may result in incomplete generations. 
In such cases, the `finish_reason` field in the response will be set to `\"MAX_TOKENS\"`.\n\n            **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.\n\n        stop_sequences : typing.Optional[typing.Sequence[str]]\n            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.\n\n        temperature : typing.Optional[float]\n            Defaults to `0.3`.\n\n            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.\n\n            Randomness can be further maximized by increasing the  value of the `p` parameter.\n\n        seed : typing.Optional[int]\n            If specified, the backend will make a best effort to sample tokens\n            deterministically, such that repeated requests with the same\n            seed and parameters should return the same result. However,\n            determinism cannot be totally guaranteed.\n\n        frequency_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.\n\n        presence_penalty : typing.Optional[float]\n            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.\n            Used to reduce repetitiveness of generated tokens. 
Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.\n\n        k : typing.Optional[int]\n            Ensures that only the top `k` most likely tokens are considered for generation at each step. When `k` is set to `0`, k-sampling is disabled.\n            Defaults to `0`, min value of `0`, max value of `500`.\n\n        p : typing.Optional[float]\n            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.\n            Defaults to `0.75`. min value of `0.01`, max value of `0.99`.\n\n        logprobs : typing.Optional[bool]\n            Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.\n\n        tool_choice : typing.Optional[V2ChatRequestToolChoice]\n            Used to control whether or not the model will be forced to use a tool when answering. When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.\n            When `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.\n            If tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.\n\n            **Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.\n\n        thinking : typing.Optional[Thinking]\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). 
When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[V2ChatResponse]\n\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v2/chat\",\n            method=\"POST\",\n            json={\n                \"model\": model,\n                \"messages\": convert_and_respect_annotation_metadata(\n                    object_=messages, annotation=ChatMessages, direction=\"write\"\n                ),\n                \"tools\": convert_and_respect_annotation_metadata(\n                    object_=tools, annotation=typing.Sequence[ToolV2], direction=\"write\"\n                ),\n                \"strict_tools\": strict_tools,\n                \"documents\": convert_and_respect_annotation_metadata(\n                    object_=documents, annotation=typing.Sequence[V2ChatRequestDocumentsItem], direction=\"write\"\n                ),\n                \"citation_options\": convert_and_respect_annotation_metadata(\n                    object_=citation_options, annotation=CitationOptions, direction=\"write\"\n                ),\n                \"response_format\": convert_and_respect_annotation_metadata(\n                    object_=response_format, annotation=ResponseFormatV2, direction=\"write\"\n                ),\n                \"safety_mode\": safety_mode,\n                \"max_tokens\": max_tokens,\n                \"stop_sequences\": stop_sequences,\n                \"temperature\": temperature,\n                \"seed\": seed,\n                \"frequency_penalty\": frequency_penalty,\n                \"presence_penalty\": presence_penalty,\n                \"k\": k,\n                \"p\": p,\n                \"logprobs\": logprobs,\n                \"tool_choice\": tool_choice,\n            
    \"thinking\": convert_and_respect_annotation_metadata(\n                    object_=thinking, annotation=Thinking, direction=\"write\"\n                ),\n                \"priority\": priority,\n                \"stream\": False,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    V2ChatResponse,\n                    construct_type(\n                        type_=V2ChatResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n           
                 type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if 
_response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                       
 typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def embed(\n        self,\n        *,\n        model: str,\n        input_type: EmbedInputType,\n        texts: typing.Optional[typing.Sequence[str]] = OMIT,\n        images: typing.Optional[typing.Sequence[str]] = OMIT,\n        inputs: typing.Optional[typing.Sequence[EmbedInput]] = OMIT,\n        max_tokens: typing.Optional[int] = OMIT,\n        output_dimension: typing.Optional[int] = OMIT,\n        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,\n        truncate: typing.Optional[V2EmbedRequestTruncate] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> AsyncHttpResponse[EmbedByTypeResponse]:\n        \"\"\"\n        This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.\n\n        Embeddings can be used to create text classifiers as well as empower semantic search. 
To learn more about embeddings, see the embedding page.\n\n        If you want to learn more how to use the embedding model, have a look at the [Semantic Search Guide](https://docs.cohere.com/docs/semantic-search).\n\n        Parameters\n        ----------\n        model : str\n            ID of one of the available [Embedding models](https://docs.cohere.com/docs/cohere-embed).\n\n        input_type : EmbedInputType\n\n        texts : typing.Optional[typing.Sequence[str]]\n            An array of strings for the model to embed. Maximum number of texts per call is `96`.\n\n        images : typing.Optional[typing.Sequence[str]]\n            An array of image data URIs for the model to embed. Maximum number of images per call is `1`.\n\n            The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format and has a maximum size of 5MB.\n\n            Image embeddings are supported with Embed v3.0 and newer models.\n\n        inputs : typing.Optional[typing.Sequence[EmbedInput]]\n            An array of inputs for the model to embed. Maximum number of inputs per call is `96`. An input can contain a mix of text and image components.\n\n        max_tokens : typing.Optional[int]\n            The maximum number of tokens to embed per input. If the input text is longer than this, it will be truncated according to the `truncate` parameter.\n\n        output_dimension : typing.Optional[int]\n            The number of dimensions of the output embedding. This is only available for `embed-v4` and newer models.\n            Possible values are `256`, `512`, `1024`, and `1536`. The default is `1536`.\n\n        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]\n            Specifies the types of embeddings you want to get back. 
Can be one or more of the following types.\n\n            * `\"float\"`: Use this when you want to get back the default float embeddings. Supported with all Embed models.\n            * `\"int8\"`: Use this when you want to get back signed int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"uint8\"`: Use this when you want to get back unsigned int8 embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"binary\"`: Use this when you want to get back signed binary embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"ubinary\"`: Use this when you want to get back unsigned binary embeddings. Supported with Embed v3.0 and newer Embed models.\n            * `\"base64\"`: Use this when you want to get back base64 embeddings. Supported with Embed v3.0 and newer Embed models.\n\n        truncate : typing.Optional[V2EmbedRequestTruncate]\n            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.\n\n            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.\n\n            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). 
When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[EmbedByTypeResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v2/embed\",\n            method=\"POST\",\n            json={\n                \"texts\": texts,\n                \"images\": images,\n                \"model\": model,\n                \"input_type\": input_type,\n                \"inputs\": convert_and_respect_annotation_metadata(\n                    object_=inputs, annotation=typing.Sequence[EmbedInput], direction=\"write\"\n                ),\n                \"max_tokens\": max_tokens,\n                \"output_dimension\": output_dimension,\n                \"embedding_types\": embedding_types,\n                \"truncate\": truncate,\n                \"priority\": priority,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    EmbedByTypeResponse,\n                    construct_type(\n                        type_=EmbedByTypeResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n    
                        object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise 
TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n   
                         type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n\n    async def rerank(\n        self,\n        *,\n        model: str,\n        query: str,\n        documents: typing.Sequence[str],\n        top_n: typing.Optional[int] = OMIT,\n        max_tokens_per_doc: typing.Optional[int] = OMIT,\n        priority: typing.Optional[int] = OMIT,\n        request_options: typing.Optional[RequestOptions] = None,\n    ) -> 
AsyncHttpResponse[V2RerankResponse]:\n        \"\"\"\n        This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.\n\n        Parameters\n        ----------\n        model : str\n            The identifier of the model to use, eg `rerank-v3.5`.\n\n        query : str\n            The search query\n\n        documents : typing.Sequence[str]\n            A list of texts that will be compared to the `query`.\n            For optimal performance we recommend against sending more than 1,000 documents in a single request.\n\n            **Note**: long documents will automatically be truncated to the value of `max_tokens_per_doc`.\n\n            **Note**: structured data should be formatted as YAML strings for best performance.\n\n        top_n : typing.Optional[int]\n            Limits the number of returned rerank results to the specified value. If not passed, all the rerank results will be returned.\n\n        max_tokens_per_doc : typing.Optional[int]\n            Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.\n\n        priority : typing.Optional[int]\n            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). 
When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.\n\n        request_options : typing.Optional[RequestOptions]\n            Request-specific configuration.\n\n        Returns\n        -------\n        AsyncHttpResponse[V2RerankResponse]\n            OK\n        \"\"\"\n        _response = await self._client_wrapper.httpx_client.request(\n            \"v2/rerank\",\n            method=\"POST\",\n            json={\n                \"model\": model,\n                \"query\": query,\n                \"documents\": documents,\n                \"top_n\": top_n,\n                \"max_tokens_per_doc\": max_tokens_per_doc,\n                \"priority\": priority,\n            },\n            headers={\n                \"content-type\": \"application/json\",\n            },\n            request_options=request_options,\n            omit=OMIT,\n        )\n        try:\n            if 200 <= _response.status_code < 300:\n                _data = typing.cast(\n                    V2RerankResponse,\n                    construct_type(\n                        type_=V2RerankResponse,  # type: ignore\n                        object_=_response.json(),\n                    ),\n                )\n                return AsyncHttpResponse(response=_response, data=_data)\n            if _response.status_code == 400:\n                raise BadRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 401:\n                raise UnauthorizedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        
typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 403:\n                raise ForbiddenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 404:\n                raise NotFoundError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 422:\n                raise UnprocessableEntityError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 429:\n                raise TooManyRequestsError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n     
               ),\n                )\n            if _response.status_code == 498:\n                raise InvalidTokenError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 499:\n                raise ClientClosedRequestError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 500:\n                raise InternalServerError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 501:\n                raise NotImplementedError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 503:\n                raise ServiceUnavailableError(\n                    headers=dict(_response.headers),\n           
         body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            if _response.status_code == 504:\n                raise GatewayTimeoutError(\n                    headers=dict(_response.headers),\n                    body=typing.cast(\n                        typing.Any,\n                        construct_type(\n                            type_=typing.Any,  # type: ignore\n                            object_=_response.json(),\n                        ),\n                    ),\n                )\n            _response_json = _response.json()\n        except JSONDecodeError:\n            raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)\n        except ValidationError as e:\n            raise ParsingError(\n                status_code=_response.status_code, headers=dict(_response.headers), body=_response.json(), cause=e\n            )\n        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)\n"
  },
  {
    "path": "src/cohere/v2/types/__init__.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\n# isort: skip_file\n\nimport typing\nfrom importlib import import_module\n\nif typing.TYPE_CHECKING:\n    from .v2chat_request_documents_item import V2ChatRequestDocumentsItem\n    from .v2chat_request_safety_mode import V2ChatRequestSafetyMode\n    from .v2chat_request_tool_choice import V2ChatRequestToolChoice\n    from .v2chat_response import V2ChatResponse\n    from .v2chat_stream_request_documents_item import V2ChatStreamRequestDocumentsItem\n    from .v2chat_stream_request_safety_mode import V2ChatStreamRequestSafetyMode\n    from .v2chat_stream_request_tool_choice import V2ChatStreamRequestToolChoice\n    from .v2chat_stream_response import (\n        CitationEndV2ChatStreamResponse,\n        CitationStartV2ChatStreamResponse,\n        ContentDeltaV2ChatStreamResponse,\n        ContentEndV2ChatStreamResponse,\n        ContentStartV2ChatStreamResponse,\n        DebugV2ChatStreamResponse,\n        MessageEndV2ChatStreamResponse,\n        MessageStartV2ChatStreamResponse,\n        ToolCallDeltaV2ChatStreamResponse,\n        ToolCallEndV2ChatStreamResponse,\n        ToolCallStartV2ChatStreamResponse,\n        ToolPlanDeltaV2ChatStreamResponse,\n        V2ChatStreamResponse,\n    )\n    from .v2embed_request_truncate import V2EmbedRequestTruncate\n    from .v2rerank_response import V2RerankResponse\n    from .v2rerank_response_results_item import V2RerankResponseResultsItem\n_dynamic_imports: typing.Dict[str, str] = {\n    \"CitationEndV2ChatStreamResponse\": \".v2chat_stream_response\",\n    \"CitationStartV2ChatStreamResponse\": \".v2chat_stream_response\",\n    \"ContentDeltaV2ChatStreamResponse\": \".v2chat_stream_response\",\n    \"ContentEndV2ChatStreamResponse\": \".v2chat_stream_response\",\n    \"ContentStartV2ChatStreamResponse\": \".v2chat_stream_response\",\n    \"DebugV2ChatStreamResponse\": \".v2chat_stream_response\",\n    \"MessageEndV2ChatStreamResponse\": 
\".v2chat_stream_response\",\n    \"MessageStartV2ChatStreamResponse\": \".v2chat_stream_response\",\n    \"ToolCallDeltaV2ChatStreamResponse\": \".v2chat_stream_response\",\n    \"ToolCallEndV2ChatStreamResponse\": \".v2chat_stream_response\",\n    \"ToolCallStartV2ChatStreamResponse\": \".v2chat_stream_response\",\n    \"ToolPlanDeltaV2ChatStreamResponse\": \".v2chat_stream_response\",\n    \"V2ChatRequestDocumentsItem\": \".v2chat_request_documents_item\",\n    \"V2ChatRequestSafetyMode\": \".v2chat_request_safety_mode\",\n    \"V2ChatRequestToolChoice\": \".v2chat_request_tool_choice\",\n    \"V2ChatResponse\": \".v2chat_response\",\n    \"V2ChatStreamRequestDocumentsItem\": \".v2chat_stream_request_documents_item\",\n    \"V2ChatStreamRequestSafetyMode\": \".v2chat_stream_request_safety_mode\",\n    \"V2ChatStreamRequestToolChoice\": \".v2chat_stream_request_tool_choice\",\n    \"V2ChatStreamResponse\": \".v2chat_stream_response\",\n    \"V2EmbedRequestTruncate\": \".v2embed_request_truncate\",\n    \"V2RerankResponse\": \".v2rerank_response\",\n    \"V2RerankResponseResultsItem\": \".v2rerank_response_results_item\",\n}\n\n\ndef __getattr__(attr_name: str) -> typing.Any:\n    module_name = _dynamic_imports.get(attr_name)\n    if module_name is None:\n        raise AttributeError(f\"No {attr_name} found in _dynamic_imports for module name -> {__name__}\")\n    try:\n        module = import_module(module_name, __package__)\n        if module_name == f\".{attr_name}\":\n            return module\n        else:\n            return getattr(module, attr_name)\n    except ImportError as e:\n        raise ImportError(f\"Failed to import {attr_name} from {module_name}: {e}\") from e\n    except AttributeError as e:\n        raise AttributeError(f\"Failed to get {attr_name} from {module_name}: {e}\") from e\n\n\ndef __dir__():\n    lazy_attrs = list(_dynamic_imports.keys())\n    return sorted(lazy_attrs)\n\n\n__all__ = [\n    \"CitationEndV2ChatStreamResponse\",\n    
\"CitationStartV2ChatStreamResponse\",\n    \"ContentDeltaV2ChatStreamResponse\",\n    \"ContentEndV2ChatStreamResponse\",\n    \"ContentStartV2ChatStreamResponse\",\n    \"DebugV2ChatStreamResponse\",\n    \"MessageEndV2ChatStreamResponse\",\n    \"MessageStartV2ChatStreamResponse\",\n    \"ToolCallDeltaV2ChatStreamResponse\",\n    \"ToolCallEndV2ChatStreamResponse\",\n    \"ToolCallStartV2ChatStreamResponse\",\n    \"ToolPlanDeltaV2ChatStreamResponse\",\n    \"V2ChatRequestDocumentsItem\",\n    \"V2ChatRequestSafetyMode\",\n    \"V2ChatRequestToolChoice\",\n    \"V2ChatResponse\",\n    \"V2ChatStreamRequestDocumentsItem\",\n    \"V2ChatStreamRequestSafetyMode\",\n    \"V2ChatStreamRequestToolChoice\",\n    \"V2ChatStreamResponse\",\n    \"V2EmbedRequestTruncate\",\n    \"V2RerankResponse\",\n    \"V2RerankResponseResultsItem\",\n]\n"
  },
  {
    "path": "src/cohere/v2/types/v2chat_request_documents_item.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ...types.document import Document\n\nV2ChatRequestDocumentsItem = typing.Union[str, Document]\n"
  },
  {
    "path": "src/cohere/v2/types/v2chat_request_safety_mode.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nV2ChatRequestSafetyMode = typing.Union[typing.Literal[\"CONTEXTUAL\", \"STRICT\", \"OFF\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/v2/types/v2chat_request_tool_choice.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nV2ChatRequestToolChoice = typing.Union[typing.Literal[\"REQUIRED\", \"NONE\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/v2/types/v2chat_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ...core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ...core.unchecked_base_model import UncheckedBaseModel\nfrom ...types.assistant_message_response import AssistantMessageResponse\nfrom ...types.chat_finish_reason import ChatFinishReason\nfrom ...types.logprob_item import LogprobItem\nfrom ...types.usage import Usage\n\n\nclass V2ChatResponse(UncheckedBaseModel):\n    id: str = pydantic.Field()\n    \"\"\"\n    Unique identifier for the generated reply. Useful for submitting feedback.\n    \"\"\"\n\n    finish_reason: ChatFinishReason\n    message: AssistantMessageResponse\n    usage: typing.Optional[Usage] = None\n    logprobs: typing.Optional[typing.List[LogprobItem]] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/v2/types/v2chat_stream_request_documents_item.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nfrom ...types.document import Document\n\nV2ChatStreamRequestDocumentsItem = typing.Union[str, Document]\n"
  },
  {
    "path": "src/cohere/v2/types/v2chat_stream_request_safety_mode.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nV2ChatStreamRequestSafetyMode = typing.Union[typing.Literal[\"CONTEXTUAL\", \"STRICT\", \"OFF\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/v2/types/v2chat_stream_request_tool_choice.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nV2ChatStreamRequestToolChoice = typing.Union[typing.Literal[\"REQUIRED\", \"NONE\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/v2/types/v2chat_stream_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nfrom __future__ import annotations\n\nimport typing\n\nimport pydantic\nimport typing_extensions\nfrom ...core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ...core.unchecked_base_model import UncheckedBaseModel, UnionMetadata\nfrom ...types.chat_content_delta_event_delta import ChatContentDeltaEventDelta\nfrom ...types.chat_content_start_event_delta import ChatContentStartEventDelta\nfrom ...types.chat_message_end_event_delta import ChatMessageEndEventDelta\nfrom ...types.chat_message_start_event_delta import ChatMessageStartEventDelta\nfrom ...types.chat_tool_call_delta_event_delta import ChatToolCallDeltaEventDelta\nfrom ...types.chat_tool_call_start_event_delta import ChatToolCallStartEventDelta\nfrom ...types.chat_tool_plan_delta_event_delta import ChatToolPlanDeltaEventDelta\nfrom ...types.citation_start_event_delta import CitationStartEventDelta\nfrom ...types.logprob_item import LogprobItem\n\n\nclass MessageStartV2ChatStreamResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    type: typing.Literal[\"message-start\"] = \"message-start\"\n    id: typing.Optional[str] = None\n    delta: typing.Optional[ChatMessageStartEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ContentStartV2ChatStreamResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    type: typing.Literal[\"content-start\"] = \"content-start\"\n    index: typing.Optional[int] = None\n    delta: typing.Optional[ChatContentStartEventDelta] = None\n\n    if 
IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ContentDeltaV2ChatStreamResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    type: typing.Literal[\"content-delta\"] = \"content-delta\"\n    index: typing.Optional[int] = None\n    delta: typing.Optional[ChatContentDeltaEventDelta] = None\n    logprobs: typing.Optional[LogprobItem] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ContentEndV2ChatStreamResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    type: typing.Literal[\"content-end\"] = \"content-end\"\n    index: typing.Optional[int] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ToolPlanDeltaV2ChatStreamResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    type: typing.Literal[\"tool-plan-delta\"] = \"tool-plan-delta\"\n    delta: typing.Optional[ChatToolPlanDeltaEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        
class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ToolCallStartV2ChatStreamResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    type: typing.Literal[\"tool-call-start\"] = \"tool-call-start\"\n    index: typing.Optional[int] = None\n    delta: typing.Optional[ChatToolCallStartEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ToolCallDeltaV2ChatStreamResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    type: typing.Literal[\"tool-call-delta\"] = \"tool-call-delta\"\n    index: typing.Optional[int] = None\n    delta: typing.Optional[ChatToolCallDeltaEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass ToolCallEndV2ChatStreamResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    type: typing.Literal[\"tool-call-end\"] = \"tool-call-end\"\n    index: typing.Optional[int] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass CitationStartV2ChatStreamResponse(UncheckedBaseModel):\n    \"\"\"\n    
StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    type: typing.Literal[\"citation-start\"] = \"citation-start\"\n    index: typing.Optional[int] = None\n    delta: typing.Optional[CitationStartEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass CitationEndV2ChatStreamResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    type: typing.Literal[\"citation-end\"] = \"citation-end\"\n    index: typing.Optional[int] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass MessageEndV2ChatStreamResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    type: typing.Literal[\"message-end\"] = \"message-end\"\n    id: typing.Optional[str] = None\n    delta: typing.Optional[ChatMessageEndEventDelta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nclass DebugV2ChatStreamResponse(UncheckedBaseModel):\n    \"\"\"\n    StreamedChatResponse is returned in streaming mode (specified with `stream=True` in the request).\n    \"\"\"\n\n    type: typing.Literal[\"debug\"] = \"debug\"\n    prompt: typing.Optional[str] = 
None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n\n\nV2ChatStreamResponse = typing_extensions.Annotated[\n    typing.Union[\n        MessageStartV2ChatStreamResponse,\n        ContentStartV2ChatStreamResponse,\n        ContentDeltaV2ChatStreamResponse,\n        ContentEndV2ChatStreamResponse,\n        ToolPlanDeltaV2ChatStreamResponse,\n        ToolCallStartV2ChatStreamResponse,\n        ToolCallDeltaV2ChatStreamResponse,\n        ToolCallEndV2ChatStreamResponse,\n        CitationStartV2ChatStreamResponse,\n        CitationEndV2ChatStreamResponse,\n        MessageEndV2ChatStreamResponse,\n        DebugV2ChatStreamResponse,\n    ],\n    UnionMetadata(discriminant=\"type\"),\n]\n"
  },
  {
    "path": "src/cohere/v2/types/v2embed_request_truncate.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nV2EmbedRequestTruncate = typing.Union[typing.Literal[\"NONE\", \"START\", \"END\"], typing.Any]\n"
  },
  {
    "path": "src/cohere/v2/types/v2rerank_response.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ...core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ...core.unchecked_base_model import UncheckedBaseModel\nfrom ...types.api_meta import ApiMeta\nfrom .v2rerank_response_results_item import V2RerankResponseResultsItem\n\n\nclass V2RerankResponse(UncheckedBaseModel):\n    id: typing.Optional[str] = None\n    results: typing.List[V2RerankResponseResultsItem] = pydantic.Field()\n    \"\"\"\n    An ordered list of ranked documents\n    \"\"\"\n\n    meta: typing.Optional[ApiMeta] = None\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/v2/types/v2rerank_response_results_item.py",
    "content": "# This file was auto-generated by Fern from our API Definition.\n\nimport typing\n\nimport pydantic\nfrom ...core.pydantic_utilities import IS_PYDANTIC_V2\nfrom ...core.unchecked_base_model import UncheckedBaseModel\n\n\nclass V2RerankResponseResultsItem(UncheckedBaseModel):\n    index: int = pydantic.Field()\n    \"\"\"\n    Corresponds to the index in the original list of documents to which the ranked document belongs. (i.e. if the first value in the `results` object has an `index` value of 3, it means in the list of documents passed in, the document at `index=3` had the highest relevance)\n    \"\"\"\n\n    relevance_score: float = pydantic.Field()\n    \"\"\"\n    Relevance scores are normalized to be in the range `[0, 1]`. Scores close to `1` indicate a high relevance to the query, and scores closer to `0` indicate low relevance. It is not accurate to assume a score of 0.9 means the document is 2x more relevant than a document with a score of 0.45\n    \"\"\"\n\n    if IS_PYDANTIC_V2:\n        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra=\"allow\")  # type: ignore # Pydantic v2\n    else:\n\n        class Config:\n            smart_union = True\n            extra = pydantic.Extra.allow\n"
  },
  {
    "path": "src/cohere/version.py",
    "content": "from importlib import metadata\n\n__version__ = metadata.version(\"cohere\")\n"
  },
  {
    "path": "tests/__init__.py",
    "content": ""
  },
  {
    "path": "tests/embed_job.jsonl",
    "content": "{\"text\": \"The quick brown fox jumps over the lazy dog\"}\n"
  },
  {
    "path": "tests/test_async_client.py",
    "content": "import os\nimport unittest\n\nimport cohere\nfrom cohere import ChatConnector, ClassifyExample, CreateConnectorServiceAuth, Tool, \\\n    ToolParameterDefinitionsValue, ToolResult, UserMessage, ChatbotMessage\n\npackage_dir = os.path.dirname(os.path.abspath(__file__))\nembed_job = os.path.join(package_dir, 'embed_job.jsonl')\n\n\nclass TestClient(unittest.IsolatedAsyncioTestCase):\n    co: cohere.AsyncClient\n\n    def setUp(self) -> None:\n        self.co = cohere.AsyncClient(timeout=10000)\n\n    async def test_token_falls_back_on_env_variable(self) -> None:\n        cohere.AsyncClient(api_key=None)\n        cohere.AsyncClient(None)\n\n    async def test_context_manager(self) -> None:\n        async with cohere.AsyncClient(api_key=\"xxx\") as client:\n            self.assertIsNotNone(client)\n\n    async def test_chat(self) -> None:\n        chat = await self.co.chat(\n            model=\"command-a-03-2025\",\n            chat_history=[\n                UserMessage(\n                    message=\"Who discovered gravity?\"),\n                ChatbotMessage(message=\"The man who is widely credited with discovering \"\n                               \"gravity is Sir Isaac Newton\")\n            ],\n            message=\"What year was he born?\",\n        )\n\n        print(chat)\n\n    async def test_chat_stream(self) -> None:\n        stream = self.co.chat_stream(\n            model=\"command-a-03-2025\",\n            chat_history=[\n                UserMessage(\n                    message=\"Who discovered gravity?\"),\n                ChatbotMessage(message=\"The man who is widely credited with discovering \"\n                               \"gravity is Sir Isaac Newton\")\n            ],\n            message=\"What year was he born?\",\n        )\n\n        events = set()\n\n        async for chat_event in stream:\n            events.add(chat_event.event_type)\n            if chat_event.event_type == \"text-generation\":\n                
print(chat_event.text)\n\n        self.assertTrue(\"text-generation\" in events)\n        self.assertTrue(\"stream-start\" in events)\n        self.assertTrue(\"stream-end\" in events)\n\n    async def test_stream_equals_true(self) -> None:\n        with self.assertRaises(ValueError):\n            await self.co.chat(\n                stream=True,  # type: ignore\n                message=\"What year was he born?\",\n            )\n\n    async def test_deprecated_fn(self) -> None:\n        with self.assertRaises(ValueError):\n            await self.co.check_api_key(\"dummy\", dummy=\"dummy\")  # type: ignore\n\n    async def test_moved_fn(self) -> None:\n        with self.assertRaises(ValueError):\n            await self.co.list_connectors(\"dummy\", dummy=\"dummy\") # type: ignore\n\n\n    @unittest.skipIf(os.getenv(\"CO_API_URL\") is not None, \"Doesn't work in staging.\")\n    async def test_embed(self) -> None:\n        response = await self.co.embed(\n            texts=['hello', 'goodbye'],\n            model='embed-english-v3.0',\n            input_type=\"classification\"\n        )\n        print(response)\n\n    async def test_embed_batch_types(self) -> None:\n        # batch more than 96 texts\n        response = await self.co.embed(\n            texts=['hello']*100,\n            model='embed-english-v3.0',\n            input_type=\"classification\",\n            embedding_types=[\"float\", \"int8\", \"uint8\", \"binary\", \"ubinary\"]\n        )\n\n        if response.response_type == \"embeddings_by_type\":\n            self.assertEqual(len(response.texts or []), 100)\n            self.assertEqual(len(response.embeddings.float_ or []), 100)\n            self.assertEqual(len(response.embeddings.int8 or []), 100)\n            self.assertEqual(len(response.embeddings.uint8 or []), 100)\n            self.assertEqual(len(response.embeddings.binary or []), 100)\n            self.assertEqual(len(response.embeddings.ubinary or []), 100)\n        else:\n            
self.fail(\"Expected embeddings_by_type response type\")\n\n        print(response)\n\n    async def test_embed_batch_v1(self) -> None:\n        # batch more than 96 texts\n        response = await self.co.embed(\n            texts=['hello']*100,\n            model='embed-english-v3.0',\n            input_type=\"classification\",\n        )\n\n        if response.response_type == \"embeddings_floats\":\n            self.assertEqual(len(response.embeddings), 100)\n        else:\n            self.fail(\"Expected embeddings_floats response type\")\n\n        print(response)\n\n    @unittest.skip(\"temp\")\n    async def test_embed_job_crud(self) -> None:\n        dataset = await self.co.datasets.create(\n            name=\"test\",\n            type=\"embed-input\",\n            data=open(embed_job, 'rb'),\n        )\n\n        result = await self.co.wait(dataset)\n\n        self.assertEqual(result.dataset.validation_status, \"validated\")\n\n        # start an embed job\n        job = await self.co.embed_jobs.create(\n            dataset_id=dataset.id or \"\",\n            input_type=\"search_document\",\n            model='embed-english-v3.0')\n\n        print(job)\n\n        # list embed jobs\n        my_embed_jobs = await self.co.embed_jobs.list()\n\n        print(my_embed_jobs)\n\n        emb_result = await self.co.wait(job)\n\n        self.assertEqual(emb_result.status, \"complete\")\n\n        await self.co.embed_jobs.cancel(job.job_id)\n\n        await self.co.datasets.delete(dataset.id or \"\")\n\n    async def test_rerank(self) -> None:\n        docs = [\n            'Carson City is the capital city of the American state of Nevada.',\n            'The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.',\n            'Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. 
It is a federal district.',\n            'Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.']\n\n        response = await self.co.rerank(\n            model='rerank-v3.5',\n            query='What is the capital of the United States?',\n            documents=docs,\n            top_n=3,\n        )\n\n        print(response)\n\n    @unittest.skipIf(os.getenv(\"CO_API_URL\") is not None, \"Doesn't work in staging.\")\n    async def test_datasets_crud(self) -> None:\n        my_dataset = await self.co.datasets.create(\n            name=\"test\",\n            type=\"embed-input\",\n            data=open(embed_job, 'rb'),\n        )\n\n        print(my_dataset)\n\n        my_datasets = await self.co.datasets.list()\n\n        print(my_datasets)\n\n        dataset = await self.co.datasets.get(my_dataset.id or \"\")\n\n        print(dataset)\n\n        await self.co.datasets.delete(my_dataset.id or \"\")\n\n    @unittest.skipIf(os.getenv(\"CO_API_URL\") is not None, \"Doesn't work in staging.\")\n    async def test_save_load(self) -> None:\n        my_dataset = await self.co.datasets.create(\n            name=\"test\",\n            type=\"embed-input\",\n            data=open(embed_job, 'rb'),\n        )\n\n        result = await self.co.wait(my_dataset)\n\n        self.co.utils.save_dataset(result.dataset, \"dataset.jsonl\")\n\n        # assert files equal\n        self.assertTrue(os.path.exists(\"dataset.jsonl\"))\n        self.assertEqual(open(embed_job, 'rb').read(),\n                         open(\"dataset.jsonl\", 'rb').read())\n\n        print(result)\n\n        await self.co.datasets.delete(my_dataset.id or \"\")\n\n    async def test_tokenize(self) -> None:\n        response = await self.co.tokenize(\n            text='tokenize me! 
:D',\n            model=\"command-a-03-2025\",\n            offline=False,\n        )\n        print(response)\n\n    async def test_detokenize(self) -> None:\n        response = await self.co.detokenize(\n            tokens=[10104, 12221, 1315, 34, 1420, 69],\n            model=\"command-a-03-2025\",\n            offline=False,\n        )\n        print(response)\n\n    @unittest.skipIf(os.getenv(\"CO_API_URL\") is not None, \"Doesn't work in staging.\")\n    async def test_tool_use(self) -> None:\n        tools = [\n            Tool(\n                name=\"sales_database\",\n                description=\"Connects to a database about sales volumes\",\n                parameter_definitions={\n                    \"day\": ToolParameterDefinitionsValue(\n                        description=\"Retrieves sales data from this day, formatted as YYYY-MM-DD.\",\n                        type=\"str\",\n                        required=True\n                    )}\n            )\n        ]\n\n        tool_parameters_response = await self.co.chat(\n            message=\"How good were the sales on September 29 2023?\",\n            tools=tools,\n            model=\"command-nightly\",\n            preamble=\"\"\"\n                ## Task Description\n                You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. 
You should focus on serving the user's needs as best you can, which will be wide-ranging.\n\n                ## Style Guide\n                Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.\n            \"\"\"\n        )\n\n        if tool_parameters_response.tool_calls is not None:\n            self.assertEqual(\n                tool_parameters_response.tool_calls[0].name, \"sales_database\")\n            self.assertEqual(tool_parameters_response.tool_calls[0].parameters, {\n                             \"day\": \"2023-09-29\"})\n        else:\n            raise ValueError(\"Expected tool calls to be present\")\n\n        local_tools = {\n            \"sales_database\": lambda day: {\n                \"number_of_sales\": 120,\n                \"total_revenue\": 48500,\n                \"average_sale_value\": 404.17,\n                \"date\": \"2023-09-29\"\n            }\n        }\n\n        tool_results = []\n        for tool_call in tool_parameters_response.tool_calls:\n            output = local_tools[tool_call.name](**tool_call.parameters)\n            outputs = [output]\n\n            tool_results.append(ToolResult(\n                call=tool_call,\n                outputs=outputs\n            ))\n\n        cited_response = await self.co.chat(\n            message=\"How good were the sales on September 29?\",\n            tools=tools,\n            tool_results=tool_results,\n            force_single_step=True,\n            model=\"command-a-03-2025\",\n        )\n\n        self.assertEqual(cited_response.documents, [\n            {\n                \"average_sale_value\": \"404.17\",\n                \"date\": \"2023-09-29\",\n                \"id\": \"sales_database:0:0\",\n                \"number_of_sales\": \"120\",\n                \"total_revenue\": \"48500\",\n            }\n        ])\n\n    async def test_local_tokenize(self) -> None:\n        response = await 
self.co.tokenize(\n            model=\"command-a-03-2025\",\n            text=\"tokenize me! :D\"\n        )\n        print(response)\n\n    async def test_local_detokenize(self) -> None:\n        response = await self.co.detokenize(\n            model=\"command-a-03-2025\",\n            tokens=[10104, 12221, 1315, 34, 1420, 69]\n        )\n        print(response)\n\n    async def test_tokenize_async_context_with_sync_client(self) -> None:\n        # Test that the sync client can be used in an async context.\n        co = cohere.Client(timeout=10000)\n        print(co.tokenize(model=\"command-a-03-2025\", text=\"tokenize me! :D\"))\n        print(co.detokenize(model=\"command-a-03-2025\", tokens=[\n              10104, 12221, 1315, 34, 1420, 69]))\n"
  },
  {
    "path": "tests/test_aws_client_unit.py",
    "content": "\"\"\"\nUnit tests (mocked, no AWS credentials needed) for AWS client fixes.\n\nCovers:\n- Fix 1: SigV4 signing uses the correct host header after URL rewrite\n- Fix 2: cohere_aws.Client conditionally initializes based on mode\n- Fix 3: embed() accepts and passes output_dimension and embedding_types\n\"\"\"\n\nimport inspect\nimport json\nimport os\nimport unittest\nfrom unittest.mock import MagicMock, patch\n\nimport httpx\n\nfrom cohere.manually_maintained.cohere_aws.mode import Mode\n\n\nclass TestSigV4HostHeader(unittest.TestCase):\n    \"\"\"Fix 1: The headers dict passed to AWSRequest for SigV4 signing must\n    contain the rewritten Bedrock/SageMaker host, not the stale api.cohere.com.\"\"\"\n\n    def test_sigv4_signs_with_correct_host(self) -> None:\n        captured_aws_request_kwargs: dict = {}\n\n        mock_aws_request_cls = MagicMock()\n\n        def capture_aws_request(**kwargs):  # type: ignore\n            captured_aws_request_kwargs.update(kwargs)\n            mock_req = MagicMock()\n            mock_req.prepare.return_value = MagicMock(\n                headers={\"host\": \"bedrock-runtime.us-east-1.amazonaws.com\"}\n            )\n            return mock_req\n\n        mock_aws_request_cls.side_effect = capture_aws_request\n\n        mock_botocore = MagicMock()\n        mock_botocore.awsrequest.AWSRequest = mock_aws_request_cls\n        mock_botocore.auth.SigV4Auth.return_value = MagicMock()\n\n        mock_boto3 = MagicMock()\n        mock_session = MagicMock()\n        mock_session.region_name = \"us-east-1\"\n        mock_session.get_credentials.return_value = MagicMock()\n        mock_boto3.Session.return_value = mock_session\n\n        with patch(\"cohere.aws_client.lazy_botocore\", return_value=mock_botocore), \\\n             patch(\"cohere.aws_client.lazy_boto3\", return_value=mock_boto3):\n\n            from cohere.aws_client import map_request_to_bedrock\n\n            hook = map_request_to_bedrock(service=\"bedrock\", 
aws_region=\"us-east-1\")\n\n            request = httpx.Request(\n                method=\"POST\",\n                url=\"https://api.cohere.com/v1/chat\",\n                headers={\"connection\": \"keep-alive\"},\n                json={\"model\": \"cohere.command-r-plus-v1:0\", \"message\": \"hello\"},\n            )\n\n            self.assertEqual(request.url.host, \"api.cohere.com\")\n\n            hook(request)\n\n            self.assertIn(\"bedrock-runtime.us-east-1.amazonaws.com\", str(request.url))\n\n            signed_headers = captured_aws_request_kwargs[\"headers\"]\n            self.assertEqual(\n                signed_headers[\"host\"],\n                \"bedrock-runtime.us-east-1.amazonaws.com\",\n            )\n\n\nclass TestModeConditionalInit(unittest.TestCase):\n    \"\"\"Fix 2: cohere_aws.Client should initialize different boto3 clients\n    depending on mode, and default to SAGEMAKER for backwards compat.\"\"\"\n\n    def test_sagemaker_mode_creates_sagemaker_clients(self) -> None:\n        mock_boto3 = MagicMock()\n        mock_sagemaker = MagicMock()\n\n        with patch(\"cohere.manually_maintained.cohere_aws.client.lazy_boto3\", return_value=mock_boto3), \\\n             patch(\"cohere.manually_maintained.cohere_aws.client.lazy_sagemaker\", return_value=mock_sagemaker), \\\n             patch.dict(os.environ, {\"AWS_DEFAULT_REGION\": \"us-east-1\"}):\n\n            from cohere.manually_maintained.cohere_aws.client import Client\n\n            client = Client(aws_region=\"us-east-1\")\n\n            self.assertEqual(client.mode, Mode.SAGEMAKER)\n\n            service_names = [c[0][0] for c in mock_boto3.client.call_args_list]\n            self.assertIn(\"sagemaker-runtime\", service_names)\n            self.assertIn(\"sagemaker\", service_names)\n            self.assertNotIn(\"bedrock-runtime\", service_names)\n            self.assertNotIn(\"bedrock\", service_names)\n\n            mock_sagemaker.Session.assert_called_once()\n\n    def 
test_bedrock_mode_creates_bedrock_clients(self) -> None:\n        mock_boto3 = MagicMock()\n        mock_sagemaker = MagicMock()\n\n        with patch(\"cohere.manually_maintained.cohere_aws.client.lazy_boto3\", return_value=mock_boto3), \\\n             patch(\"cohere.manually_maintained.cohere_aws.client.lazy_sagemaker\", return_value=mock_sagemaker), \\\n             patch.dict(os.environ, {\"AWS_DEFAULT_REGION\": \"us-west-2\"}):\n\n            from cohere.manually_maintained.cohere_aws.client import Client\n\n            client = Client(aws_region=\"us-west-2\", mode=Mode.BEDROCK)\n\n            self.assertEqual(client.mode, Mode.BEDROCK)\n\n            service_names = [c[0][0] for c in mock_boto3.client.call_args_list]\n            self.assertIn(\"bedrock-runtime\", service_names)\n            self.assertIn(\"bedrock\", service_names)\n            self.assertNotIn(\"sagemaker-runtime\", service_names)\n            self.assertNotIn(\"sagemaker\", service_names)\n\n            mock_sagemaker.Session.assert_not_called()\n\n    def test_default_mode_is_sagemaker(self) -> None:\n        from cohere.manually_maintained.cohere_aws.client import Client\n\n        sig = inspect.signature(Client.__init__)\n        self.assertEqual(sig.parameters[\"mode\"].default, Mode.SAGEMAKER)\n\n\nclass TestEmbedV4Params(unittest.TestCase):\n    \"\"\"Fix 3: embed() should accept output_dimension and embedding_types,\n    pass them through to the request body, and strip them when None.\"\"\"\n\n    @staticmethod\n    def _make_bedrock_client():  # type: ignore\n        mock_boto3 = MagicMock()\n        mock_botocore = MagicMock()\n        captured_body: dict = {}\n\n        def fake_invoke_model(**kwargs):  # type: ignore\n            captured_body.update(json.loads(kwargs[\"body\"]))\n            mock_body = MagicMock()\n            mock_body.read.return_value = json.dumps({\"embeddings\": [[0.1, 0.2]]}).encode()\n            return {\"body\": mock_body}\n\n        
mock_bedrock_client = MagicMock()\n        mock_bedrock_client.invoke_model.side_effect = fake_invoke_model\n\n        def fake_boto3_client(service_name, **kwargs):  # type: ignore\n            if service_name == \"bedrock-runtime\":\n                return mock_bedrock_client\n            return MagicMock()\n\n        mock_boto3.client.side_effect = fake_boto3_client\n        return mock_boto3, mock_botocore, captured_body\n\n    def test_embed_accepts_new_params(self) -> None:\n        from cohere.manually_maintained.cohere_aws.client import Client\n\n        sig = inspect.signature(Client.embed)\n        self.assertIn(\"output_dimension\", sig.parameters)\n        self.assertIn(\"embedding_types\", sig.parameters)\n        self.assertIsNone(sig.parameters[\"output_dimension\"].default)\n        self.assertIsNone(sig.parameters[\"embedding_types\"].default)\n\n    def test_embed_passes_params_to_bedrock(self) -> None:\n        mock_boto3, mock_botocore, captured_body = self._make_bedrock_client()\n\n        with patch(\"cohere.manually_maintained.cohere_aws.client.lazy_boto3\", return_value=mock_boto3), \\\n             patch(\"cohere.manually_maintained.cohere_aws.client.lazy_botocore\", return_value=mock_botocore), \\\n             patch(\"cohere.manually_maintained.cohere_aws.client.lazy_sagemaker\", return_value=MagicMock()), \\\n             patch.dict(os.environ, {\"AWS_DEFAULT_REGION\": \"us-east-1\"}):\n\n            from cohere.manually_maintained.cohere_aws.client import Client\n\n            client = Client(aws_region=\"us-east-1\", mode=Mode.BEDROCK)\n            client.embed(\n                texts=[\"hello world\"],\n                input_type=\"search_document\",\n                model_id=\"cohere.embed-english-v3\",\n                output_dimension=256,\n                embedding_types=[\"float\", \"int8\"],\n            )\n\n            self.assertEqual(captured_body[\"output_dimension\"], 256)\n            
self.assertEqual(captured_body[\"embedding_types\"], [\"float\", \"int8\"])\n\n    def test_embed_omits_none_params(self) -> None:\n        mock_boto3, mock_botocore, captured_body = self._make_bedrock_client()\n\n        with patch(\"cohere.manually_maintained.cohere_aws.client.lazy_boto3\", return_value=mock_boto3), \\\n             patch(\"cohere.manually_maintained.cohere_aws.client.lazy_botocore\", return_value=mock_botocore), \\\n             patch(\"cohere.manually_maintained.cohere_aws.client.lazy_sagemaker\", return_value=MagicMock()), \\\n             patch.dict(os.environ, {\"AWS_DEFAULT_REGION\": \"us-east-1\"}):\n\n            from cohere.manually_maintained.cohere_aws.client import Client\n\n            client = Client(aws_region=\"us-east-1\", mode=Mode.BEDROCK)\n            client.embed(\n                texts=[\"hello world\"],\n                input_type=\"search_document\",\n                model_id=\"cohere.embed-english-v3\",\n            )\n\n            self.assertNotIn(\"output_dimension\", captured_body)\n            self.assertNotIn(\"embedding_types\", captured_body)\n\n    def test_embed_with_embedding_types_returns_dict(self) -> None:\n        \"\"\"When embedding_types is specified, the API returns embeddings as a dict.\n        The client should return that dict rather than wrapping it in Embeddings.\"\"\"\n        mock_boto3 = MagicMock()\n        mock_botocore = MagicMock()\n\n        by_type_embeddings = {\"float\": [[0.1, 0.2]], \"int8\": [[1, 2]]}\n\n        def fake_invoke_model(**kwargs):  # type: ignore\n            mock_body = MagicMock()\n            mock_body.read.return_value = json.dumps({\n                \"embeddings\": by_type_embeddings,\n                \"response_type\": \"embeddings_by_type\",\n            }).encode()\n            return {\"body\": mock_body}\n\n        mock_bedrock_client = MagicMock()\n        mock_bedrock_client.invoke_model.side_effect = fake_invoke_model\n\n        def 
fake_boto3_client(service_name, **kwargs):  # type: ignore\n            if service_name == \"bedrock-runtime\":\n                return mock_bedrock_client\n            return MagicMock()\n\n        mock_boto3.client.side_effect = fake_boto3_client\n\n        with patch(\"cohere.manually_maintained.cohere_aws.client.lazy_boto3\", return_value=mock_boto3), \\\n             patch(\"cohere.manually_maintained.cohere_aws.client.lazy_botocore\", return_value=mock_botocore), \\\n             patch(\"cohere.manually_maintained.cohere_aws.client.lazy_sagemaker\", return_value=MagicMock()), \\\n             patch.dict(os.environ, {\"AWS_DEFAULT_REGION\": \"us-east-1\"}):\n\n            from cohere.manually_maintained.cohere_aws.client import Client\n\n            client = Client(aws_region=\"us-east-1\", mode=Mode.BEDROCK)\n            result = client.embed(\n                texts=[\"hello world\"],\n                input_type=\"search_document\",\n                model_id=\"cohere.embed-english-v3\",\n                embedding_types=[\"float\", \"int8\"],\n            )\n\n            self.assertIsInstance(result, dict)\n            self.assertEqual(result, by_type_embeddings)\n"
  },
  {
    "path": "tests/test_bedrock_client.py",
    "content": "import os\nimport unittest\n\nimport typing\nimport cohere\n\naws_access_key = os.getenv(\"AWS_ACCESS_KEY\")\naws_secret_key = os.getenv(\"AWS_SECRET_KEY\")\naws_session_token = os.getenv(\"AWS_SESSION_TOKEN\")\naws_region = os.getenv(\"AWS_REGION\")\nendpoint_type = os.getenv(\"ENDPOINT_TYPE\")\n\n\ndef _setup_boto3_env():\n    \"\"\"Bridge custom test env vars to standard boto3 credential env vars.\"\"\"\n    if aws_access_key:\n        os.environ[\"AWS_ACCESS_KEY_ID\"] = aws_access_key\n    if aws_secret_key:\n        os.environ[\"AWS_SECRET_ACCESS_KEY\"] = aws_secret_key\n    if aws_session_token:\n        os.environ[\"AWS_SESSION_TOKEN\"] = aws_session_token\n\n\n@unittest.skipIf(None == os.getenv(\"TEST_AWS\"), \"tests skipped because TEST_AWS is not set\")\nclass TestClient(unittest.TestCase):\n    platform: str = \"bedrock\"\n    models: typing.Dict[str, str] = {\n        \"chat_model\": \"cohere.command-r-plus-v1:0\",\n        \"embed_model\": \"cohere.embed-multilingual-v3\",\n        \"generate_model\": \"cohere.command-text-v14\",\n    }\n\n    def setUp(self) -> None:\n        self.client = cohere.BedrockClient(\n            aws_access_key=aws_access_key,\n            aws_secret_key=aws_secret_key,\n            aws_session_token=aws_session_token,\n            aws_region=aws_region,\n        )\n\n    def test_rerank(self) -> None:\n        if self.platform != \"sagemaker\":\n            self.skipTest(\"Only sagemaker supports rerank\")\n\n        docs = [\n            'Carson City is the capital city of the American state of Nevada.',\n            'The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.',\n            'Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. 
It is a federal district.',\n            'Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.']\n\n        response = self.client.rerank(\n            model=self.models[\"rerank_model\"],\n            query='What is the capital of the United States?',\n            documents=docs,\n            top_n=3,\n        )\n\n        self.assertEqual(len(response.results), 3)\n\n    def test_embed(self) -> None:\n        response = self.client.embed(\n            model=self.models[\"embed_model\"],\n            texts=[\"I love Cohere!\"],\n            input_type=\"search_document\",\n        )\n        print(response)\n\n    def test_generate(self) -> None:\n        response = self.client.generate(\n            model=self.models[\"generate_model\"],\n            prompt='Please explain to me how LLMs work',\n        )\n        print(response)\n\n    def test_generate_stream(self) -> None:\n        response = self.client.generate_stream(\n            model=self.models[\"generate_model\"],\n            prompt='Please explain to me how LLMs work',\n        )\n        for event in response:\n            print(event)\n            if event.event_type == \"text-generation\":\n                print(event.text, end='')\n\n    def test_chat(self) -> None:\n        response = self.client.chat(\n            model=self.models[\"chat_model\"],\n            message='Please explain to me how LLMs work',\n        )\n        print(response)\n\n        self.assertIsNotNone(response.text)\n        self.assertIsNotNone(response.generation_id)\n        self.assertIsNotNone(response.finish_reason)\n        \n        self.assertIsNotNone(response.meta)\n        if response.meta is not None:\n            self.assertIsNotNone(response.meta.tokens)\n            if response.meta.tokens is not None:\n                self.assertIsNotNone(response.meta.tokens.input_tokens)\n       
         self.assertIsNotNone(response.meta.tokens.output_tokens)\n\n            self.assertIsNotNone(response.meta.billed_units)\n            if response.meta.billed_units is not None:\n                self.assertIsNotNone(response.meta.billed_units.input_tokens)\n                self.assertIsNotNone(response.meta.billed_units.output_tokens)\n\n    def test_chat_stream(self) -> None:\n        response_types = set()\n        response = self.client.chat_stream(\n            model=self.models[\"chat_model\"],\n            message='Please explain to me how LLMs work',\n        )\n        for event in response:\n            response_types.add(event.event_type)\n            if event.event_type == \"text-generation\":\n                print(event.text, end='')\n                self.assertIsNotNone(event.text)\n            if event.event_type == \"stream-end\":\n                self.assertIsNotNone(event.finish_reason)\n                self.assertIsNotNone(event.response)\n                self.assertIsNotNone(event.response.text)\n\n        self.assertSetEqual(response_types, {\"text-generation\", \"stream-end\"})\n\n\n@unittest.skipIf(None == os.getenv(\"TEST_AWS\"), \"tests skipped because TEST_AWS is not set\")\nclass TestBedrockClientV2(unittest.TestCase):\n    \"\"\"Integration tests for BedrockClientV2 (httpx-based).\n\n    Fix 1 validation: If these pass, SigV4 signing uses the correct host header,\n    since the request would fail with a signature mismatch otherwise.\n    \"\"\"\n\n    def setUp(self) -> None:\n        self.client = cohere.BedrockClientV2(\n            aws_access_key=aws_access_key,\n            aws_secret_key=aws_secret_key,\n            aws_session_token=aws_session_token,\n            aws_region=aws_region,\n        )\n\n    def test_embed(self) -> None:\n        response = self.client.embed(\n            model=\"cohere.embed-multilingual-v3\",\n            texts=[\"I love Cohere!\"],\n            input_type=\"search_document\",\n            
embedding_types=[\"float\"],\n        )\n        self.assertIsNotNone(response)\n\n    def test_embed_with_output_dimension(self) -> None:\n        response = self.client.embed(\n            model=\"cohere.embed-english-v3\",\n            texts=[\"I love Cohere!\"],\n            input_type=\"search_document\",\n            embedding_types=[\"float\"],\n            output_dimension=256,\n        )\n        self.assertIsNotNone(response)\n\n\n@unittest.skipIf(None == os.getenv(\"TEST_AWS\"), \"tests skipped because TEST_AWS is not set\")\nclass TestCohereAwsBedrockClient(unittest.TestCase):\n    \"\"\"Integration tests for cohere_aws.Client in Bedrock mode (boto3-based).\n\n    Validates:\n    - Fix 2: Client can be initialized with mode=BEDROCK without importing sagemaker\n    - Fix 3: embed() accepts output_dimension and embedding_types\n    \"\"\"\n    client: typing.Any = None\n\n    @classmethod\n    def setUpClass(cls) -> None:\n        _setup_boto3_env()\n        from cohere.manually_maintained.cohere_aws.client import Client\n        from cohere.manually_maintained.cohere_aws.mode import Mode\n        cls.client = Client(aws_region=aws_region, mode=Mode.BEDROCK)\n\n    def test_client_is_bedrock_mode(self) -> None:\n        from cohere.manually_maintained.cohere_aws.mode import Mode\n        self.assertEqual(self.client.mode, Mode.BEDROCK)\n\n    def test_embed(self) -> None:\n        response = self.client.embed(\n            texts=[\"I love Cohere!\"],\n            input_type=\"search_document\",\n            model_id=\"cohere.embed-multilingual-v3\",\n        )\n        self.assertIsNotNone(response)\n        self.assertIsNotNone(response.embeddings)\n        self.assertGreater(len(response.embeddings), 0)\n\n    def test_embed_with_embedding_types(self) -> None:\n        response = self.client.embed(\n            texts=[\"I love Cohere!\"],\n            input_type=\"search_document\",\n            model_id=\"cohere.embed-multilingual-v3\",\n            
embedding_types=[\"float\"],\n        )\n        self.assertIsNotNone(response)\n        # When embedding_types is passed, the response is a raw dict\n        self.assertIsInstance(response, dict)\n        self.assertIn(\"float\", response)\n\n    def test_embed_with_output_dimension(self) -> None:\n        response = self.client.embed(\n            texts=[\"I love Cohere!\"],\n            input_type=\"search_document\",\n            model_id=\"cohere.embed-english-v3\",\n            output_dimension=256,\n            embedding_types=[\"float\"],\n        )\n        self.assertIsNotNone(response)\n        # When embedding_types is passed, the response is a raw dict\n        self.assertIsInstance(response, dict)\n        self.assertIn(\"float\", response)\n\n    def test_embed_without_new_params(self) -> None:\n        \"\"\"Backwards compat: embed() still works without the new v4 params.\"\"\"\n        response = self.client.embed(\n            texts=[\"I love Cohere!\"],\n            input_type=\"search_document\",\n            model_id=\"cohere.embed-multilingual-v3\",\n        )\n        self.assertIsNotNone(response)\n        self.assertIsNotNone(response.embeddings)\n"
  },
  {
    "path": "tests/test_client.py",
    "content": "import json\nimport os\nimport unittest\n\nimport cohere\nfrom cohere import ChatConnector, ClassifyExample, CreateConnectorServiceAuth, Tool, \\\n    ToolParameterDefinitionsValue, ToolResult, ChatbotMessage, UserMessage, JsonObjectResponseFormat\n\nco = cohere.Client(timeout=10000)\n\npackage_dir = os.path.dirname(os.path.abspath(__file__))\nembed_job = os.path.join(package_dir, 'embed_job.jsonl')\n\n\nclass TestClient(unittest.TestCase):\n\n    def test_token_falls_back_on_env_variable(self) -> None:\n        cohere.Client(api_key=None)\n        cohere.Client(None)\n\n    def test_context_manager(self) -> None:\n        with cohere.Client(api_key=\"xxx\") as client:\n            self.assertIsNotNone(client)\n\n    def test_chat(self) -> None:\n        chat = co.chat(\n            chat_history=[\n                UserMessage(\n                    message=\"Who discovered gravity?\"),\n                ChatbotMessage(message=\"The man who is widely credited with discovering \"\n                               \"gravity is Sir Isaac Newton\")\n            ],\n            message=\"What year was he born?\",\n        )\n\n        print(chat)\n\n    def test_chat_stream(self) -> None:\n        stream = co.chat_stream(\n            chat_history=[\n                UserMessage(\n                    message=\"Who discovered gravity?\"),\n                ChatbotMessage(message=\"The man who is widely credited with discovering \"\n                               \"gravity is Sir Isaac Newton\")\n            ],\n            message=\"What year was he born?\",\n        )\n\n        events = set()\n\n        for chat_event in stream:\n            events.add(chat_event.event_type)\n            if chat_event.event_type == \"text-generation\":\n                print(chat_event.text)\n\n        self.assertTrue(\"text-generation\" in events)\n        self.assertTrue(\"stream-start\" in events)\n        self.assertTrue(\"stream-end\" in events)\n\n    def 
test_stream_equals_true(self) -> None:\n        with self.assertRaises(ValueError):\n            co.chat(\n                stream=True,  # type: ignore\n                message=\"What year was he born?\",\n            )\n\n    def test_deprecated_fn(self) -> None:\n        with self.assertRaises(ValueError):\n            co.check_api_key(\"dummy\", dummy=\"dummy\")  # type: ignore\n\n    def test_moved_fn(self) -> None:\n        with self.assertRaises(ValueError):\n            co.list_connectors(\"dummy\", dummy=\"dummy\")  # type: ignore\n\n    def test_embed(self) -> None:\n        response = co.embed(\n            texts=['hello', 'goodbye'],\n            model='embed-english-v3.0',\n            input_type=\"classification\",\n            embedding_types=[\"float\", \"int8\", \"uint8\", \"binary\", \"ubinary\"]\n        )\n\n        if response.response_type == \"embeddings_by_type\":\n            self.assertIsNotNone(response.embeddings.float)  # type: ignore\n            self.assertIsNotNone(response.embeddings.float_)\n\n            if response.embeddings.float_ is not None:\n                self.assertEqual(type(response.embeddings.float_[0][0]), float)\n            if response.embeddings.int8 is not None:\n                self.assertEqual(type(response.embeddings.int8[0][0]), int)\n            if response.embeddings.uint8 is not None:\n                self.assertEqual(type(response.embeddings.uint8[0][0]), int)\n            if response.embeddings.binary is not None:\n                self.assertEqual(type(response.embeddings.binary[0][0]), int)\n            if response.embeddings.ubinary is not None:\n                self.assertEqual(type(response.embeddings.ubinary[0][0]), int)\n\n        print(response)\n\n    def test_image_embed(self) -> None:\n        response = co.embed(\n            
images=['data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII='],\n            model='embed-multilingual-v3.0',\n            input_type=\"image\",\n            embedding_types=[\"float\"]\n        )\n\n        if response.response_type == \"embeddings_by_type\":\n            self.assertIsNotNone(response.embeddings.float)  # type: ignore\n            self.assertIsNotNone(response.embeddings.float_)\n\n            if response.embeddings.float_ is not None:\n                self.assertEqual(type(response.embeddings.float_[0][0]), float)\n            if response.embeddings.int8 is not None:\n                self.assertEqual(type(response.embeddings.int8[0][0]), int)\n            if response.embeddings.uint8 is not None:\n               
 self.assertEqual(type(response.embeddings.uint8[0][0]), int)\n            if response.embeddings.binary is not None:\n                self.assertEqual(type(response.embeddings.binary[0][0]), int)\n            if response.embeddings.ubinary is not None:\n                self.assertEqual(type(response.embeddings.ubinary[0][0]), int)\n\n        print(response)\n\n    def test_embed_batch_types(self) -> None:\n        # batch more than 96 texts\n        response = co.embed(\n            texts=['hello'] * 100,\n            model='embed-english-v3.0',\n            input_type=\"classification\",\n            embedding_types=[\"float\", \"int8\", \"uint8\", \"binary\", \"ubinary\"]\n        )\n\n        if response.response_type == \"embeddings_by_type\":\n            self.assertEqual(len(response.texts or []), 100)\n            self.assertEqual(len(response.embeddings.float_ or []), 100)\n            self.assertEqual(len(response.embeddings.int8 or []), 100)\n            self.assertEqual(len(response.embeddings.uint8 or []), 100)\n            self.assertEqual(len(response.embeddings.binary or []), 100)\n            self.assertEqual(len(response.embeddings.ubinary or []), 100)\n        else:\n            self.fail(\"Expected embeddings_by_type response type\")\n\n        print(response)\n\n    def test_embed_batch_v1(self) -> None:\n        # batch more than 96 texts\n        response = co.embed(\n            texts=['hello'] * 100,\n            model='embed-english-v3.0',\n            input_type=\"classification\",\n        )\n\n        if response.response_type == \"embeddings_floats\":\n            self.assertEqual(len(response.embeddings), 100)\n        else:\n            self.fail(\"Expected embeddings_floats response type\")\n\n        print(response)\n\n    @unittest.skip(\"temp\")\n    def test_embed_job_crud(self) -> None:\n        dataset = co.datasets.create(\n            name=\"test\",\n            type=\"embed-input\",\n            data=open(embed_job, 
'rb'),\n        )\n\n        result = co.wait(dataset)\n\n        self.assertEqual(result.dataset.validation_status, \"validated\")\n\n        # start an embed job\n        job = co.embed_jobs.create(\n            dataset_id=dataset.id or \"\",\n            input_type=\"search_document\",\n            model='embed-english-v3.0')\n\n        print(job)\n\n        # list embed jobs\n        my_embed_jobs = co.embed_jobs.list()\n\n        print(my_embed_jobs)\n\n        emb_result = co.wait(job)\n\n        self.assertEqual(emb_result.status, \"complete\")\n\n        co.embed_jobs.cancel(job.job_id)\n\n        co.datasets.delete(dataset.id or \"\")\n\n    def test_rerank(self) -> None:\n        docs = [\n            'Carson City is the capital city of the American state of Nevada.',\n            'The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.',\n            'Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.',\n            'Capital punishment (the death penalty) has existed in the United States since before the United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states.']\n\n        response = co.rerank(\n            model='rerank-v3.5',\n            query='What is the capital of the United States?',\n            documents=docs,\n            top_n=3,\n        )\n\n        print(response)\n\n    @unittest.skipIf(os.getenv(\"CO_API_URL\") is not None, \"Doesn't work in staging.\")\n    def test_datasets_crud(self) -> None:\n        my_dataset = co.datasets.create(\n            name=\"test\",\n            type=\"embed-input\",\n            data=open(embed_job, 'rb'),\n        )\n\n        print(my_dataset)\n\n        my_datasets = co.datasets.list()\n\n        print(my_datasets)\n\n        dataset = co.datasets.get(my_dataset.id or \"\")\n\n        print(dataset)\n\n        co.datasets.delete(my_dataset.id or \"\")\n\n    @unittest.skipIf(os.getenv(\"CO_API_URL\") is not None, \"Doesn't work in staging.\")\n    def test_save_load(self) -> None:\n        my_dataset = co.datasets.create(\n            name=\"test\",\n            type=\"embed-input\",\n            data=open(embed_job, 'rb'),\n        )\n\n        result = co.wait(my_dataset)\n\n        co.utils.save_dataset(result.dataset, \"dataset.jsonl\")\n\n        # assert files equal\n        self.assertTrue(os.path.exists(\"dataset.jsonl\"))\n        self.assertEqual(open(embed_job, 'rb').read(),\n                         open(\"dataset.jsonl\", 'rb').read())\n\n        print(result)\n\n        co.datasets.delete(my_dataset.id or \"\")\n\n    def test_tokenize(self) -> None:\n        response = co.tokenize(\n            text='tokenize me! 
:D',\n            model='command-a-03-2025',\n            offline=False,\n        )\n        print(response)\n\n    def test_detokenize(self) -> None:\n        response = co.detokenize(\n            tokens=[10104, 12221, 1315, 34, 1420, 69],\n            model=\"command-a-03-2025\",\n            offline=False,\n        )\n        print(response)\n\n\n    @unittest.skipIf(os.getenv(\"CO_API_URL\") is not None, \"Doesn't work in staging.\")\n    def test_tool_use(self) -> None:\n        tools = [\n            Tool(\n                name=\"sales_database\",\n                description=\"Connects to a database about sales volumes\",\n                parameter_definitions={\n                    \"day\": ToolParameterDefinitionsValue(\n                        description=\"Retrieves sales data from this day, formatted as YYYY-MM-DD.\",\n                        type=\"str\",\n                        required=True\n                    )}\n            )\n        ]\n\n        tool_parameters_response = co.chat(\n            message=\"How good were the sales on September 29 2023?\",\n            tools=tools,\n            model=\"command-nightly\",\n            preamble=\"\"\"\n                ## Task Description\n                You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. 
You should focus on serving the user's needs as best you can, which will be wide-ranging.\n\n                ## Style Guide\n                Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.\n            \"\"\"\n        )\n\n        if tool_parameters_response.tool_calls is not None:\n            self.assertEqual(\n                tool_parameters_response.tool_calls[0].name, \"sales_database\")\n            self.assertEqual(tool_parameters_response.tool_calls[0].parameters, {\n                \"day\": \"2023-09-29\"})\n        else:\n            raise ValueError(\"Expected tool calls to be present\")\n\n        local_tools = {\n            \"sales_database\": lambda day: {\n                \"number_of_sales\": 120,\n                \"total_revenue\": 48500,\n                \"average_sale_value\": 404.17,\n                \"date\": \"2023-09-29\"\n            }\n        }\n\n        tool_results = []\n        for tool_call in tool_parameters_response.tool_calls:\n            output = local_tools[tool_call.name](**tool_call.parameters)\n            outputs = [output]\n\n            tool_results.append(ToolResult(\n                call=tool_call,\n                outputs=outputs\n            ))\n\n        cited_response = co.chat(\n            message=\"How good were the sales on September 29?\",\n            tools=tools,\n            tool_results=tool_results,\n            force_single_step=True,\n            model=\"command-nightly\",\n        )\n\n        self.assertEqual(cited_response.documents, [\n            {\n                \"average_sale_value\": \"404.17\",\n                \"date\": \"2023-09-29\",\n                \"id\": \"sales_database:0:0\",\n                \"number_of_sales\": \"120\",\n                \"total_revenue\": \"48500\",\n            }\n        ])\n\n    def test_local_tokenize(self) -> None:\n        response = co.tokenize(\n            
model=\"command-a-03-2025\",\n            text=\"tokenize me! :D\"\n        )\n        print(response)\n\n    def test_local_detokenize(self) -> None:\n        response = co.detokenize(\n            model=\"command-a-03-2025\",\n            tokens=[10104, 12221, 1315, 34, 1420, 69]\n        )\n        print(response)\n"
  },
  {
    "path": "tests/test_client_init.py",
    "content": "import os\nimport typing\nimport unittest\n\nimport cohere\nfrom cohere import ToolMessage, UserMessage, AssistantMessage\n\nimport importlib.util\nHAS_BOTO3 = importlib.util.find_spec(\"boto3\") is not None\n\nclass TestClientInit(unittest.TestCase):\n    @unittest.skipUnless(HAS_BOTO3, \"boto3 not installed\")\n    def test_aws_inits(self) -> None:\n        cohere.BedrockClient()\n        cohere.BedrockClientV2()\n        cohere.SagemakerClient()\n        cohere.SagemakerClientV2()\n\n    def test_inits(self) -> None:\n        cohere.Client(api_key=\"n/a\")\n        cohere.ClientV2(api_key=\"n/a\")\n\n"
  },
  {
    "path": "tests/test_client_v2.py",
    "content": "import os\nimport typing\nimport unittest\n\nimport cohere\nfrom cohere import ToolMessage, UserMessage, AssistantMessage\n\nco = cohere.ClientV2(timeout=10000)\n\npackage_dir = os.path.dirname(os.path.abspath(__file__))\nembed_job = os.path.join(package_dir, \"embed_job.jsonl\")\n\n\nclass TestClientV2(unittest.TestCase):\n\n    def test_chat(self) -> None:\n        response = co.chat(\n            model=\"command-a-03-2025\", messages=[cohere.UserChatMessageV2(content=\"hello world!\")])\n\n        print(response.message)\n\n    def test_chat_stream(self) -> None:\n        stream = co.chat_stream(\n            model=\"command-a-03-2025\", messages=[cohere.UserChatMessageV2(content=\"hello world!\")])\n\n        events = set()\n\n        for chat_event in stream:\n            if chat_event is not None:\n                events.add(chat_event.type)\n                if chat_event.type == \"content-delta\":\n                    print(chat_event.delta)\n\n        self.assertTrue(\"message-start\" in events)\n        self.assertTrue(\"content-start\" in events)\n        self.assertTrue(\"content-delta\" in events)\n        self.assertTrue(\"content-end\" in events)\n        self.assertTrue(\"message-end\" in events)\n    \n    def test_legacy_methods_available(self) -> None:\n        self.assertTrue(hasattr(co, \"generate\"))\n        self.assertTrue(callable(getattr(co, \"generate\")))\n        self.assertTrue(hasattr(co, \"generate_stream\"))\n        self.assertTrue(callable(getattr(co, \"generate_stream\")))\n\n    @unittest.skip(\"Skip v2 test for now\")\n    def test_chat_documents(self) -> None:\n        from cohere import Document\n        documents = [\n            Document(data={\"title\": \"widget sales 2019\", \"text\": \"1 million\"}),\n            Document(data={\"title\": \"widget sales 2020\", \"text\": \"2 million\"}),\n            Document(data={\"title\": \"widget sales 2021\", \"text\": \"4 million\"}),\n        ]\n        response = 
co.chat(\n            messages=[cohere.UserChatMessageV2(\n                content=[cohere.TextContent(text=\"how many widgets were sold in 2020?\")],\n            )],\n            model=\"command-a-03-2025\",\n            documents=documents,\n        )\n\n        print(response.message)\n\n    @unittest.skip(\"Skip v2 test for now\")\n    def test_chat_tools(self) -> None:\n        from typing import Sequence\n        get_weather_tool = cohere.ToolV2Function(\n            name=\"get_weather\",\n            description=\"gets the weather of a given location\",\n            parameters={\n                \"type\": \"object\",\n                \"properties\": {\n                    \"location\": {\n                        \"type\": \"str\",\n                        \"description\": \"the location to get weather, example: San Francisco, CA\",\n                    }\n                },\n                \"required\": [\"location\"],\n            },\n        )\n        tools = [cohere.ToolV2(type=\"function\", function=get_weather_tool)]\n        messages: cohere.ChatMessages = [\n            cohere.UserChatMessageV2(content=\"what is the weather in Toronto?\")\n        ]\n        res = co.chat(model=\"command-a-03-2025\", tools=tools, messages=messages)\n\n        # call the get_weather tool\n        tool_result = {\"temperature\": \"30C\"}\n        tool_content: Sequence[cohere.TextToolContent] = [cohere.TextToolContent(text=\"The weather in Toronto is 30C\")]\n        # Use the first text content from the response if available, else fallback to str\n        assistant_content = res.message.content[0].text if (hasattr(res.message, 'content') and isinstance(res.message.content, list) and len(res.message.content) > 0 and hasattr(res.message.content[0], 'text')) else str(res.message)\n        messages.append(cohere.AssistantChatMessageV2(content=[cohere.TextAssistantMessageV2ContentItem(text=assistant_content)]))\n        if res.message.tool_calls is not None and 
res.message.tool_calls[0].id is not None:\n            messages.append(cohere.ToolChatMessageV2(\n                tool_call_id=res.message.tool_calls[0].id, content=list(tool_content)))\n\n        res = co.chat(tools=tools, messages=messages, model=\"command-a-03-2025\")\n        print(res.message)\n"
  },
  {
    "path": "tests/test_embed_streaming.py",
    "content": "\"\"\"Tests for memory-efficient embed_stream functionality.\n\nAll embed_stream code lives in manually maintained files (.fernignore protected):\n- src/cohere/client.py — Client.embed_stream()\n- src/cohere/manually_maintained/streaming_embed.py — StreamedEmbedding, extraction helpers\n\"\"\"\n\nimport unittest\n\nfrom cohere.manually_maintained.streaming_embed import (\n    StreamedEmbedding,\n    extract_embeddings_from_response,\n)\nfrom cohere.config import embed_stream_batch_size\n\n\nclass TestStreamedEmbedding(unittest.TestCase):\n    \"\"\"Test the StreamedEmbedding dataclass.\"\"\"\n\n    def test_creation(self):\n        emb = StreamedEmbedding(index=0, embedding=[0.1, 0.2], embedding_type=\"float\", text=\"hello\")\n        self.assertEqual(emb.index, 0)\n        self.assertEqual(emb.embedding, [0.1, 0.2])\n        self.assertEqual(emb.embedding_type, \"float\")\n        self.assertEqual(emb.text, \"hello\")\n\n    def test_text_optional(self):\n        emb = StreamedEmbedding(index=0, embedding=[0.1], embedding_type=\"float\")\n        self.assertIsNone(emb.text)\n\n\nclass TestExtractEmbeddings(unittest.TestCase):\n    \"\"\"Test extract_embeddings_from_response for V1 and V2 formats.\"\"\"\n\n    def test_v1_embeddings_floats(self):\n        \"\"\"V1 embeddings_floats response returns flat float embeddings.\"\"\"\n        response = {\n            \"response_type\": \"embeddings_floats\",\n            \"embeddings\": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],\n        }\n        results = list(extract_embeddings_from_response(response, [\"hello\", \"world\"]))\n\n        self.assertEqual(len(results), 2)\n        self.assertEqual(results[0].index, 0)\n        self.assertEqual(results[0].embedding, [0.1, 0.2, 0.3])\n        self.assertEqual(results[0].embedding_type, \"float\")\n        self.assertEqual(results[0].text, \"hello\")\n        self.assertEqual(results[1].index, 1)\n        self.assertEqual(results[1].text, \"world\")\n\n    def 
test_v1_embeddings_by_type(self):\n        \"\"\"V1 embeddings_by_type response returns typed embeddings.\"\"\"\n        response = {\n            \"response_type\": \"embeddings_by_type\",\n            \"embeddings\": {\n                \"float_\": [[0.1, 0.2], [0.3, 0.4]],\n                \"int8\": [[1, 2], [3, 4]],\n            },\n        }\n        results = list(extract_embeddings_from_response(response, [\"a\", \"b\"]))\n\n        # 2 texts * 2 types = 4 embeddings\n        self.assertEqual(len(results), 4)\n        float_results = [r for r in results if r.embedding_type == \"float\"]\n        int8_results = [r for r in results if r.embedding_type == \"int8\"]\n        self.assertEqual(len(float_results), 2)\n        self.assertEqual(len(int8_results), 2)\n\n    def test_v2_response_format(self):\n        \"\"\"V2 response (no response_type) returns dict embeddings.\"\"\"\n        response = {\n            \"embeddings\": {\n                \"float_\": [[0.1, 0.2], [0.3, 0.4]],\n            },\n        }\n        results = list(extract_embeddings_from_response(response, [\"x\", \"y\"]))\n\n        self.assertEqual(len(results), 2)\n        self.assertEqual(results[0].embedding_type, \"float\")\n        self.assertEqual(results[0].text, \"x\")\n\n    def test_global_offset(self):\n        \"\"\"Global offset adjusts indices for batched processing.\"\"\"\n        response = {\n            \"response_type\": \"embeddings_floats\",\n            \"embeddings\": [[0.1], [0.2]],\n        }\n        results = list(extract_embeddings_from_response(response, [\"c\", \"d\"], global_offset=100))\n\n        self.assertEqual(results[0].index, 100)\n        self.assertEqual(results[1].index, 101)\n\n    def test_empty_embeddings(self):\n        \"\"\"Empty response yields nothing.\"\"\"\n        response = {\"response_type\": \"embeddings_floats\", \"embeddings\": []}\n        results = list(extract_embeddings_from_response(response, []))\n        
self.assertEqual(results, [])\n\n    def test_texts_shorter_than_embeddings(self):\n        \"\"\"Text is None when batch_texts runs out.\"\"\"\n        response = {\n            \"response_type\": \"embeddings_floats\",\n            \"embeddings\": [[0.1], [0.2], [0.3]],\n        }\n        results = list(extract_embeddings_from_response(response, [\"only_one\"]))\n\n        self.assertEqual(results[0].text, \"only_one\")\n        self.assertIsNone(results[1].text)\n        self.assertIsNone(results[2].text)\n\n\nclass TestBatchSizeConstant(unittest.TestCase):\n    \"\"\"Test that batch_size defaults come from config, not magic numbers.\"\"\"\n\n    def test_default_batch_size_matches_api_limit(self):\n        self.assertEqual(embed_stream_batch_size, 96)\n\n\nif __name__ == \"__main__\":\n    unittest.main()\n"
  },
  {
    "path": "tests/test_embed_utils.py",
    "content": "import unittest\n\nfrom cohere import EmbeddingsByTypeEmbedResponse, EmbedByTypeResponseEmbeddings, ApiMeta, ApiMetaBilledUnits, \\\n    ApiMetaApiVersion, EmbeddingsFloatsEmbedResponse\nfrom cohere.utils import merge_embed_responses\n\nebt_1 = EmbeddingsByTypeEmbedResponse(\n    response_type=\"embeddings_by_type\",\n    id=\"1\",\n    embeddings=EmbedByTypeResponseEmbeddings(\n        float_=[[0, 1, 2], [3, 4, 5]],\n        int8=[[0, 1, 2], [3, 4, 5]],\n        uint8=[[0, 1, 2], [3, 4, 5]],\n        binary=[[0, 1, 2], [3, 4, 5]],\n        ubinary=[[0, 1, 2], [3, 4, 5]],\n    ),\n    texts=[\"hello\", \"goodbye\"],\n    meta=ApiMeta(\n        api_version=ApiMetaApiVersion(version=\"1\"),\n        billed_units=ApiMetaBilledUnits(\n            input_tokens=1,\n            output_tokens=1,\n            search_units=1,\n            classifications=1\n        ),\n        warnings=[\"test_warning_1\"]\n    )\n)\n\nebt_2 = EmbeddingsByTypeEmbedResponse(\n    response_type=\"embeddings_by_type\",\n    id=\"2\",\n    embeddings=EmbedByTypeResponseEmbeddings(\n        float_=[[7, 8, 9], [10, 11, 12]],\n        int8=[[7, 8, 9], [10, 11, 12]],\n        uint8=[[7, 8, 9], [10, 11, 12]],\n        binary=[[7, 8, 9], [10, 11, 12]],\n        ubinary=[[7, 8, 9], [10, 11, 12]],\n    ),\n    texts=[\"bye\", \"seeya\"],\n    meta=ApiMeta(\n        api_version=ApiMetaApiVersion(version=\"1\"),\n        billed_units=ApiMetaBilledUnits(\n            input_tokens=2,\n            output_tokens=2,\n            search_units=2,\n            classifications=2\n        ),\n        warnings=[\"test_warning_1\", \"test_warning_2\"]\n    )\n)\n\nebt_partial_1 = EmbeddingsByTypeEmbedResponse(\n    response_type=\"embeddings_by_type\",\n    id=\"1\",\n    embeddings=EmbedByTypeResponseEmbeddings(\n        float_=[[0, 1, 2], [3, 4, 5]],\n        int8=[[0, 1, 2], [3, 4, 5]],\n        binary=[[5, 6, 7], [8, 9, 10]],\n    ),\n    texts=[\"hello\", \"goodbye\"],\n    meta=ApiMeta(\n        
api_version=ApiMetaApiVersion(version=\"1\"),\n        billed_units=ApiMetaBilledUnits(\n            input_tokens=1,\n            output_tokens=1,\n            search_units=1,\n            classifications=1\n        ),\n        warnings=[\"test_warning_1\"]\n    )\n)\n\nebt_partial_2 = EmbeddingsByTypeEmbedResponse(\n    response_type=\"embeddings_by_type\",\n    id=\"2\",\n    embeddings=EmbedByTypeResponseEmbeddings(\n        float_=[[7, 8, 9], [10, 11, 12]],\n        int8=[[7, 8, 9], [10, 11, 12]],\n        binary=[[14, 15, 16], [17, 18, 19]],\n    ),\n    texts=[\"bye\", \"seeya\"],\n    meta=ApiMeta(\n        api_version=ApiMetaApiVersion(version=\"1\"),\n        billed_units=ApiMetaBilledUnits(\n            input_tokens=2,\n            output_tokens=2,\n            search_units=2,\n            classifications=2\n        ),\n        warnings=[\"test_warning_1\", \"test_warning_2\"]\n    )\n)\n\nebf_1 = EmbeddingsFloatsEmbedResponse(\n    response_type=\"embeddings_floats\",\n    id=\"1\",\n    texts=[\"hello\", \"goodbye\"],\n    embeddings=[[0, 1, 2], [3, 4, 5]],\n    meta=ApiMeta(\n        api_version=ApiMetaApiVersion(version=\"1\"),\n        billed_units=ApiMetaBilledUnits(\n            input_tokens=1,\n            output_tokens=1,\n            search_units=1,\n            classifications=1\n        ),\n        warnings=[\"test_warning_1\"]\n    )\n)\n\nebf_2 = EmbeddingsFloatsEmbedResponse(\n    response_type=\"embeddings_floats\",\n    id=\"2\",\n    texts=[\"bye\", \"seeya\"],\n    embeddings=[[7, 8, 9], [10, 11, 12]],\n    meta=ApiMeta(\n        api_version=ApiMetaApiVersion(version=\"1\"),\n        billed_units=ApiMetaBilledUnits(\n            input_tokens=2,\n            output_tokens=2,\n            search_units=2,\n            classifications=2\n        ),\n        warnings=[\"test_warning_1\", \"test_warning_2\"]\n    )\n)\n\n\nclass TestClient(unittest.TestCase):\n\n    def test_merge_embeddings_by_type(self) -> None:\n        resp = 
merge_embed_responses([\n            ebt_1,\n            ebt_2\n        ])\n\n        if resp.meta is None:\n            raise Exception(\"this is just for mypy\")\n\n        self.assertEqual(set(resp.meta.warnings or []), {\"test_warning_1\", \"test_warning_2\"})\n        self.assertEqual(resp, EmbeddingsByTypeEmbedResponse(\n            response_type=\"embeddings_by_type\",\n            id=\"1, 2\",\n            embeddings=EmbedByTypeResponseEmbeddings(\n                float_=[[0, 1, 2], [3, 4, 5], [7, 8, 9], [10, 11, 12]],\n                int8=[[0, 1, 2], [3, 4, 5], [7, 8, 9], [10, 11, 12]],\n                uint8=[[0, 1, 2], [3, 4, 5], [7, 8, 9], [10, 11, 12]],\n                binary=[[0, 1, 2], [3, 4, 5], [7, 8, 9], [10, 11, 12]],\n                ubinary=[[0, 1, 2], [3, 4, 5], [7, 8, 9], [10, 11, 12]],\n            ),\n            texts=[\"hello\", \"goodbye\", \"bye\", \"seeya\"],\n            meta=ApiMeta(\n                api_version=ApiMetaApiVersion(version=\"1\"),\n                billed_units=ApiMetaBilledUnits(\n                    input_tokens=3,\n                    output_tokens=3,\n                    search_units=3,\n                    classifications=3\n                ),\n                warnings=resp.meta.warnings  # order ignored\n            )\n        ))\n\n    def test_merge_embeddings_floats(self) -> None:\n        resp = merge_embed_responses([\n            ebf_1,\n            ebf_2\n        ])\n\n        if resp.meta is None:\n            raise Exception(\"this is just for mypy\")\n\n        self.assertEqual(set(resp.meta.warnings or []), {\"test_warning_1\", \"test_warning_2\"})\n        self.assertEqual(resp, EmbeddingsFloatsEmbedResponse(\n            response_type=\"embeddings_floats\",\n            id=\"1, 2\",\n            texts=[\"hello\", \"goodbye\", \"bye\", \"seeya\"],\n            embeddings=[[0, 1, 2], [3, 4, 5], [7, 8, 9], [10, 11, 12]],\n            meta=ApiMeta(\n                
api_version=ApiMetaApiVersion(version=\"1\"),\n                billed_units=ApiMetaBilledUnits(\n                    input_tokens=3,\n                    output_tokens=3,\n                    search_units=3,\n                    classifications=3\n                ),\n                warnings=resp.meta.warnings  # order ignored\n            )\n        ))\n\n    def test_merge_partial_embeddings_floats(self) -> None:\n        resp = merge_embed_responses([\n            ebt_partial_1,\n            ebt_partial_2\n        ])\n\n        if resp.meta is None:\n            raise Exception(\"this is just for mypy\")\n\n        self.assertEqual(set(resp.meta.warnings or []), {\"test_warning_1\", \"test_warning_2\"})\n        self.assertEqual(resp, EmbeddingsByTypeEmbedResponse(\n            response_type=\"embeddings_by_type\",\n            id=\"1, 2\",\n            embeddings=EmbedByTypeResponseEmbeddings(\n                float_=[[0, 1, 2], [3, 4, 5], [7, 8, 9], [10, 11, 12]],\n                int8=[[0, 1, 2], [3, 4, 5], [7, 8, 9], [10, 11, 12]],\n                binary=[[5, 6, 7], [8, 9, 10], [14, 15, 16], [17, 18, 19]],\n            ),\n            texts=[\"hello\", \"goodbye\", \"bye\", \"seeya\"],\n            meta=ApiMeta(\n                api_version=ApiMetaApiVersion(version=\"1\"),\n                billed_units=ApiMetaBilledUnits(\n                    input_tokens=3,\n                    output_tokens=3,\n                    search_units=3,\n                    classifications=3\n                ),\n                warnings=resp.meta.warnings  # order ignored\n            )\n        ))\n"
  },
  {
    "path": "tests/test_oci_client.py",
    "content": "\"\"\"Integration and unit tests for OCI Generative AI client.\n\nAll integration tests are validated against the live OCI Generative AI inference\nlayer (us-chicago-1). The OciClientV2 uses the V2 Cohere API format (COHEREV2)\nand communicates with the OCI inference endpoint at:\n    https://inference.generativeai.{region}.oci.oraclecloud.com\n\nIntegration test coverage:\n\n    V1 API (OciClient — Command R family):\n    Test                            Model                      What it proves\n    ------------------------------- -------------------------- ------------------------------------------\n    test_embed                      embed-english-v3.0         V1 embed returns 2x 1024-dim float vectors\n    test_chat                       command-r-08-2024          V1 chat returns text with COHERE apiFormat\n    test_chat_stream                command-r-08-2024          V1 streaming with text-generation events\n\n    V2 API (OciClientV2 — Command A family):\n    Test                            Model                      What it proves\n    ------------------------------- -------------------------- ------------------------------------------\n    test_embed_v2                   embed-english-v3.0         V2 embed returns dict with float_ key\n    test_embed_with_model_prefix_v2 cohere.embed-english-v3.0  Model normalization works\n    test_chat_v2                    command-a-03-2025          V2 chat returns message with COHEREV2 format\n    test_chat_stream_v2             command-a-03-2025          V2 SSE streaming with content-delta events\n    test_command_a_chat             command-a-03-2025          Command A chat via V2\n\n    Cross-cutting:\n    Test                            Model                      What it proves\n    ------------------------------- -------------------------- ------------------------------------------\n    test_config_file_auth           embed-english-v3.0         API key auth from config file\n    
test_custom_profile_auth        embed-english-v3.0         Custom OCI profile auth\n    test_embed_english_v3           embed-english-v3.0         1024-dim embeddings\n    test_embed_multilingual_v3      embed-multilingual-v3.0    Multilingual model works\n    test_invalid_model              invalid-model-name         Error handling works\n    test_missing_compartment_id     --                         Raises TypeError\n\nRequirements:\n1. OCI SDK installed: pip install oci\n2. OCI credentials configured in ~/.oci/config\n3. TEST_OCI environment variable set to run\n4. OCI_COMPARTMENT_ID environment variable with valid OCI compartment OCID\n5. OCI_REGION environment variable (optional, defaults to us-chicago-1)\n\nRun with:\n    TEST_OCI=1 OCI_COMPARTMENT_ID=ocid1.compartment.oc1... pytest tests/test_oci_client.py\n\"\"\"\n\nimport os\nimport sys\nimport tempfile\nimport types\nimport unittest\nfrom unittest.mock import MagicMock, mock_open, patch\n\nimport cohere\n\nif \"tokenizers\" not in sys.modules:\n    tokenizers_stub = types.ModuleType(\"tokenizers\")\n    tokenizers_stub.Tokenizer = object  # type: ignore[attr-defined]\n    sys.modules[\"tokenizers\"] = tokenizers_stub\n\nif \"fastavro\" not in sys.modules:\n    fastavro_stub = types.ModuleType(\"fastavro\")\n    fastavro_stub.parse_schema = lambda schema: schema  # type: ignore[attr-defined]\n    fastavro_stub.reader = lambda *args, **kwargs: iter(())  # type: ignore[attr-defined]\n    fastavro_stub.writer = lambda *args, **kwargs: None  # type: ignore[attr-defined]\n    sys.modules[\"fastavro\"] = fastavro_stub\n\nif \"httpx_sse\" not in sys.modules:\n    httpx_sse_stub = types.ModuleType(\"httpx_sse\")\n    httpx_sse_stub.connect_sse = lambda *args, **kwargs: None  # type: ignore[attr-defined]\n    sys.modules[\"httpx_sse\"] = httpx_sse_stub\n\n\n@unittest.skipIf(os.getenv(\"TEST_OCI\") is None, \"TEST_OCI not set\")\nclass TestOciClient(unittest.TestCase):\n    \"\"\"Test OciClient (V1 API) with OCI 
Generative AI.\"\"\"\n\n    def setUp(self):\n        compartment_id = os.getenv(\"OCI_COMPARTMENT_ID\")\n        if not compartment_id:\n            self.skipTest(\"OCI_COMPARTMENT_ID not set\")\n\n        region = os.getenv(\"OCI_REGION\", \"us-chicago-1\")\n        profile = os.getenv(\"OCI_PROFILE\", \"DEFAULT\")\n\n        self.client = cohere.OciClient(\n            oci_region=region,\n            oci_compartment_id=compartment_id,\n            oci_profile=profile,\n        )\n\n    def test_embed(self):\n        \"\"\"Test embedding with V1 client.\"\"\"\n        response = self.client.embed(\n            model=\"embed-english-v3.0\",\n            texts=[\"Hello world\", \"Cohere on OCI\"],\n            input_type=\"search_document\",\n        )\n        self.assertIsNotNone(response)\n        self.assertIsNotNone(response.embeddings)\n        self.assertEqual(len(response.embeddings), 2)\n        self.assertEqual(len(response.embeddings[0]), 1024)\n        self.assertEqual(response.response_type, \"embeddings_floats\")\n\n    def test_chat(self):\n        \"\"\"Test V1 chat with Command R.\"\"\"\n        response = self.client.chat(\n            model=\"command-r-08-2024\",\n            message=\"What is 2+2? 
Answer with just the number.\",\n        )\n        self.assertIsNotNone(response)\n        self.assertIsNotNone(response.text)\n        self.assertIn(\"4\", response.text)\n\n    def test_chat_stream(self):\n        \"\"\"Test V1 streaming chat terminates and produces correct events.\"\"\"\n        events = []\n        for event in self.client.chat_stream(\n            model=\"command-r-08-2024\",\n            message=\"Count from 1 to 3.\",\n        ):\n            events.append(event)\n\n        self.assertTrue(len(events) > 0)\n        text_events = [e for e in events if hasattr(e, \"text\") and e.text]\n        self.assertTrue(len(text_events) > 0)\n\n        # Verify stream terminates with correct event lifecycle\n        event_types = [getattr(e, \"event_type\", None) for e in events]\n        self.assertEqual(event_types[0], \"stream-start\")\n        self.assertEqual(event_types[-1], \"stream-end\")\n\n\n@unittest.skipIf(os.getenv(\"TEST_OCI\") is None, \"TEST_OCI not set\")\nclass TestOciClientV2(unittest.TestCase):\n    \"\"\"Test OciClientV2 (v2 API) with OCI Generative AI.\"\"\"\n\n    def setUp(self):\n        \"\"\"Set up OCI v2 client for each test.\"\"\"\n        compartment_id = os.getenv(\"OCI_COMPARTMENT_ID\")\n        if not compartment_id:\n            self.skipTest(\"OCI_COMPARTMENT_ID not set\")\n\n        region = os.getenv(\"OCI_REGION\", \"us-chicago-1\")\n        profile = os.getenv(\"OCI_PROFILE\", \"DEFAULT\")\n\n        self.client = cohere.OciClientV2(\n            oci_region=region,\n            oci_compartment_id=compartment_id,\n            oci_profile=profile,\n        )\n\n    def test_embed_v2(self):\n        \"\"\"Test embedding with v2 client.\"\"\"\n        response = self.client.embed(\n            model=\"embed-english-v3.0\",\n            texts=[\"Hello from v2\", \"Second text\"],\n            input_type=\"search_document\",\n        )\n\n        self.assertIsNotNone(response)\n        
self.assertIsNotNone(response.embeddings)\n        # V2 returns embeddings as a dict with \"float\" key\n        self.assertIsNotNone(response.embeddings.float_)\n        self.assertEqual(len(response.embeddings.float_), 2)\n        # Verify embedding dimensions (1024 for embed-english-v3.0)\n        self.assertEqual(len(response.embeddings.float_[0]), 1024)\n        self.assertEqual(response.response_type, \"embeddings_by_type\")\n\n    def test_embed_with_model_prefix_v2(self):\n        \"\"\"Test embedding with 'cohere.' model prefix on v2 client.\"\"\"\n        response = self.client.embed(\n            model=\"cohere.embed-english-v3.0\",\n            texts=[\"Test with prefix\"],\n            input_type=\"search_document\",\n        )\n\n        self.assertIsNotNone(response)\n        self.assertIsNotNone(response.embeddings)\n        self.assertIsNotNone(response.embeddings.float_)\n        self.assertEqual(len(response.embeddings.float_), 1)\n\n    def test_chat_v2(self):\n        \"\"\"Test chat with v2 client.\"\"\"\n        response = self.client.chat(\n            model=\"command-a-03-2025\",\n            messages=[{\"role\": \"user\", \"content\": \"Say hello\"}],\n        )\n\n        self.assertIsNotNone(response)\n        self.assertIsNotNone(response.message)\n\n    def test_chat_vision_v2(self):\n        \"\"\"Test vision with inline image on Command A Vision.\"\"\"\n        import base64, struct, zlib\n\n        # Create a minimal 1x1 red PNG\n        raw = b'\\x00\\xff\\x00\\x00'\n        compressed = zlib.compress(raw)\n        def chunk(ctype, data):\n            c = ctype + data\n            return struct.pack('>I', len(data)) + c + struct.pack('>I', zlib.crc32(c) & 0xffffffff)\n        ihdr = struct.pack('>IIBBBBB', 1, 1, 8, 2, 0, 0, 0)\n        png = b'\\x89PNG\\r\\n\\x1a\\n' + chunk(b'IHDR', ihdr) + chunk(b'IDAT', compressed) + chunk(b'IEND', b'')\n        img_b64 = base64.b64encode(png).decode()\n\n        response = self.client.chat(\n   
         model=\"command-a-vision\",\n            messages=[{\n                \"role\": \"user\",\n                \"content\": [\n                    {\"type\": \"text\", \"text\": \"What color is this image? Reply with one word.\"},\n                    {\"type\": \"image_url\", \"image_url\": {\"url\": f\"data:image/png;base64,{img_b64}\"}},\n                ],\n            }],\n        )\n\n        self.assertIsNotNone(response)\n        self.assertIsNotNone(response.message)\n        self.assertTrue(len(response.message.content) > 0)\n        # The 1x1 red pixel should be identified as red\n        self.assertIn(\"red\", response.message.content[0].text.lower())\n\n    def test_chat_tool_use_v2(self):\n        \"\"\"Test tool use with v2 client on OCI on-demand inference.\"\"\"\n        response = self.client.chat(\n            model=\"command-a-03-2025\",\n            messages=[{\"role\": \"user\", \"content\": \"What's the weather in Toronto?\"}],\n            max_tokens=200,\n            tools=[{\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": \"get_weather\",\n                    \"description\": \"Get current weather for a location\",\n                    \"parameters\": {\n                        \"type\": \"object\",\n                        \"properties\": {\n                            \"location\": {\"type\": \"string\", \"description\": \"City name\"}\n                        },\n                        \"required\": [\"location\"],\n                    },\n                },\n            }],\n        )\n\n        self.assertIsNotNone(response)\n        self.assertIsNotNone(response.message)\n        self.assertEqual(response.finish_reason, \"TOOL_CALL\")\n        self.assertTrue(len(response.message.tool_calls) > 0)\n        tool_call = response.message.tool_calls[0]\n        self.assertEqual(tool_call.function.name, \"get_weather\")\n        self.assertIn(\"Toronto\", 
tool_call.function.arguments)\n\n    def test_chat_tool_use_response_type_lowered(self):\n        \"\"\"Test that tool_call type is lowercased in response (OCI returns FUNCTION).\"\"\"\n        response = self.client.chat(\n            model=\"command-a-03-2025\",\n            messages=[{\"role\": \"user\", \"content\": \"What's the weather in Toronto?\"}],\n            max_tokens=200,\n            tools=[{\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": \"get_weather\",\n                    \"description\": \"Get current weather for a location\",\n                    \"parameters\": {\n                        \"type\": \"object\",\n                        \"properties\": {\n                            \"location\": {\"type\": \"string\", \"description\": \"City name\"}\n                        },\n                        \"required\": [\"location\"],\n                    },\n                },\n            }],\n        )\n\n        self.assertEqual(response.finish_reason, \"TOOL_CALL\")\n        tool_call = response.message.tool_calls[0]\n        # OCI returns \"FUNCTION\" — SDK must lowercase to \"function\" for Cohere compat\n        self.assertEqual(tool_call.type, \"function\")\n\n    def test_chat_multi_turn_tool_use_v2(self):\n        \"\"\"Test multi-turn tool use: send tool result back after tool call.\"\"\"\n        # Step 1: Get a tool call\n        response = self.client.chat(\n            model=\"command-a-03-2025\",\n            messages=[{\"role\": \"user\", \"content\": \"What's the weather in Toronto?\"}],\n            max_tokens=200,\n            tools=[{\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": \"get_weather\",\n                    \"description\": \"Get current weather for a location\",\n                    \"parameters\": {\n                        \"type\": \"object\",\n                        \"properties\": {\n             
               \"location\": {\"type\": \"string\", \"description\": \"City name\"}\n                        },\n                        \"required\": [\"location\"],\n                    },\n                },\n            }],\n        )\n        self.assertEqual(response.finish_reason, \"TOOL_CALL\")\n        tool_call = response.message.tool_calls[0]\n\n        # Step 2: Send tool result back\n        response2 = self.client.chat(\n            model=\"command-a-03-2025\",\n            messages=[\n                {\"role\": \"user\", \"content\": \"What's the weather in Toronto?\"},\n                {\n                    \"role\": \"assistant\",\n                    \"tool_calls\": [{\"id\": tool_call.id, \"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"arguments\": tool_call.function.arguments}}],\n                    \"tool_plan\": response.message.tool_plan,\n                },\n                {\n                    \"role\": \"tool\",\n                    \"tool_call_id\": tool_call.id,\n                    \"content\": [{\"type\": \"text\", \"text\": \"15°C, sunny\"}],\n                },\n            ],\n            max_tokens=200,\n            tools=[{\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": \"get_weather\",\n                    \"description\": \"Get current weather for a location\",\n                    \"parameters\": {\n                        \"type\": \"object\",\n                        \"properties\": {\n                            \"location\": {\"type\": \"string\", \"description\": \"City name\"}\n                        },\n                        \"required\": [\"location\"],\n                    },\n                },\n            }],\n        )\n\n        self.assertIsNotNone(response2.message)\n        # Model should respond with text incorporating the tool result\n        self.assertTrue(len(response2.message.content) > 0)\n\n    def 
test_chat_safety_mode_v2(self):\n        \"\"\"Test that safety_mode is uppercased for OCI.\"\"\"\n        # Cohere SDK enum values are already uppercase, but test lowercase too\n        response = self.client.chat(\n            model=\"command-a-03-2025\",\n            messages=[{\"role\": \"user\", \"content\": \"Say hi\"}],\n            safety_mode=\"STRICT\",\n        )\n        self.assertIsNotNone(response.message)\n\n    def test_chat_stream_v2(self):\n        \"\"\"Test V2 streaming chat terminates and produces correct event lifecycle.\"\"\"\n        events = []\n        for event in self.client.chat_stream(\n            model=\"command-a-03-2025\",\n            messages=[{\"role\": \"user\", \"content\": \"Count from 1 to 3\"}],\n        ):\n            events.append(event)\n\n        self.assertTrue(len(events) > 0)\n\n        # Verify full event lifecycle: message-start → content-start → content-delta(s) → content-end → message-end\n        event_types = [e.type for e in events]\n        self.assertEqual(event_types[0], \"message-start\")\n        self.assertIn(\"content-start\", event_types)\n        self.assertIn(\"content-delta\", event_types)\n        self.assertIn(\"content-end\", event_types)\n        self.assertEqual(event_types[-1], \"message-end\")\n\n        # Verify we can extract text from content-delta events\n        full_text = \"\"\n        for event in events:\n            if (\n                hasattr(event, \"delta\")\n                and event.delta\n                and hasattr(event.delta, \"message\")\n                and event.delta.message\n                and hasattr(event.delta.message, \"content\")\n                and event.delta.message.content\n                and hasattr(event.delta.message.content, \"text\")\n                and event.delta.message.content.text is not None\n            ):\n                full_text += event.delta.message.content.text\n\n        self.assertTrue(len(full_text) > 
0)\n\n@unittest.skipIf(os.getenv(\"TEST_OCI\") is None, \"TEST_OCI not set\")\nclass TestOciClientAuthentication(unittest.TestCase):\n    \"\"\"Test different OCI authentication methods.\"\"\"\n\n    def test_config_file_auth(self):\n        \"\"\"Test authentication using OCI config file.\"\"\"\n        compartment_id = os.getenv(\"OCI_COMPARTMENT_ID\")\n        if not compartment_id:\n            self.skipTest(\"OCI_COMPARTMENT_ID not set\")\n\n        profile = os.getenv(\"OCI_PROFILE\", \"DEFAULT\")\n        client = cohere.OciClientV2(\n            oci_region=\"us-chicago-1\",\n            oci_compartment_id=compartment_id,\n            oci_profile=profile,\n        )\n\n        # Test with a simple embed call\n        response = client.embed(\n            model=\"embed-english-v3.0\",\n            texts=[\"Auth test\"],\n            input_type=\"search_document\",\n        )\n\n        self.assertIsNotNone(response)\n        self.assertIsNotNone(response.embeddings)\n\n    def test_custom_profile_auth(self):\n        \"\"\"Test authentication using custom OCI profile.\"\"\"\n        compartment_id = os.getenv(\"OCI_COMPARTMENT_ID\")\n        profile = os.getenv(\"OCI_PROFILE\", \"DEFAULT\")\n\n        if not compartment_id:\n            self.skipTest(\"OCI_COMPARTMENT_ID not set\")\n\n        client = cohere.OciClientV2(\n            oci_profile=profile,\n            oci_region=\"us-chicago-1\",\n            oci_compartment_id=compartment_id,\n        )\n\n        response = client.embed(\n            model=\"embed-english-v3.0\",\n            texts=[\"Profile auth test\"],\n            input_type=\"search_document\",\n        )\n\n        self.assertIsNotNone(response)\n\n\n@unittest.skipIf(os.getenv(\"TEST_OCI\") is None, \"TEST_OCI not set\")\nclass TestOciClientErrors(unittest.TestCase):\n    \"\"\"Test error handling in OCI client.\"\"\"\n\n    def test_missing_compartment_id(self):\n        \"\"\"Test error when compartment ID is missing.\"\"\"\n        
with self.assertRaises(TypeError):\n            cohere.OciClientV2(\n                oci_region=\"us-chicago-1\",\n                # Missing oci_compartment_id\n            )\n\n    def test_invalid_model(self):\n        \"\"\"Test error handling with invalid model.\"\"\"\n        compartment_id = os.getenv(\"OCI_COMPARTMENT_ID\")\n        if not compartment_id:\n            self.skipTest(\"OCI_COMPARTMENT_ID not set\")\n\n        profile = os.getenv(\"OCI_PROFILE\", \"DEFAULT\")\n        client = cohere.OciClientV2(\n            oci_region=\"us-chicago-1\",\n            oci_compartment_id=compartment_id,\n            oci_profile=profile,\n        )\n\n        # OCI should return an error for invalid model\n        with self.assertRaises(Exception):\n            client.embed(\n                model=\"invalid-model-name\",\n                texts=[\"Test\"],\n                input_type=\"search_document\",\n            )\n\n\n@unittest.skipIf(os.getenv(\"TEST_OCI\") is None, \"TEST_OCI not set\")\nclass TestOciClientModels(unittest.TestCase):\n    \"\"\"Test different Cohere models on OCI.\"\"\"\n\n    def setUp(self):\n        \"\"\"Set up OCI client for each test.\"\"\"\n        compartment_id = os.getenv(\"OCI_COMPARTMENT_ID\")\n        if not compartment_id:\n            self.skipTest(\"OCI_COMPARTMENT_ID not set\")\n\n        region = os.getenv(\"OCI_REGION\", \"us-chicago-1\")\n        profile = os.getenv(\"OCI_PROFILE\", \"DEFAULT\")\n\n        self.client = cohere.OciClientV2(\n            oci_region=region,\n            oci_compartment_id=compartment_id,\n            oci_profile=profile,\n        )\n\n    def test_embed_english_v3(self):\n        \"\"\"Test embed-english-v3.0 model.\"\"\"\n        response = self.client.embed(\n            model=\"embed-english-v3.0\",\n            texts=[\"Test\"],\n            input_type=\"search_document\",\n        )\n        self.assertIsNotNone(response.embeddings)\n        
self.assertIsNotNone(response.embeddings.float_)\n        self.assertEqual(len(response.embeddings.float_[0]), 1024)\n\n    def test_embed_multilingual_v3(self):\n        \"\"\"Test embed-multilingual-v3.0 model.\"\"\"\n        response = self.client.embed(\n            model=\"embed-multilingual-v3.0\",\n            texts=[\"Test\"],\n            input_type=\"search_document\",\n        )\n        self.assertIsNotNone(response.embeddings)\n        self.assertIsNotNone(response.embeddings.float_)\n        self.assertEqual(len(response.embeddings.float_[0]), 1024)\n\n    def test_command_a_chat(self):\n        \"\"\"Test command-a-03-2025 model for chat.\"\"\"\n        response = self.client.chat(\n            model=\"command-a-03-2025\",\n            messages=[{\"role\": \"user\", \"content\": \"Hello\"}],\n        )\n        self.assertIsNotNone(response.message)\n\n    def test_embed_english_light_v3(self):\n        \"\"\"Test embed-english-light-v3.0 returns 384-dim vectors.\"\"\"\n        response = self.client.embed(\n            model=\"embed-english-light-v3.0\",\n            texts=[\"Hello world\"],\n            input_type=\"search_document\",\n        )\n        self.assertIsNotNone(response.embeddings.float_)\n        self.assertEqual(len(response.embeddings.float_[0]), 384)\n\n    def test_embed_multilingual_light_v3(self):\n        \"\"\"Test embed-multilingual-light-v3.0 returns 384-dim vectors.\"\"\"\n        response = self.client.embed(\n            model=\"embed-multilingual-light-v3.0\",\n            texts=[\"Bonjour le monde\"],\n            input_type=\"search_document\",\n        )\n        self.assertIsNotNone(response.embeddings.float_)\n        self.assertEqual(len(response.embeddings.float_[0]), 384)\n\n    def test_embed_search_query_input_type(self):\n        \"\"\"Test embed with search_query input_type (distinct from search_document).\"\"\"\n        response = self.client.embed(\n            model=\"embed-english-v3.0\",\n            
texts=[\"What is the capital of France?\"],\n            input_type=\"search_query\",\n        )\n        self.assertIsNotNone(response.embeddings.float_)\n        self.assertEqual(len(response.embeddings.float_[0]), 1024)\n\n    def test_embed_with_embedding_types(self):\n        \"\"\"Test embed with explicit embedding_types parameter.\"\"\"\n        response = self.client.embed(\n            model=\"embed-english-v3.0\",\n            texts=[\"Hello world\"],\n            input_type=\"search_document\",\n            embedding_types=[\"float\"],\n        )\n        self.assertIsNotNone(response.embeddings.float_)\n        self.assertEqual(len(response.embeddings.float_[0]), 1024)\n\n    def test_embed_with_truncate(self):\n        \"\"\"Test embed with truncate parameter.\"\"\"\n        long_text = \"hello \" * 1000\n        for mode in [\"NONE\", \"START\", \"END\"]:\n            response = self.client.embed(\n                model=\"embed-english-v3.0\",\n                texts=[long_text],\n                input_type=\"search_document\",\n                truncate=mode,\n            )\n            self.assertIsNotNone(response.embeddings.float_)\n            self.assertEqual(len(response.embeddings.float_[0]), 1024)\n\n    def test_command_r_plus_chat(self):\n        \"\"\"Test command-r-plus-08-2024 via V1 client.\"\"\"\n        v1_client = cohere.OciClient(\n            oci_region=os.getenv(\"OCI_REGION\", \"us-chicago-1\"),\n            oci_compartment_id=os.getenv(\"OCI_COMPARTMENT_ID\"),\n            oci_profile=os.getenv(\"OCI_PROFILE\", \"DEFAULT\"),\n        )\n        response = v1_client.chat(\n            model=\"command-r-plus-08-2024\",\n            message=\"What is 2+2? 
Answer with just the number.\",\n        )\n        self.assertIsNotNone(response.text)\n        self.assertIn(\"4\", response.text)\n\n    def test_v2_multi_turn_chat(self):\n        \"\"\"Test V2 chat with conversation history (multi-turn).\"\"\"\n        response = self.client.chat(\n            model=\"command-a-03-2025\",\n            messages=[\n                {\"role\": \"user\", \"content\": \"My name is Alice.\"},\n                {\"role\": \"assistant\", \"content\": \"Nice to meet you, Alice!\"},\n                {\"role\": \"user\", \"content\": \"What is my name?\"},\n            ],\n        )\n        self.assertIsNotNone(response.message)\n        content = response.message.content[0].text\n        self.assertIn(\"Alice\", content)\n\n    def test_v2_system_message(self):\n        \"\"\"Test V2 chat with a system message.\"\"\"\n        response = self.client.chat(\n            model=\"command-a-03-2025\",\n            messages=[\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant. 
Always respond in exactly 3 words.\"},\n                {\"role\": \"user\", \"content\": \"Say hello.\"},\n            ],\n        )\n        self.assertIsNotNone(response.message)\n        self.assertIsNotNone(response.message.content[0].text)\n\n\nclass TestOciClientTransformations(unittest.TestCase):\n    \"\"\"Unit tests for OCI request/response transformations (no OCI credentials required).\"\"\"\n\n    def test_thinking_parameter_transformation(self):\n        \"\"\"Test that thinking parameter is correctly transformed to OCI format.\"\"\"\n        from cohere.oci_client import transform_request_to_oci\n\n        cohere_body = {\n            \"model\": \"command-a-reasoning-08-2025\",\n            \"messages\": [{\"role\": \"user\", \"content\": \"What is 2+2?\"}],\n            \"thinking\": {\n                \"type\": \"enabled\",\n                \"token_budget\": 10000,\n            },\n        }\n\n        result = transform_request_to_oci(\"chat\", cohere_body, \"compartment-123\", is_v2=True)\n\n        # Verify thinking parameter is transformed with camelCase for OCI API\n        chat_request = result[\"chatRequest\"]\n        self.assertIn(\"thinking\", chat_request)\n        self.assertEqual(chat_request[\"thinking\"][\"type\"], \"ENABLED\")\n        self.assertEqual(chat_request[\"thinking\"][\"tokenBudget\"], 10000)  # camelCase for OCI\n\n    def test_thinking_parameter_disabled(self):\n        \"\"\"Test that disabled thinking is correctly transformed.\"\"\"\n        from cohere.oci_client import transform_request_to_oci\n\n        cohere_body = {\n            \"model\": \"command-a-reasoning-08-2025\",\n            \"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}],\n            \"thinking\": {\n                \"type\": \"disabled\",\n            },\n        }\n\n        result = transform_request_to_oci(\"chat\", cohere_body, \"compartment-123\", is_v2=True)\n\n        chat_request = result[\"chatRequest\"]\n        
self.assertIn(\"thinking\", chat_request)\n        self.assertEqual(chat_request[\"thinking\"][\"type\"], \"DISABLED\")\n        self.assertNotIn(\"token_budget\", chat_request[\"thinking\"])\n\n    def test_thinking_response_transformation(self):\n        \"\"\"Test that thinking content in response is correctly transformed.\"\"\"\n        from cohere.oci_client import transform_oci_response_to_cohere\n\n        oci_response = {\n            \"chatResponse\": {\n                \"id\": \"test-id\",\n                \"message\": {\n                    \"role\": \"ASSISTANT\",\n                    \"content\": [\n                        {\"type\": \"THINKING\", \"thinking\": \"Let me think about this...\"},\n                        {\"type\": \"TEXT\", \"text\": \"The answer is 4.\"},\n                    ],\n                },\n                \"finishReason\": \"COMPLETE\",\n                \"usage\": {\"inputTokens\": 10, \"completionTokens\": 20},\n            }\n        }\n\n        result = transform_oci_response_to_cohere(\"chat\", oci_response, is_v2=True)\n\n        # Verify content types are lowercased\n        self.assertEqual(result[\"message\"][\"content\"][0][\"type\"], \"thinking\")\n        self.assertEqual(result[\"message\"][\"content\"][1][\"type\"], \"text\")\n\n    def test_stream_event_thinking_transformation(self):\n        \"\"\"Test that thinking content in stream events is correctly transformed.\"\"\"\n        from cohere.oci_client import transform_stream_event\n\n        # OCI thinking event\n        oci_event = {\n            \"message\": {\n                \"content\": [{\"type\": \"THINKING\", \"thinking\": \"Reasoning step...\"}]\n            }\n        }\n\n        result = transform_stream_event(\"chat\", oci_event, is_v2=True)\n\n        self.assertEqual(result[0][\"type\"], \"content-delta\")\n        self.assertIn(\"thinking\", result[0][\"delta\"][\"message\"][\"content\"])\n        
self.assertEqual(result[0][\"delta\"][\"message\"][\"content\"][\"thinking\"], \"Reasoning step...\")\n\n    def test_stream_event_text_transformation(self):\n        \"\"\"Test that text content in stream events is correctly transformed.\"\"\"\n        from cohere.oci_client import transform_stream_event\n\n        # OCI text event\n        oci_event = {\n            \"message\": {\n                \"content\": [{\"type\": \"TEXT\", \"text\": \"The answer is...\"}]\n            }\n        }\n\n        result = transform_stream_event(\"chat\", oci_event, is_v2=True)\n\n        self.assertEqual(result[0][\"type\"], \"content-delta\")\n        self.assertIn(\"text\", result[0][\"delta\"][\"message\"][\"content\"])\n        self.assertEqual(result[0][\"delta\"][\"message\"][\"content\"][\"text\"], \"The answer is...\")\n\n    def test_thinking_parameter_none(self):\n        \"\"\"Test that thinking=None does not crash (issue: null guard).\"\"\"\n        from cohere.oci_client import transform_request_to_oci\n\n        cohere_body = {\n            \"model\": \"command-a-03-2025\",\n            \"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}],\n            \"thinking\": None,  # Explicitly set to None\n        }\n\n        # Should not crash with TypeError\n        result = transform_request_to_oci(\"chat\", cohere_body, \"compartment-123\", is_v2=True)\n\n        chat_request = result[\"chatRequest\"]\n        # thinking should not be in request when None\n        self.assertNotIn(\"thinking\", chat_request)\n\n    def test_v2_response_role_lowercased(self):\n        \"\"\"Test that V2 response message role is lowercased.\"\"\"\n        from cohere.oci_client import transform_oci_response_to_cohere\n\n        oci_response = {\n            \"chatResponse\": {\n                \"id\": \"test-id\",\n                \"message\": {\n                    \"role\": \"ASSISTANT\",\n                    \"content\": [{\"type\": \"TEXT\", \"text\": \"Hello\"}],\n       
         },\n                \"finishReason\": \"COMPLETE\",\n                \"usage\": {\"inputTokens\": 10, \"completionTokens\": 20},\n            }\n        }\n\n        result = transform_oci_response_to_cohere(\"chat\", oci_response, is_v2=True)\n\n        # Role should be lowercased\n        self.assertEqual(result[\"message\"][\"role\"], \"assistant\")\n\n    def test_v2_response_finish_reason_uppercase(self):\n        \"\"\"Test that V2 response finish_reason stays uppercase.\"\"\"\n        from cohere.oci_client import transform_oci_response_to_cohere\n\n        oci_response = {\n            \"chatResponse\": {\n                \"id\": \"test-id\",\n                \"message\": {\n                    \"role\": \"ASSISTANT\",\n                    \"content\": [{\"type\": \"TEXT\", \"text\": \"Hello\"}],\n                },\n                \"finishReason\": \"MAX_TOKENS\",\n                \"usage\": {\"inputTokens\": 10, \"completionTokens\": 20},\n            }\n        }\n\n        result = transform_oci_response_to_cohere(\"chat\", oci_response, is_v2=True)\n\n        # V2 finish_reason should stay uppercase\n        self.assertEqual(result[\"finish_reason\"], \"MAX_TOKENS\")\n\n    def test_v2_response_tool_calls_conversion(self):\n        \"\"\"Test that V2 response converts toolCalls to tool_calls.\"\"\"\n        from cohere.oci_client import transform_oci_response_to_cohere\n\n        oci_response = {\n            \"chatResponse\": {\n                \"id\": \"test-id\",\n                \"message\": {\n                    \"role\": \"ASSISTANT\",\n                    \"content\": [{\"type\": \"TEXT\", \"text\": \"I'll help with that.\"}],\n                    \"toolCalls\": [\n                        {\n                            \"id\": \"call_123\",\n                            \"type\": \"function\",\n                            \"function\": {\"name\": \"get_weather\", \"arguments\": '{\"city\": \"London\"}'},\n                        }\n    
                ],\n                },\n                \"finishReason\": \"TOOL_CALL\",\n                \"usage\": {\"inputTokens\": 10, \"completionTokens\": 20},\n            }\n        }\n\n        result = transform_oci_response_to_cohere(\"chat\", oci_response, is_v2=True)\n\n        # toolCalls should be converted to tool_calls\n        self.assertIn(\"tool_calls\", result[\"message\"])\n        self.assertNotIn(\"toolCalls\", result[\"message\"])\n        self.assertEqual(len(result[\"message\"][\"tool_calls\"]), 1)\n        self.assertEqual(result[\"message\"][\"tool_calls\"][0][\"id\"], \"call_123\")\n\n    def test_normalize_model_for_oci(self):\n        \"\"\"Test model name normalization for OCI.\"\"\"\n        from cohere.oci_client import normalize_model_for_oci\n\n        # Plain model name gets cohere. prefix\n        self.assertEqual(normalize_model_for_oci(\"command-a-03-2025\"), \"cohere.command-a-03-2025\")\n        # Already prefixed passes through\n        self.assertEqual(normalize_model_for_oci(\"cohere.embed-english-v3.0\"), \"cohere.embed-english-v3.0\")\n        # OCID passes through\n        self.assertEqual(\n            normalize_model_for_oci(\"ocid1.generativeaimodel.oc1.us-chicago-1.abc\"),\n            \"ocid1.generativeaimodel.oc1.us-chicago-1.abc\",\n        )\n\n    def test_transform_embed_request(self):\n        \"\"\"Test embed request transformation to OCI format.\"\"\"\n        from cohere.oci_client import transform_request_to_oci\n\n        body = {\n            \"model\": \"embed-english-v3.0\",\n            \"texts\": [\"hello\", \"world\"],\n            \"input_type\": \"search_document\",\n            \"truncate\": \"end\",\n            \"embedding_types\": [\"float\", \"int8\"],\n        }\n        result = transform_request_to_oci(\"embed\", body, \"compartment-123\")\n\n        self.assertEqual(result[\"inputs\"], [\"hello\", \"world\"])\n        self.assertEqual(result[\"inputType\"], \"SEARCH_DOCUMENT\")\n      
  self.assertEqual(result[\"truncate\"], \"END\")\n        self.assertEqual(result[\"embeddingTypes\"], [\"float\", \"int8\"])\n        self.assertEqual(result[\"compartmentId\"], \"compartment-123\")\n        self.assertEqual(result[\"servingMode\"][\"modelId\"], \"cohere.embed-english-v3.0\")\n\n    def test_transform_embed_request_with_optional_params(self):\n        \"\"\"Test embed request forwards optional params.\"\"\"\n        from cohere.oci_client import transform_request_to_oci\n\n        body = {\n            \"model\": \"embed-english-v3.0\",\n            \"inputs\": [{\"content\": [{\"type\": \"text\", \"text\": \"hello\"}]}],\n            \"input_type\": \"classification\",\n            \"max_tokens\": 256,\n            \"output_dimension\": 512,\n            \"priority\": 42,\n        }\n        result = transform_request_to_oci(\"embed\", body, \"compartment-123\")\n\n        self.assertEqual(result[\"inputs\"], body[\"inputs\"])\n        self.assertEqual(result[\"maxTokens\"], 256)\n        self.assertEqual(result[\"outputDimension\"], 512)\n        self.assertEqual(result[\"priority\"], 42)\n\n    def test_transform_embed_request_rejects_images(self):\n        \"\"\"Test embed request fails clearly for unsupported top-level images.\"\"\"\n        from cohere.oci_client import transform_request_to_oci\n\n        with self.assertRaises(ValueError) as ctx:\n            transform_request_to_oci(\n                \"embed\",\n                {\n                    \"model\": \"embed-english-v3.0\",\n                    \"images\": [\"data:image/png;base64,abc\"],\n                    \"input_type\": \"classification\",\n                },\n                \"compartment-123\",\n            )\n\n        self.assertIn(\"top-level 'images' parameter\", str(ctx.exception))\n\n    def test_transform_chat_request_optional_params(self):\n        \"\"\"Test chat request transformation includes optional params.\"\"\"\n        from cohere.oci_client import 
transform_request_to_oci\n\n        body = {\n            \"model\": \"command-a-03-2025\",\n            \"messages\": [{\"role\": \"user\", \"content\": \"Hi\"}],\n            \"max_tokens\": 100,\n            \"temperature\": 0.7,\n            \"stop_sequences\": [\"END\"],\n            \"frequency_penalty\": 0.5,\n            \"strict_tools\": True,\n            \"response_format\": {\"type\": \"json_object\"},\n            \"logprobs\": True,\n            \"tool_choice\": \"REQUIRED\",\n            \"priority\": 7,\n        }\n        result = transform_request_to_oci(\"chat\", body, \"compartment-123\", is_v2=True)\n\n        chat_req = result[\"chatRequest\"]\n        self.assertEqual(chat_req[\"maxTokens\"], 100)\n        self.assertEqual(chat_req[\"temperature\"], 0.7)\n        self.assertEqual(chat_req[\"stopSequences\"], [\"END\"])\n        self.assertEqual(chat_req[\"frequencyPenalty\"], 0.5)\n        self.assertTrue(chat_req[\"strictTools\"])\n        self.assertEqual(chat_req[\"responseFormat\"], {\"type\": \"json_object\"})\n        self.assertTrue(chat_req[\"logprobs\"])\n        self.assertEqual(chat_req[\"toolChoice\"], \"REQUIRED\")\n        self.assertEqual(chat_req[\"priority\"], 7)\n\n    def test_v2_client_rejects_v1_request(self):\n        \"\"\"Test OciClientV2 fails when given V1-style 'message' string.\"\"\"\n        from cohere.oci_client import transform_request_to_oci\n\n        with self.assertRaises(ValueError) as ctx:\n            transform_request_to_oci(\n                \"chat\",\n                {\"model\": \"command-a-03-2025\", \"message\": \"Hello\"},\n                \"compartment-123\",\n                is_v2=True,\n            )\n        self.assertIn(\"OciClientV2\", str(ctx.exception))\n\n    def test_v1_client_rejects_v2_request(self):\n        \"\"\"Test OciClient fails when given V2-style 'messages' array.\"\"\"\n        from cohere.oci_client import transform_request_to_oci\n\n        with 
self.assertRaises(ValueError) as ctx:\n            transform_request_to_oci(\n                \"chat\",\n                {\"model\": \"command-r-08-2024\", \"messages\": [{\"role\": \"user\", \"content\": \"Hi\"}]},\n                \"compartment-123\",\n                is_v2=False,\n            )\n        self.assertIn(\"OciClient \", str(ctx.exception))\n\n    def test_unsupported_endpoint_raises(self):\n        \"\"\"Test that transform_request_to_oci raises for unsupported endpoints.\"\"\"\n        from cohere.oci_client import transform_request_to_oci\n\n        with self.assertRaises(ValueError) as ctx:\n            transform_request_to_oci(\"rerank\", {\"model\": \"rerank-v3.5\"}, \"compartment-123\")\n        self.assertIn(\"rerank\", str(ctx.exception))\n        self.assertIn(\"not supported\", str(ctx.exception))\n\n    def test_v1_chat_request_optional_params(self):\n        \"\"\"Test V1 chat request forwards supported optional params.\"\"\"\n        from cohere.oci_client import transform_request_to_oci\n\n        body = {\n            \"model\": \"command-r-08-2024\",\n            \"message\": \"Hi\",\n            \"max_tokens\": 100,\n            \"temperature\": 0.7,\n            \"k\": 10,\n            \"p\": 0.8,\n            \"seed\": 123,\n            \"stop_sequences\": [\"END\"],\n            \"frequency_penalty\": 0.5,\n            \"presence_penalty\": 0.2,\n            \"documents\": [{\"title\": \"Doc\", \"text\": \"Body\"}],\n            \"tools\": [{\"name\": \"lookup\"}],\n            \"tool_results\": [{\"call\": {\"name\": \"lookup\"}}],\n            \"response_format\": {\"type\": \"json_object\"},\n            \"safety_mode\": \"NONE\",\n            \"priority\": 4,\n        }\n        result = transform_request_to_oci(\"chat\", body, \"compartment-123\", is_v2=False)\n\n        chat_req = result[\"chatRequest\"]\n        self.assertEqual(chat_req[\"apiFormat\"], \"COHERE\")\n        self.assertEqual(chat_req[\"message\"], \"Hi\")\n 
       self.assertEqual(chat_req[\"maxTokens\"], 100)\n        self.assertEqual(chat_req[\"temperature\"], 0.7)\n        self.assertEqual(chat_req[\"topK\"], 10)\n        self.assertEqual(chat_req[\"topP\"], 0.8)\n        self.assertEqual(chat_req[\"seed\"], 123)\n        self.assertEqual(chat_req[\"frequencyPenalty\"], 0.5)\n        self.assertEqual(chat_req[\"presencePenalty\"], 0.2)\n        self.assertEqual(chat_req[\"priority\"], 4)\n\n    def test_v1_stream_wrapper_preserves_finish_reason(self):\n        \"\"\"Test V1 stream-end uses the OCI finish reason from the final event.\"\"\"\n        import json\n        from cohere.oci_client import transform_oci_stream_wrapper\n\n        chunks = [\n            b'data: {\"text\": \"Hello\", \"isFinished\": false}\\n',\n            b'data: {\"text\": \" world\", \"isFinished\": true, \"finishReason\": \"MAX_TOKENS\"}\\n',\n            b\"data: [DONE]\\n\",\n        ]\n\n        events = [\n            json.loads(raw.decode(\"utf-8\"))\n            for raw in transform_oci_stream_wrapper(iter(chunks), \"chat\", is_v2=False)\n        ]\n\n        # First event should be stream-start with generation_id\n        self.assertEqual(events[0][\"event_type\"], \"stream-start\")\n        self.assertIn(\"generation_id\", events[0])\n\n        self.assertEqual(events[3][\"event_type\"], \"stream-end\")\n        self.assertEqual(events[3][\"finish_reason\"], \"MAX_TOKENS\")\n        self.assertEqual(events[3][\"response\"][\"text\"], \"Hello world\")\n\n    def test_transform_chat_request_tool_message_fields(self):\n        \"\"\"Test tool message fields are converted to OCI names.\"\"\"\n        from cohere.oci_client import transform_request_to_oci\n\n        body = {\n            \"model\": \"command-a-03-2025\",\n            \"messages\": [\n                {\n                    \"role\": \"assistant\",\n                    \"content\": [{\"type\": \"text\", \"text\": \"Use tool\"}],\n                    \"tool_calls\": 
[{\"id\": \"call_1\"}],\n                    \"tool_plan\": \"Plan\",\n                },\n                {\n                    \"role\": \"tool\",\n                    \"tool_call_id\": \"call_1\",\n                    \"content\": [{\"type\": \"text\", \"text\": \"Result\"}],\n                },\n            ],\n        }\n\n        result = transform_request_to_oci(\"chat\", body, \"compartment-123\", is_v2=True)\n        assistant_message, tool_message = result[\"chatRequest\"][\"messages\"]\n        self.assertEqual(assistant_message[\"toolCalls\"], [{\"id\": \"call_1\"}])\n        self.assertEqual(assistant_message[\"toolPlan\"], \"Plan\")\n        self.assertEqual(tool_message[\"toolCallId\"], \"call_1\")\n\n    def test_get_oci_url_known_endpoints(self):\n        \"\"\"Test URL generation for known endpoints.\"\"\"\n        from cohere.oci_client import get_oci_url\n\n        url = get_oci_url(\"us-chicago-1\", \"embed\")\n        self.assertIn(\"/actions/embedText\", url)\n\n        url = get_oci_url(\"us-chicago-1\", \"chat\")\n        self.assertIn(\"/actions/chat\", url)\n\n\n    def test_get_oci_url_unknown_endpoint_raises(self):\n        \"\"\"Test that unknown endpoints raise ValueError instead of producing bad URLs.\"\"\"\n        from cohere.oci_client import get_oci_url\n\n        with self.assertRaises(ValueError) as ctx:\n            get_oci_url(\"us-chicago-1\", \"unknown_endpoint\")\n        self.assertIn(\"not supported\", str(ctx.exception))\n\n    def test_load_oci_config_missing_private_key_raises(self):\n        \"\"\"Test that direct credentials without private key raises clear error.\"\"\"\n        from cohere.oci_client import _load_oci_config\n\n        with patch(\"cohere.oci_client.lazy_oci\", return_value=MagicMock()):\n            with self.assertRaises(ValueError) as ctx:\n                _load_oci_config(\n                    auth_type=\"api_key\",\n                    config_path=None,\n                    profile=None,\n     
               user_id=\"ocid1.user.oc1...\",\n                    fingerprint=\"xx:xx:xx\",\n                    tenancy_id=\"ocid1.tenancy.oc1...\",\n                    # No private_key_path or private_key_content\n                )\n            self.assertIn(\"oci_private_key_path\", str(ctx.exception))\n\n    def test_load_oci_config_ignores_inherited_session_auth(self):\n        \"\"\"Test that named API-key profiles do not inherit DEFAULT session auth fields.\"\"\"\n        from cohere.oci_client import _load_oci_config\n\n        config_text = \"\"\"\n[DEFAULT]\nsecurity_token_file=/tmp/default-token\n\n[API_KEY_AUTH]\nuser=ocid1.user.oc1..test\nfingerprint=aa:bb\nkey_file=/tmp/test.pem\ntenancy=ocid1.tenancy.oc1..test\nregion=us-chicago-1\n\"\"\".strip()\n\n        with tempfile.NamedTemporaryFile(\"w\", delete=False) as config_file:\n            config_file.write(config_text)\n            config_path = config_file.name\n\n        try:\n            mock_oci = MagicMock()\n            mock_oci.config.from_file.return_value = {\n                \"user\": \"ocid1.user.oc1..test\",\n                \"fingerprint\": \"aa:bb\",\n                \"key_file\": \"/tmp/test.pem\",\n                \"tenancy\": \"ocid1.tenancy.oc1..test\",\n                \"region\": \"us-chicago-1\",\n                \"security_token_file\": \"/tmp/default-token\",\n            }\n\n            with patch(\"cohere.oci_client.lazy_oci\", return_value=mock_oci):\n                config = _load_oci_config(\n                    auth_type=\"api_key\",\n                    config_path=config_path,\n                    profile=\"API_KEY_AUTH\",\n                )\n        finally:\n            os.unlink(config_path)\n\n        self.assertNotIn(\"security_token_file\", config)\n\n    def test_session_auth_prefers_security_token_signer(self):\n        \"\"\"Test session-based auth uses SecurityTokenSigner before API key signer.\"\"\"\n        from cohere.oci_client import 
map_request_to_oci\n\n        mock_oci = MagicMock()\n        mock_security_signer = MagicMock()\n        mock_oci.signer.load_private_key_from_file.return_value = \"private-key\"\n        mock_oci.auth.signers.SecurityTokenSigner.return_value = mock_security_signer\n\n        with patch(\"cohere.oci_client.lazy_oci\", return_value=mock_oci), patch(\n            \"builtins.open\", mock_open(read_data=\"session-token\")\n        ):\n            hook = map_request_to_oci(\n                oci_config={\n                    \"user\": \"ocid1.user.oc1..example\",\n                    \"fingerprint\": \"xx:xx\",\n                    \"tenancy\": \"ocid1.tenancy.oc1..example\",\n                    \"security_token_file\": \"~/.oci/token\",\n                    \"key_file\": \"~/.oci/key.pem\",\n                },\n                oci_region=\"us-chicago-1\",\n                oci_compartment_id=\"ocid1.compartment.oc1..example\",\n            )\n\n            request = MagicMock()\n            request.url.path = \"/v2/embed\"\n            request.read.return_value = b'{\"model\":\"embed-english-v3.0\",\"texts\":[\"hello\"]}'\n            request.method = \"POST\"\n            request.extensions = {}\n\n            hook(request)\n\n        # SecurityTokenSigner is called at least once (init) and again per request\n        # (token file is re-read on each signing call to pick up refreshed tokens).\n        mock_oci.auth.signers.SecurityTokenSigner.assert_called_with(\n            token=\"session-token\",\n            private_key=\"private-key\",\n        )\n        self.assertGreaterEqual(mock_oci.auth.signers.SecurityTokenSigner.call_count, 1)\n        mock_oci.signer.Signer.assert_not_called()\n\n    def test_session_token_refreshed_on_subsequent_requests(self):\n        \"\"\"Verify the refreshing signer picks up a new token written to the token file.\"\"\"\n        import tempfile\n        import os\n        from cohere.oci_client import map_request_to_oci\n\n        
mock_oci = MagicMock()\n        mock_oci.signer.load_private_key_from_file.return_value = \"private-key\"\n\n        # Write initial token to a real temp file so we can overwrite it later.\n        with tempfile.NamedTemporaryFile(\"w\", suffix=\".token\", delete=False) as tf:\n            tf.write(\"token-v1\")\n            token_path = tf.name\n\n        try:\n            with patch(\"cohere.oci_client.lazy_oci\", return_value=mock_oci):\n                hook = map_request_to_oci(\n                    oci_config={\n                        \"security_token_file\": token_path,\n                        \"key_file\": \"/irrelevant.pem\",\n                    },\n                    oci_region=\"us-chicago-1\",\n                    oci_compartment_id=\"ocid1.compartment.oc1..example\",\n                )\n\n                def _make_request():\n                    req = MagicMock()\n                    req.url.path = \"/v2/embed\"\n                    req.read.return_value = b'{\"model\":\"embed-english-v3.0\",\"texts\":[\"hi\"]}'\n                    req.method = \"POST\"\n                    req.extensions = {}\n                    return req\n\n                # First request uses token-v1\n                hook(_make_request())\n                calls_after_first = mock_oci.auth.signers.SecurityTokenSigner.call_count\n\n                # Simulate token refresh by overwriting the file\n                with open(token_path, \"w\") as _f:\n                    _f.write(\"token-v2\")\n\n                # Second request — should re-read and use token-v2\n                hook(_make_request())\n                self.assertGreater(\n                    mock_oci.auth.signers.SecurityTokenSigner.call_count,\n                    calls_after_first,\n                    \"SecurityTokenSigner should be re-instantiated after token file update\",\n                )\n                # Verify the latest call used the refreshed token\n                all_calls = 
mock_oci.auth.signers.SecurityTokenSigner.call_args_list\n                last_call = all_calls[-1]\n                last_token = last_call.kwargs.get(\"token\") or (last_call.args[0] if last_call.args else None)\n                self.assertEqual(last_token, \"token-v2\", \"Last signing call must use the refreshed token\")\n        finally:\n            os.unlink(token_path)\n\n    def test_embed_response_lowercases_embedding_keys(self):\n        \"\"\"Test embed response uses lowercase keys expected by the SDK model.\"\"\"\n        from cohere.oci_client import transform_oci_response_to_cohere\n\n        result = transform_oci_response_to_cohere(\n            \"embed\",\n            {\n                \"id\": \"embed-id\",\n                \"embeddings\": {\"FLOAT\": [[0.1, 0.2]], \"INT8\": [[1, 2]]},\n                \"usage\": {\"inputTokens\": 3, \"completionTokens\": 7},\n            },\n            is_v2=True,\n        )\n\n        self.assertIn(\"float\", result[\"embeddings\"])\n        self.assertIn(\"int8\", result[\"embeddings\"])\n        self.assertNotIn(\"FLOAT\", result[\"embeddings\"])\n        self.assertEqual(result[\"meta\"][\"tokens\"][\"output_tokens\"], 7)\n\n    def test_embed_response_includes_response_type_v1(self):\n        \"\"\"Test V1 embed response includes response_type=embeddings_floats for SDK union.\"\"\"\n        from cohere.oci_client import transform_oci_response_to_cohere\n\n        result = transform_oci_response_to_cohere(\n            \"embed\",\n            {\n                \"id\": \"embed-id\",\n                \"embeddings\": [[0.1, 0.2]],\n                \"usage\": {\"inputTokens\": 3, \"completionTokens\": 0},\n            },\n            is_v2=False,\n        )\n\n        self.assertEqual(result[\"response_type\"], \"embeddings_floats\")\n\n    def test_embed_response_includes_response_type_v2(self):\n        \"\"\"Test V2 embed response includes response_type=embeddings_by_type for SDK union.\"\"\"\n        from 
cohere.oci_client import transform_oci_response_to_cohere\n\n        result = transform_oci_response_to_cohere(\n            \"embed\",\n            {\n                \"id\": \"embed-id\",\n                \"embeddings\": {\"FLOAT\": [[0.1, 0.2]]},\n                \"usage\": {\"inputTokens\": 3, \"completionTokens\": 0},\n            },\n            is_v2=True,\n        )\n\n        self.assertEqual(result[\"response_type\"], \"embeddings_by_type\")\n\n    def test_normalize_model_for_oci_rejects_empty_model(self):\n        \"\"\"Test model normalization fails clearly for empty model names.\"\"\"\n        from cohere.oci_client import normalize_model_for_oci\n\n        with self.assertRaises(ValueError) as ctx:\n            normalize_model_for_oci(\"\")\n        self.assertIn(\"non-empty model\", str(ctx.exception))\n\n    def test_stream_wrapper_emits_full_event_lifecycle(self):\n        \"\"\"Test that stream emits message-start, content-start, content-delta, content-end, message-end.\"\"\"\n        import json\n        from cohere.oci_client import transform_oci_stream_wrapper\n\n        chunks = [\n            b'data: {\"message\": {\"content\": [{\"type\": \"TEXT\", \"text\": \"Hello\"}]}}\\n',\n            b'data: {\"message\": {\"content\": [{\"type\": \"TEXT\", \"text\": \" world\"}]}, \"finishReason\": \"COMPLETE\"}\\n',\n            b'data: [DONE]\\n',\n        ]\n\n        events = []\n        for raw in transform_oci_stream_wrapper(iter(chunks), \"chat\", is_v2=True):\n            line = raw.decode(\"utf-8\").strip()\n            if line.startswith(\"data: \"):\n                events.append(json.loads(line[6:]))\n\n        event_types = [e[\"type\"] for e in events]\n        self.assertEqual(event_types[0], \"message-start\")\n        self.assertEqual(event_types[1], \"content-start\")\n        self.assertEqual(event_types[2], \"content-delta\")\n        self.assertEqual(event_types[3], \"content-delta\")\n        self.assertEqual(event_types[4], 
\"content-end\")\n        self.assertEqual(event_types[5], \"message-end\")\n\n        # Verify message-start has id and role\n        self.assertIn(\"id\", events[0])\n        self.assertEqual(events[0][\"delta\"][\"message\"][\"role\"], \"assistant\")\n\n        # Verify content-start has index and type\n        self.assertEqual(events[1][\"index\"], 0)\n        self.assertEqual(events[1][\"delta\"][\"message\"][\"content\"][\"type\"], \"text\")\n        self.assertEqual(events[5][\"delta\"][\"finish_reason\"], \"COMPLETE\")\n\n    def test_stream_wrapper_emits_new_content_block_on_thinking_transition(self):\n        \"\"\"Test streams emit a new content block when transitioning from thinking to text.\"\"\"\n        import json\n        from cohere.oci_client import transform_oci_stream_wrapper\n\n        chunks = [\n            b'data: {\"message\": {\"content\": [{\"type\": \"THINKING\", \"thinking\": \"Reasoning...\"}]}}\\n',\n            b'data: {\"message\": {\"content\": [{\"type\": \"TEXT\", \"text\": \"Answer\"}]}, \"finishReason\": \"COMPLETE\"}\\n',\n            b\"data: [DONE]\\n\",\n        ]\n\n        events = []\n        for raw in transform_oci_stream_wrapper(iter(chunks), \"chat\", is_v2=True):\n            line = raw.decode(\"utf-8\").strip()\n            if line.startswith(\"data: \"):\n                events.append(json.loads(line[6:]))\n\n        self.assertEqual(events[1][\"type\"], \"content-start\")\n        self.assertEqual(events[1][\"delta\"][\"message\"][\"content\"][\"type\"], \"thinking\")\n        self.assertEqual(events[2][\"type\"], \"content-delta\")\n        self.assertEqual(events[2][\"index\"], 0)\n        self.assertEqual(events[3], {\"type\": \"content-end\", \"index\": 0})\n        self.assertEqual(events[4][\"type\"], \"content-start\")\n        self.assertEqual(events[4][\"index\"], 1)\n        self.assertEqual(events[4][\"delta\"][\"message\"][\"content\"][\"type\"], \"text\")\n        
self.assertEqual(events[5][\"type\"], \"content-delta\")\n        self.assertEqual(events[5][\"index\"], 1)\n\n    def test_stream_wrapper_no_spurious_block_on_finish_only_event(self):\n        \"\"\"Finish-only event after thinking block must not open a spurious empty text block.\"\"\"\n        import json\n        from cohere.oci_client import transform_oci_stream_wrapper\n\n        chunks = [\n            b'data: {\"message\": {\"content\": [{\"type\": \"THINKING\", \"thinking\": \"Reasoning...\"}]}}\\n',\n            b'data: {\"finishReason\": \"COMPLETE\"}\\n',\n            b\"data: [DONE]\\n\",\n        ]\n\n        events = []\n        for raw in transform_oci_stream_wrapper(iter(chunks), \"chat\", is_v2=True):\n            line = raw.decode(\"utf-8\").strip()\n            if line.startswith(\"data: \"):\n                events.append(json.loads(line[6:]))\n\n        types = [e[\"type\"] for e in events]\n        # Must not contain two content-start events\n        self.assertEqual(types.count(\"content-start\"), 1)\n        # The single content block must be thinking\n        cs = next(e for e in events if e[\"type\"] == \"content-start\")\n        self.assertEqual(cs[\"delta\"][\"message\"][\"content\"][\"type\"], \"thinking\")\n        # Must end cleanly\n        self.assertEqual(events[-1][\"type\"], \"message-end\")\n\n    def test_stream_wrapper_skips_malformed_json_with_warning(self):\n        \"\"\"Test that malformed JSON in SSE stream is skipped.\"\"\"\n        from cohere.oci_client import transform_oci_stream_wrapper\n\n        chunks = [\n            b'data: not-valid-json\\n',\n            b'data: {\"message\": {\"content\": [{\"type\": \"TEXT\", \"text\": \"hello\"}]}}\\n',\n            b'data: [DONE]\\n',\n        ]\n        events = list(transform_oci_stream_wrapper(iter(chunks), \"chat\", is_v2=True))\n        # Should get message-start + content-start + content-delta + content-end + message-end.\n        self.assertEqual(len(events), 
5)\n\n    def test_stream_wrapper_skips_message_end_for_empty_stream(self):\n        \"\"\"Test empty streams do not emit message-end without a preceding message-start.\"\"\"\n        from cohere.oci_client import transform_oci_stream_wrapper\n\n        events = list(transform_oci_stream_wrapper(iter([b\"data: [DONE]\\n\"]), \"chat\", is_v2=True))\n\n        self.assertEqual(events, [])\n\n    def test_stream_wrapper_done_uses_current_content_index_after_transition(self):\n        \"\"\"Test fallback content-end uses the latest content index after type transitions.\"\"\"\n        import json\n        from cohere.oci_client import transform_oci_stream_wrapper\n\n        chunks = [\n            b'data: {\"message\": {\"content\": [{\"type\": \"THINKING\", \"thinking\": \"Reasoning...\"}]}}\\n',\n            b'data: {\"message\": {\"content\": [{\"type\": \"TEXT\", \"text\": \"Answer\"}]}}\\n',\n            b\"data: [DONE]\\n\",\n        ]\n\n        events = []\n        for raw in transform_oci_stream_wrapper(iter(chunks), \"chat\", is_v2=True):\n            line = raw.decode(\"utf-8\").strip()\n            if line.startswith(\"data: \"):\n                events.append(json.loads(line[6:]))\n\n        self.assertEqual(events[-2], {\"type\": \"content-end\", \"index\": 1})\n        self.assertEqual(events[-1][\"type\"], \"message-end\")\n\n    def test_stream_wrapper_raises_on_transform_error(self):\n        \"\"\"Test that transform errors in stream produce OCI-specific error.\"\"\"\n        from cohere.oci_client import transform_oci_stream_wrapper\n\n        # Event with structure that will cause transform_stream_event to fail\n        # (message is None, causing TypeError on \"content\" in None)\n        chunks = [\n            b'data: {\"message\": null}\\n',\n        ]\n        with self.assertRaises(RuntimeError) as ctx:\n            list(transform_oci_stream_wrapper(iter(chunks), \"chat\", is_v2=True))\n        self.assertIn(\"OCI stream event transformation 
failed\", str(ctx.exception))\n\n    def test_stream_event_finish_reason_keeps_final_text(self):\n        \"\"\"Test finish events keep final text before content-end.\"\"\"\n        from cohere.oci_client import transform_stream_event\n\n        events = transform_stream_event(\n            \"chat\",\n            {\n                \"message\": {\"content\": [{\"type\": \"TEXT\", \"text\": \" world\"}]},\n                \"finishReason\": \"COMPLETE\",\n            },\n            is_v2=True,\n        )\n\n        self.assertEqual(events[0][\"type\"], \"content-delta\")\n        self.assertEqual(events[0][\"delta\"][\"message\"][\"content\"][\"text\"], \" world\")\n        self.assertEqual(events[1][\"type\"], \"content-end\")\n\nif __name__ == \"__main__\":\n    unittest.main()\n"
  },
  {
    "path": "tests/test_oci_mypy.py",
    "content": "\"\"\"Mypy type-checking gate for OCI client code.\n\nRuns mypy on OCI source and test files and fails if any type errors are found.\nThis prevents type regressions from being introduced silently.\n\nRun with:\n    pytest tests/test_oci_mypy.py\n\"\"\"\n\nimport os\nimport shutil\nimport subprocess\nimport unittest\n\nMYPY_BIN = shutil.which(\"mypy\")\n\n# Files that must stay mypy-clean\nOCI_SOURCE_FILES = [\n    \"src/cohere/oci_client.py\",\n    \"src/cohere/manually_maintained/lazy_oci_deps.py\",\n]\n\nOCI_TEST_FILES = [\n    \"tests/test_oci_client.py\",\n]\n\n# --follow-imports=silent prevents mypy from crawling into transitive\n# dependencies (e.g. the AWS client) that have pre-existing errors.\n_MYPY_BASE = [\n    \"--config-file\", \"mypy.ini\",\n    \"--follow-imports=silent\",\n]\n\n\ndef _run_mypy(files: list[str], extra_env: dict[str, str] | None = None) -> tuple[int, str]:\n    \"\"\"Run mypy on the given files and return (exit_code, output).\"\"\"\n    assert MYPY_BIN is not None\n    env = {**os.environ, **(extra_env or {})}\n    result = subprocess.run(\n        [MYPY_BIN, *_MYPY_BASE, *files],\n        capture_output=True,\n        text=True,\n        env=env,\n    )\n    return result.returncode, (result.stdout + result.stderr).strip()\n\n\n@unittest.skipIf(MYPY_BIN is None, \"mypy not found on PATH\")\nclass TestOciMypy(unittest.TestCase):\n    \"\"\"Ensure OCI files pass mypy with no new errors.\"\"\"\n\n    def test_oci_source_types(self):\n        \"\"\"OCI source files must be free of mypy errors.\"\"\"\n        code, output = _run_mypy(OCI_SOURCE_FILES)\n        self.assertEqual(code, 0, f\"mypy found type errors in OCI source:\\n{output}\")\n\n    def test_oci_test_types(self):\n        \"\"\"OCI test files must be free of mypy errors.\"\"\"\n        # PYTHONPATH=src so mypy can resolve `import cohere`\n        code, output = _run_mypy(OCI_TEST_FILES, extra_env={\"PYTHONPATH\": \"src\"})\n        self.assertEqual(code, 0, 
f\"mypy found type errors in OCI tests:\\n{output}\")\n\n\nif __name__ == \"__main__\":\n    unittest.main()\n"
  },
  {
    "path": "tests/test_overrides.py",
    "content": "import unittest\nfrom contextlib import redirect_stderr\nimport logging\n\n\nfrom cohere import EmbedByTypeResponseEmbeddings\n\nLOGGER = logging.getLogger(__name__)\n\nclass TestClient(unittest.TestCase):\n\n    def test_float_alias(self) -> None:\n        embeds = EmbedByTypeResponseEmbeddings(float_=[[1.0]])\n        self.assertEqual(embeds.float_, [[1.0]])\n        self.assertEqual(embeds.float, [[1.0]])  # type: ignore\n"
  }
]