Repository: simonw/llm
Branch: main
Commit: c7cf7e506ebe
Files: 96
Total size: 1006.4 KB

Directory structure:
gitextract_ij31elfv/

├── .github/
│   ├── FUNDING.yml
│   ├── dependabot.yml
│   └── workflows/
│       ├── cog.yml
│       ├── publish.yml
│       ├── stable-docs.yml
│       └── test.yml
├── .gitignore
├── .readthedocs.yaml
├── AGENTS.md
├── Justfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs/
│   ├── .gitignore
│   ├── Makefile
│   ├── _templates/
│   │   └── base.html
│   ├── aliases.md
│   ├── changelog.md
│   ├── conf.py
│   ├── contributing.md
│   ├── embeddings/
│   │   ├── cli.md
│   │   ├── index.md
│   │   ├── python-api.md
│   │   ├── storage.md
│   │   └── writing-plugins.md
│   ├── fragments.md
│   ├── help.md
│   ├── index.md
│   ├── logging.md
│   ├── openai-models.md
│   ├── other-models.md
│   ├── plugins/
│   │   ├── advanced-model-plugins.md
│   │   ├── directory.md
│   │   ├── index.md
│   │   ├── installing-plugins.md
│   │   ├── llm-markov/
│   │   │   ├── llm_markov.py
│   │   │   └── pyproject.toml
│   │   ├── plugin-hooks.md
│   │   ├── plugin-utilities.md
│   │   └── tutorial-model-plugin.md
│   ├── python-api.md
│   ├── related-tools.md
│   ├── requirements.txt
│   ├── schemas.md
│   ├── setup.md
│   ├── templates.md
│   ├── tools.md
│   └── usage.md
├── llm/
│   ├── __init__.py
│   ├── __main__.py
│   ├── cli.py
│   ├── default_plugins/
│   │   ├── __init__.py
│   │   ├── default_tools.py
│   │   └── openai_models.py
│   ├── embeddings.py
│   ├── embeddings_migrations.py
│   ├── errors.py
│   ├── hookspecs.py
│   ├── migrations.py
│   ├── models.py
│   ├── plugins.py
│   ├── py.typed
│   ├── templates.py
│   ├── tools.py
│   └── utils.py
├── mypy.ini
├── pyproject.toml
├── pytest.ini
├── ruff.toml
└── tests/
    ├── cassettes/
    │   ├── test_tools/
    │   │   ├── test_tool_use_basic.yaml
    │   │   └── test_tool_use_chain_of_two_calls.yaml
    │   └── test_tools_streaming/
    │       ├── test_tools_streaming_variant_a.yaml
    │       ├── test_tools_streaming_variant_b.yaml
    │       └── test_tools_streaming_variant_c.yaml
    ├── conftest.py
    ├── test-llm-load-plugins.sh
    ├── test_aliases.py
    ├── test_async.py
    ├── test_attachments.py
    ├── test_chat.py
    ├── test_chat_templates.py
    ├── test_cli_openai_models.py
    ├── test_cli_options.py
    ├── test_embed.py
    ├── test_embed_cli.py
    ├── test_encode_decode.py
    ├── test_fragments_cli.py
    ├── test_keys.py
    ├── test_llm.py
    ├── test_llm_logs.py
    ├── test_migrate.py
    ├── test_plugins.py
    ├── test_templates.py
    ├── test_tools.py
    ├── test_tools_streaming.py
    └── test_utils.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/FUNDING.yml
================================================
# GitHub Sponsors usernames to show for this repository.
github:
  - simonw


================================================
FILE: .github/dependabot.yml
================================================
# Dependabot: check pip dependencies in the repository root every day and
# place every package (pattern "*") in the single "python-packages" group.
version: 2
updates:
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "daily"
    groups:
      python-packages:
        patterns:
          - "*"


================================================
FILE: .github/workflows/cog.yml
================================================
name: Run Cog

# On every pull request open/update: re-run cog over the docs and README and,
# if anything was regenerated, commit the result back to the PR branch.
on:
  pull_request:
    types: [opened, synchronize]

# The job pushes a commit, so it needs write access to repository contents.
permissions:
  contents: write
  pull-requests: write

jobs:
  run-cog:
    runs-on: ubuntu-latest

    steps:
      # Check out the PR's head branch by name so the later "git push"
      # updates that branch.
      - uses: actions/checkout@v4
        with:
          ref: ${{ github.head_ref }}

      - name: Set up Python 3.11
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install . --group dev

      # The -p prologue marks the run as a test run and points LLM_USER_PATH
      # at /tmp before any cog block executes.
      - name: Run cog
        run: |
          cog -r -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" docs/**/*.md docs/*.md README.md

      # Expose whether cog modified any tracked file as the step output
      # "changes"; the output file path is quoted to avoid word splitting.
      - name: Check for changes
        id: check-changes
        run: |
          if [ -n "$(git diff)" ]; then
            echo "changes=true" >> "$GITHUB_OUTPUT"
          else
            echo "changes=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Commit and push if changed
        if: steps.check-changes.outputs.changes == 'true'
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add -A
          git commit -m "Ran cog"
          git push


================================================
FILE: .github/workflows/publish.yml
================================================
name: Publish Python Package

# When a GitHub release is created: run the test suite on each Python version
# in the matrix, then build the package and publish it.
on:
  release:
    types: [created]

permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
    steps:
    - uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
        cache: pip
        # Key the pip cache on pyproject.toml: the repository has no setup.py.
        cache-dependency-path: pyproject.toml
    - name: Install dependencies
      run: |
        pip install . --group dev
    - name: Run tests
      run: |
        pytest
  deploy:
    runs-on: ubuntu-latest
    environment: release
    # id-token: write lets the publish action request an OIDC token; no
    # password/API token is passed to it below.
    permissions:
      id-token: write
    # Only publish once the whole test matrix has passed.
    needs: [test]
    steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.13'
        cache: pip
        # Key the pip cache on pyproject.toml: the repository has no setup.py.
        cache-dependency-path: pyproject.toml
    - name: Install dependencies
      run: |
        pip install setuptools wheel build
    - name: Build
      run: |
        python -m build
    - name: Publish
      uses: pypa/gh-action-pypi-publish@release/v1


================================================
FILE: .github/workflows/stable-docs.yml
================================================
name: Update Stable Docs

# Maintains a "stable" branch:
#   - on a non-prerelease release, stable is reset to the release tag;
#   - on a push to main whose commit message contains "!stable-docs", the
#     docs/ files changed by that commit are copied onto stable.
on:
  release:
    types: [published]
  push:
    branches:
    - main

permissions:
  contents: write

jobs:
  update_stable_docs:
    runs-on: ubuntu-latest
    steps:
    - name: Checkout repository
      # v4 to match the other workflows in this repository.
      uses: actions/checkout@v4
      with:
        fetch-depth: 0  # We need all commits to find docs/ changes
    - name: Set up Git user
      run: |
        git config user.name "Automated"
        git config user.email "actions@users.noreply.github.com"
    - name: Create stable branch if it does not yet exist
      run: |
        if ! git ls-remote --heads origin stable | grep stable; then
          git checkout -b stable
          # If there are any releases, copy docs/ in from most recent
          LATEST_RELEASE=$(git tag | sort -Vr | head -n1)
          if [ -n "$LATEST_RELEASE" ]; then
            rm -rf docs/
            git checkout "$LATEST_RELEASE" -- docs/
          fi
          git commit -m "Populate docs/ from $LATEST_RELEASE" || echo "No changes"
          git push -u origin stable
        fi
    - name: Handle Release
      if: github.event_name == 'release' && !github.event.release.prerelease
      run: |
        git fetch --all
        git checkout stable
        # GITHUB_REF is refs/tags/<tag>; strip the prefix to get the tag name
        git reset --hard "${GITHUB_REF#refs/tags/}"
        git push origin stable --force
    - name: Handle Commit to Main
      if: contains(github.event.head_commit.message, '!stable-docs')
      run: |
        git fetch origin
        # -B (not -b): a local "stable" branch may already exist if the
        # "Create stable branch" step above created it during this run
        git checkout -B stable origin/stable
        # Get the list of modified files in docs/ from the current commit
        FILES=$(git diff-tree --no-commit-id --name-only -r ${{ github.sha }} -- docs/)
        # Check if the list of files is non-empty
        if [[ -n "$FILES" ]]; then
          # Checkout those files to the stable branch to over-write with their contents
          for FILE in $FILES; do
            git checkout ${{ github.sha }} -- "$FILE"
          done
          git add docs/
          git commit -m "Doc changes from ${{ github.sha }}"
          git push origin stable
        else
          echo "No changes to docs/ in this commit."
          exit 0
        fi


================================================
FILE: .github/workflows/test.yml
================================================
name: Test

on: [push, pull_request]

permissions:
  contents: read

jobs:
  test:
    # Run on every combination of operating system and Python version below.
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
    steps:
    - uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
        cache: pip
        # Key the pip cache on pyproject.toml: the repository has no setup.py.
        cache-dependency-path: pyproject.toml
    - name: Install dependencies
      run: |
        pip install . --group dev
    - name: Run tests
      run: |
        python -m pytest -vv
    # The cog check, linters and plugin-loading script below are skipped on
    # Windows runners; tests and the build check run on every OS.
    - name: Check if cog needs to be run
      if: matrix.os != 'windows-latest'
      run: |
        cog --check \
          -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" \
          docs/**/*.md docs/*.md
    - name: Run Black
      if: matrix.os != 'windows-latest'
      run: |
        black --check .
    - name: Run mypy
      if: matrix.os != 'windows-latest'
      run: |
        mypy llm
    - name: Run ruff
      if: matrix.os != 'windows-latest'
      run: |
        ruff check .
    - name: Check it builds
      run: |
        python -m build
    - name: Run test-llm-load-plugins.sh
      if: matrix.os != 'windows-latest'
      run: |
        llm install llm-cluster llm-mistral
        ./tests/test-llm-load-plugins.sh
    # Keep the built distributions from a single matrix cell only.
    - name: Upload artifact of builds
      if: matrix.python-version == '3.13' && matrix.os == 'ubuntu-latest'
      uses: actions/upload-artifact@v4
      with:
        name: dist-${{ matrix.os }}-${{ matrix.python-version }}
        path: dist/*


================================================
FILE: .gitignore
================================================
.venv
__pycache__/
*.py[cod]
*$py.class
venv
.eggs
.pytest_cache
*.egg-info
.DS_Store
.idea/
.vscode/
uv.lock

================================================
FILE: .readthedocs.yaml
================================================
# Read the Docs build configuration.
version: 2

build:
  os: "ubuntu-22.04"
  tools:
    python: "3.11"

sphinx:
  configuration: "docs/conf.py"

# Extra output formats to build.
formats:
  - pdf
  - epub

# Install the docs requirements first, then the project itself with pip.
python:
  install:
    - requirements: "docs/requirements.txt"
    - method: pip
      path: "."


================================================
FILE: AGENTS.md
================================================
# AGENTS.md

This project uses a Python environment for development and tests.

## Setting up a development environment

1. Install the project with its test dependencies:
   ```bash
   pip install -e '.[test]'
   ```
2. Run the tests:
   ```bash
   pytest
   ```

## Building the documentation

Run the following commands if you want to build the docs locally:

```bash
cd docs
pip install -r requirements.txt
make html
```


================================================
FILE: Justfile
================================================
# Run tests and linters
@default: test lint

# Run pytest with supplied options
@test *options:
  uv run pytest {{options}}

# The cog check below covers README.md plus the same docs globs that the
# "cog" recipe regenerates, so files in docs subdirectories are checked too.

# Run linters
@lint:
  echo "Linters..."
  echo "  Black"
  uv run black . --check
  echo "  cog"
  uv run cog --check \
    -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" \
    README.md docs/**/*.md docs/*.md
  echo "  mypy"
  uv run mypy llm
  echo "  ruff"
  uv run ruff check .

# Run mypy
@mypy:
  uv run mypy llm

# Rebuild docs with cog
@cog:
  uv run cog -r -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" docs/**/*.md docs/*.md README.md

# Serve live docs on localhost:8000
@docs: cog
  rm -rf docs/_build
  cd docs && uv run make livehtml

# Apply Black
@black:
  uv run black .

# Run automatic fixes
@fix: cog
  uv run ruff check . --fix
  uv run black .

# Push commit if tests pass
@push: test lint
  git push


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: MANIFEST.in
================================================
global-exclude tests/*


================================================
FILE: README.md
================================================
<!-- [[[cog
# README.md is generated from docs/index.md using sphinx_markdown_builder
import subprocess
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as build_dir:
    output_root = Path(build_dir)
    # Equivalent to running: sphinx-build -M markdown ./docs <build_dir>
    sphinx_command = ["sphinx-build", "-M", "markdown", "./docs", str(output_root)]
    subprocess.run(sphinx_command, check=True)
    # Read the rendered index page before the temporary directory is removed
    rendered_index = output_root / "markdown" / "index.md"
    readme_markdown = rendered_index.read_text(encoding="utf-8")

cog.out(readme_markdown)
]]] -->
# LLM

[![GitHub repo](https://img.shields.io/badge/github-repo-green)](https://github.com/simonw/llm)
[![PyPI](https://img.shields.io/pypi/v/llm.svg)](https://pypi.org/project/llm/)
[![Changelog](https://img.shields.io/github/v/release/simonw/llm?include_prereleases&label=changelog)](https://llm.datasette.io/en/stable/changelog.html)
[![Tests](https://github.com/simonw/llm/workflows/Test/badge.svg)](https://github.com/simonw/llm/actions?query=workflow%3ATest)
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/llm/blob/main/LICENSE)
[![Discord](https://img.shields.io/discord/823971286308356157?label=discord)](https://datasette.io/discord-llm)
[![Homebrew](https://img.shields.io/homebrew/installs/dy/llm?color=yellow&label=homebrew&logo=homebrew)](https://formulae.brew.sh/formula/llm)

A CLI tool and Python library for interacting with **OpenAI**, **Anthropic’s Claude**, **Google’s Gemini**, **Meta’s Llama** and dozens of other Large Language Models, both via remote APIs and with models that can be installed and run on your own machine.

Watch **[Language models on the command-line](https://www.youtube.com/watch?v=QUXQNi6jQ30)** on YouTube for a demo or [read the accompanying detailed notes](https://simonwillison.net/2024/Jun/17/cli-language-models/).

With LLM you can:

- [Run prompts from the command-line](https://llm.datasette.io/en/stable/usage.html#usage-executing-prompts)
- [Store prompts and responses in SQLite](https://llm.datasette.io/en/stable/logging.html#logging)
- [Generate and store embeddings](https://llm.datasette.io/en/stable/embeddings/index.html#embeddings)
- [Extract structured content from text and images](https://llm.datasette.io/en/stable/schemas.html#schemas)
- [Grant models the ability to execute tools](https://llm.datasette.io/en/stable/tools.html#tools)
- … and much, much more

## Quick start

First, install LLM using `pip` or Homebrew or `pipx` or `uv`:

```bash
pip install llm
```

Or with Homebrew (see [warning note](https://llm.datasette.io/en/stable/setup.html#homebrew-warning)):

```bash
brew install llm
```

Or with [pipx](https://pypa.github.io/pipx/):

```bash
pipx install llm
```

Or with [uv](https://docs.astral.sh/uv/guides/tools/):

```bash
uv tool install llm
```

If you have an [OpenAI API key](https://platform.openai.com/api-keys) you can run this:

```bash
# Paste your OpenAI API key into this
llm keys set openai

# Run a prompt (with the default gpt-4o-mini model)
llm "Ten fun names for a pet pelican"

# Extract text from an image
llm "extract text" -a scanned-document.jpg

# Use a system prompt against a file
cat myfile.py | llm -s "Explain this code"
```

Run prompts against [Gemini](https://aistudio.google.com/apikey) or [Anthropic](https://console.anthropic.com/) with their respective plugins:

```bash
llm install llm-gemini
llm keys set gemini
# Paste Gemini API key here
llm -m gemini-2.0-flash 'Tell me fun facts about Mountain View'

llm install llm-anthropic
llm keys set anthropic
# Paste Anthropic API key here
llm -m claude-4-opus 'Impress me with wild facts about turnips'
```

You can also [install a plugin](https://llm.datasette.io/en/stable/plugins/installing-plugins.html#installing-plugins) to access models that can run on your local device. If you use [Ollama](https://ollama.com/):

```bash
# Install the plugin
llm install llm-ollama

# Download and run a prompt against the Llama 3.2 model
ollama pull llama3.2:latest
llm -m llama3.2:latest 'What is the capital of France?'
```

To start [an interactive chat](https://llm.datasette.io/en/stable/usage.html#usage-chat) with a model, use `llm chat`:

```bash
llm chat -m gpt-4.1
```

```default
Chatting with gpt-4.1
Type 'exit' or 'quit' to exit
Type '!multi' to enter multiple lines, then '!end' to finish
Type '!edit' to open your default editor and modify the prompt.
Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments
> Tell me a joke about a pelican
Why don't pelicans like to tip waiters?

Because they always have a big bill!
```

More background on this project:

- [llm, ttok and strip-tags—CLI tools for working with ChatGPT and other LLMs](https://simonwillison.net/2023/May/18/cli-tools-for-llms/)
- [The LLM CLI tool now supports self-hosted language models via plugins](https://simonwillison.net/2023/Jul/12/llm/)
- [LLM now provides tools for working with embeddings](https://simonwillison.net/2023/Sep/4/llm-embeddings/)
- [Build an image search engine with llm-clip, chat with models with llm chat](https://simonwillison.net/2023/Sep/12/llm-clip-and-chat/)
- [You can now run prompts against images, audio and video in your terminal using LLM](https://simonwillison.net/2024/Oct/29/llm-multi-modal/)
- [Structured data extraction from unstructured content using LLM schemas](https://simonwillison.net/2025/Feb/28/llm-schemas/)
- [Long context support in LLM 0.24 using fragments and template plugins](https://simonwillison.net/2025/Apr/7/long-context-llm/)

See also [the llm tag](https://simonwillison.net/tags/llm/) on my blog.

## Contents

* [Setup](https://llm.datasette.io/en/stable/setup.html)
  * [Installation](https://llm.datasette.io/en/stable/setup.html#installation)
  * [Upgrading to the latest version](https://llm.datasette.io/en/stable/setup.html#upgrading-to-the-latest-version)
  * [Using uvx](https://llm.datasette.io/en/stable/setup.html#using-uvx)
  * [A note about Homebrew and PyTorch](https://llm.datasette.io/en/stable/setup.html#a-note-about-homebrew-and-pytorch)
  * [Installing plugins](https://llm.datasette.io/en/stable/setup.html#installing-plugins)
  * [API key management](https://llm.datasette.io/en/stable/setup.html#api-key-management)
    * [Saving and using stored keys](https://llm.datasette.io/en/stable/setup.html#saving-and-using-stored-keys)
    * [Passing keys using the --key option](https://llm.datasette.io/en/stable/setup.html#passing-keys-using-the-key-option)
    * [Keys in environment variables](https://llm.datasette.io/en/stable/setup.html#keys-in-environment-variables)
  * [Configuration](https://llm.datasette.io/en/stable/setup.html#configuration)
    * [Setting a custom default model](https://llm.datasette.io/en/stable/setup.html#setting-a-custom-default-model)
    * [Setting a custom directory location](https://llm.datasette.io/en/stable/setup.html#setting-a-custom-directory-location)
    * [Turning SQLite logging on and off](https://llm.datasette.io/en/stable/setup.html#turning-sqlite-logging-on-and-off)
* [Usage](https://llm.datasette.io/en/stable/usage.html)
  * [Executing a prompt](https://llm.datasette.io/en/stable/usage.html#executing-a-prompt)
    * [Model options](https://llm.datasette.io/en/stable/usage.html#model-options)
    * [Attachments](https://llm.datasette.io/en/stable/usage.html#attachments)
    * [System prompts](https://llm.datasette.io/en/stable/usage.html#system-prompts)
    * [Tools](https://llm.datasette.io/en/stable/usage.html#tools)
    * [Extracting fenced code blocks](https://llm.datasette.io/en/stable/usage.html#extracting-fenced-code-blocks)
    * [Schemas](https://llm.datasette.io/en/stable/usage.html#schemas)
    * [Fragments](https://llm.datasette.io/en/stable/usage.html#fragments)
    * [Continuing a conversation](https://llm.datasette.io/en/stable/usage.html#continuing-a-conversation)
    * [Tips for using LLM with Bash or Zsh](https://llm.datasette.io/en/stable/usage.html#tips-for-using-llm-with-bash-or-zsh)
    * [Completion prompts](https://llm.datasette.io/en/stable/usage.html#completion-prompts)
  * [Starting an interactive chat](https://llm.datasette.io/en/stable/usage.html#starting-an-interactive-chat)
  * [Listing available models](https://llm.datasette.io/en/stable/usage.html#listing-available-models)
  * [Setting default options for models](https://llm.datasette.io/en/stable/usage.html#setting-default-options-for-models)
* [OpenAI models](https://llm.datasette.io/en/stable/openai-models.html)
  * [Configuration](https://llm.datasette.io/en/stable/openai-models.html#configuration)
  * [OpenAI language models](https://llm.datasette.io/en/stable/openai-models.html#openai-language-models)
  * [Model features](https://llm.datasette.io/en/stable/openai-models.html#model-features)
  * [OpenAI embedding models](https://llm.datasette.io/en/stable/openai-models.html#openai-embedding-models)
  * [OpenAI completion models](https://llm.datasette.io/en/stable/openai-models.html#openai-completion-models)
  * [Adding more OpenAI models](https://llm.datasette.io/en/stable/openai-models.html#adding-more-openai-models)
* [Other models](https://llm.datasette.io/en/stable/other-models.html)
  * [Installing and using a local model](https://llm.datasette.io/en/stable/other-models.html#installing-and-using-a-local-model)
  * [OpenAI-compatible models](https://llm.datasette.io/en/stable/other-models.html#openai-compatible-models)
    * [Extra HTTP headers](https://llm.datasette.io/en/stable/other-models.html#extra-http-headers)
* [Tools](https://llm.datasette.io/en/stable/tools.html)
  * [How tools work](https://llm.datasette.io/en/stable/tools.html#how-tools-work)
  * [Trying out tools](https://llm.datasette.io/en/stable/tools.html#trying-out-tools)
  * [LLM’s implementation of tools](https://llm.datasette.io/en/stable/tools.html#llm-s-implementation-of-tools)
  * [Default tools](https://llm.datasette.io/en/stable/tools.html#default-tools)
  * [Tips for implementing tools](https://llm.datasette.io/en/stable/tools.html#tips-for-implementing-tools)
* [Schemas](https://llm.datasette.io/en/stable/schemas.html)
  * [Schemas tutorial](https://llm.datasette.io/en/stable/schemas.html#schemas-tutorial)
    * [Getting started with dogs](https://llm.datasette.io/en/stable/schemas.html#getting-started-with-dogs)
    * [Extracting people from a news articles](https://llm.datasette.io/en/stable/schemas.html#extracting-people-from-a-news-articles)
  * [Using JSON schemas](https://llm.datasette.io/en/stable/schemas.html#using-json-schemas)
  * [Ways to specify a schema](https://llm.datasette.io/en/stable/schemas.html#ways-to-specify-a-schema)
  * [Concise LLM schema syntax](https://llm.datasette.io/en/stable/schemas.html#concise-llm-schema-syntax)
  * [Saving reusable schemas in templates](https://llm.datasette.io/en/stable/schemas.html#saving-reusable-schemas-in-templates)
  * [Browsing logged JSON objects created using schemas](https://llm.datasette.io/en/stable/schemas.html#browsing-logged-json-objects-created-using-schemas)
* [Templates](https://llm.datasette.io/en/stable/templates.html)
  * [Getting started with <code>--save</code>](https://llm.datasette.io/en/stable/templates.html#getting-started-with-save)
  * [Using a template](https://llm.datasette.io/en/stable/templates.html#using-a-template)
  * [Listing available templates](https://llm.datasette.io/en/stable/templates.html#listing-available-templates)
  * [Templates as YAML files](https://llm.datasette.io/en/stable/templates.html#templates-as-yaml-files)
    * [System prompts](https://llm.datasette.io/en/stable/templates.html#system-prompts)
    * [Fragments](https://llm.datasette.io/en/stable/templates.html#fragments)
    * [Options](https://llm.datasette.io/en/stable/templates.html#options)
    * [Tools](https://llm.datasette.io/en/stable/templates.html#tools)
    * [Schemas](https://llm.datasette.io/en/stable/templates.html#schemas)
    * [Additional template variables](https://llm.datasette.io/en/stable/templates.html#additional-template-variables)
    * [Specifying default parameters](https://llm.datasette.io/en/stable/templates.html#specifying-default-parameters)
    * [Configuring code extraction](https://llm.datasette.io/en/stable/templates.html#configuring-code-extraction)
    * [Setting a default model for a template](https://llm.datasette.io/en/stable/templates.html#setting-a-default-model-for-a-template)
  * [Template loaders from plugins](https://llm.datasette.io/en/stable/templates.html#template-loaders-from-plugins)
* [Fragments](https://llm.datasette.io/en/stable/fragments.html)
  * [Using fragments in a prompt](https://llm.datasette.io/en/stable/fragments.html#using-fragments-in-a-prompt)
  * [Using fragments in chat](https://llm.datasette.io/en/stable/fragments.html#using-fragments-in-chat)
  * [Browsing fragments](https://llm.datasette.io/en/stable/fragments.html#browsing-fragments)
  * [Setting aliases for fragments](https://llm.datasette.io/en/stable/fragments.html#setting-aliases-for-fragments)
  * [Viewing fragments in your logs](https://llm.datasette.io/en/stable/fragments.html#viewing-fragments-in-your-logs)
  * [Using fragments from plugins](https://llm.datasette.io/en/stable/fragments.html#using-fragments-from-plugins)
  * [Listing available fragment prefixes](https://llm.datasette.io/en/stable/fragments.html#listing-available-fragment-prefixes)
* [Model aliases](https://llm.datasette.io/en/stable/aliases.html)
  * [Listing aliases](https://llm.datasette.io/en/stable/aliases.html#listing-aliases)
  * [Adding a new alias](https://llm.datasette.io/en/stable/aliases.html#adding-a-new-alias)
  * [Removing an alias](https://llm.datasette.io/en/stable/aliases.html#removing-an-alias)
  * [Viewing the aliases file](https://llm.datasette.io/en/stable/aliases.html#viewing-the-aliases-file)
* [Embeddings](https://llm.datasette.io/en/stable/embeddings/index.html)
  * [Embedding with the CLI](https://llm.datasette.io/en/stable/embeddings/cli.html)
    * [llm embed](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-embed)
    * [llm embed-multi](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-embed-multi)
    * [llm similar](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-similar)
    * [llm embed-models](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-embed-models)
    * [llm collections list](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-collections-list)
    * [llm collections delete](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-collections-delete)
  * [Using embeddings from Python](https://llm.datasette.io/en/stable/embeddings/python-api.html)
    * [Working with collections](https://llm.datasette.io/en/stable/embeddings/python-api.html#working-with-collections)
    * [Retrieving similar items](https://llm.datasette.io/en/stable/embeddings/python-api.html#retrieving-similar-items)
    * [SQL schema](https://llm.datasette.io/en/stable/embeddings/python-api.html#sql-schema)
  * [Writing plugins to add new embedding models](https://llm.datasette.io/en/stable/embeddings/writing-plugins.html)
    * [Embedding binary content](https://llm.datasette.io/en/stable/embeddings/writing-plugins.html#embedding-binary-content)
  * [Embedding storage format](https://llm.datasette.io/en/stable/embeddings/storage.html)
* [Plugins](https://llm.datasette.io/en/stable/plugins/index.html)
  * [Installing plugins](https://llm.datasette.io/en/stable/plugins/installing-plugins.html)
    * [Listing installed plugins](https://llm.datasette.io/en/stable/plugins/installing-plugins.html#listing-installed-plugins)
    * [Running with a subset of plugins](https://llm.datasette.io/en/stable/plugins/installing-plugins.html#running-with-a-subset-of-plugins)
  * [Plugin directory](https://llm.datasette.io/en/stable/plugins/directory.html)
    * [Local models](https://llm.datasette.io/en/stable/plugins/directory.html#local-models)
    * [Remote APIs](https://llm.datasette.io/en/stable/plugins/directory.html#remote-apis)
    * [Tools](https://llm.datasette.io/en/stable/plugins/directory.html#tools)
    * [Fragments and template loaders](https://llm.datasette.io/en/stable/plugins/directory.html#fragments-and-template-loaders)
    * [Embedding models](https://llm.datasette.io/en/stable/plugins/directory.html#embedding-models)
    * [Extra commands](https://llm.datasette.io/en/stable/plugins/directory.html#extra-commands)
    * [Just for fun](https://llm.datasette.io/en/stable/plugins/directory.html#just-for-fun)
  * [Plugin hooks](https://llm.datasette.io/en/stable/plugins/plugin-hooks.html)
    * [register_commands(cli)](https://llm.datasette.io/en/stable/plugins/plugin-hooks.html#register-commands-cli)
    * [register_models(register)](https://llm.datasette.io/en/stable/plugins/plugin-hooks.html#register-models-register)
    * [register_embedding_models(register)](https://llm.datasette.io/en/stable/plugins/plugin-hooks.html#register-embedding-models-register)
    * [register_tools(register)](https://llm.datasette.io/en/stable/plugins/plugin-hooks.html#register-tools-register)
    * [register_template_loaders(register)](https://llm.datasette.io/en/stable/plugins/plugin-hooks.html#register-template-loaders-register)
    * [register_fragment_loaders(register)](https://llm.datasette.io/en/stable/plugins/plugin-hooks.html#register-fragment-loaders-register)
  * [Developing a model plugin](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html)
    * [The initial structure of the plugin](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#the-initial-structure-of-the-plugin)
    * [Installing your plugin to try it out](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#installing-your-plugin-to-try-it-out)
    * [Building the Markov chain](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#building-the-markov-chain)
    * [Executing the Markov chain](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#executing-the-markov-chain)
    * [Adding that to the plugin](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#adding-that-to-the-plugin)
    * [Understanding execute()](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#understanding-execute)
    * [Prompts and responses are logged to the database](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#prompts-and-responses-are-logged-to-the-database)
    * [Adding options](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#adding-options)
    * [Distributing your plugin](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#distributing-your-plugin)
    * [GitHub repositories](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#github-repositories)
    * [Publishing plugins to PyPI](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#publishing-plugins-to-pypi)
    * [Adding metadata](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#adding-metadata)
    * [What to do if it breaks](https://llm.datasette.io/en/stable/plugins/tutorial-model-plugin.html#what-to-do-if-it-breaks)
  * [Advanced model plugins](https://llm.datasette.io/en/stable/plugins/advanced-model-plugins.html)
    * [Tip: lazily load expensive dependencies](https://llm.datasette.io/en/stable/plugins/advanced-model-plugins.html#tip-lazily-load-expensive-dependencies)
    * [Models that accept API keys](https://llm.datasette.io/en/stable/plugins/advanced-model-plugins.html#models-that-accept-api-keys)
    * [Async models](https://llm.datasette.io/en/stable/plugins/advanced-model-plugins.html#async-models)
    * [Supporting schemas](https://llm.datasette.io/en/stable/plugins/advanced-model-plugins.html#supporting-schemas)
    * [Supporting tools](https://llm.datasette.io/en/stable/plugins/advanced-model-plugins.html#supporting-tools)
    * [Attachments for multi-modal models](https://llm.datasette.io/en/stable/plugins/advanced-model-plugins.html#attachments-for-multi-modal-models)
    * [Tracking token usage](https://llm.datasette.io/en/stable/plugins/advanced-model-plugins.html#tracking-token-usage)
    * [Tracking resolved model names](https://llm.datasette.io/en/stable/plugins/advanced-model-plugins.html#tracking-resolved-model-names)
    * [LLM_RAISE_ERRORS](https://llm.datasette.io/en/stable/plugins/advanced-model-plugins.html#llm-raise-errors)
  * [Utility functions for plugins](https://llm.datasette.io/en/stable/plugins/plugin-utilities.html)
    * [llm.get_key()](https://llm.datasette.io/en/stable/plugins/plugin-utilities.html#llm-get-key)
    * [llm.user_dir()](https://llm.datasette.io/en/stable/plugins/plugin-utilities.html#llm-user-dir)
    * [llm.ModelError](https://llm.datasette.io/en/stable/plugins/plugin-utilities.html#llm-modelerror)
    * [Response.fake()](https://llm.datasette.io/en/stable/plugins/plugin-utilities.html#response-fake)
* [Python API](https://llm.datasette.io/en/stable/python-api.html)
  * [Basic prompt execution](https://llm.datasette.io/en/stable/python-api.html#basic-prompt-execution)
    * [System prompts](https://llm.datasette.io/en/stable/python-api.html#system-prompts)
    * [Attachments](https://llm.datasette.io/en/stable/python-api.html#attachments)
    * [Tools](https://llm.datasette.io/en/stable/python-api.html#tools)
    * [Schemas](https://llm.datasette.io/en/stable/python-api.html#schemas)
    * [Fragments](https://llm.datasette.io/en/stable/python-api.html#fragments)
    * [Model options](https://llm.datasette.io/en/stable/python-api.html#model-options)
    * [Passing an API key](https://llm.datasette.io/en/stable/python-api.html#passing-an-api-key)
    * [Models from plugins](https://llm.datasette.io/en/stable/python-api.html#models-from-plugins)
    * [Accessing the underlying JSON](https://llm.datasette.io/en/stable/python-api.html#accessing-the-underlying-json)
    * [Token usage](https://llm.datasette.io/en/stable/python-api.html#token-usage)
    * [Streaming responses](https://llm.datasette.io/en/stable/python-api.html#streaming-responses)
  * [Async models](https://llm.datasette.io/en/stable/python-api.html#async-models)
    * [Tool functions can be sync or async](https://llm.datasette.io/en/stable/python-api.html#tool-functions-can-be-sync-or-async)
    * [Tool use for async models](https://llm.datasette.io/en/stable/python-api.html#tool-use-for-async-models)
  * [Conversations](https://llm.datasette.io/en/stable/python-api.html#conversations)
    * [Conversations using tools](https://llm.datasette.io/en/stable/python-api.html#conversations-using-tools)
  * [Listing models](https://llm.datasette.io/en/stable/python-api.html#listing-models)
  * [Running code when a response has completed](https://llm.datasette.io/en/stable/python-api.html#running-code-when-a-response-has-completed)
  * [Other functions](https://llm.datasette.io/en/stable/python-api.html#other-functions)
    * [set_alias(alias, model_id)](https://llm.datasette.io/en/stable/python-api.html#set-alias-alias-model-id)
    * [remove_alias(alias)](https://llm.datasette.io/en/stable/python-api.html#remove-alias-alias)
    * [set_default_model(alias)](https://llm.datasette.io/en/stable/python-api.html#set-default-model-alias)
    * [get_default_model()](https://llm.datasette.io/en/stable/python-api.html#get-default-model)
    * [set_default_embedding_model(alias) and get_default_embedding_model()](https://llm.datasette.io/en/stable/python-api.html#set-default-embedding-model-alias-and-get-default-embedding-model)
* [Logging to SQLite](https://llm.datasette.io/en/stable/logging.html)
  * [Viewing the logs](https://llm.datasette.io/en/stable/logging.html#viewing-the-logs)
    * [-s/--short mode](https://llm.datasette.io/en/stable/logging.html#s-short-mode)
    * [Logs for a conversation](https://llm.datasette.io/en/stable/logging.html#logs-for-a-conversation)
    * [Searching the logs](https://llm.datasette.io/en/stable/logging.html#searching-the-logs)
    * [Filtering past a specific ID](https://llm.datasette.io/en/stable/logging.html#filtering-past-a-specific-id)
    * [Filtering by model](https://llm.datasette.io/en/stable/logging.html#filtering-by-model)
    * [Filtering by prompts that used specific fragments](https://llm.datasette.io/en/stable/logging.html#filtering-by-prompts-that-used-specific-fragments)
    * [Filtering by prompts that used specific tools](https://llm.datasette.io/en/stable/logging.html#filtering-by-prompts-that-used-specific-tools)
    * [Browsing data collected using schemas](https://llm.datasette.io/en/stable/logging.html#browsing-data-collected-using-schemas)
  * [Browsing logs using Datasette](https://llm.datasette.io/en/stable/logging.html#browsing-logs-using-datasette)
  * [Backing up your database](https://llm.datasette.io/en/stable/logging.html#backing-up-your-database)
  * [SQL schema](https://llm.datasette.io/en/stable/logging.html#sql-schema)
* [Related tools](https://llm.datasette.io/en/stable/related-tools.html)
  * [strip-tags](https://llm.datasette.io/en/stable/related-tools.html#strip-tags)
  * [ttok](https://llm.datasette.io/en/stable/related-tools.html#ttok)
  * [Symbex](https://llm.datasette.io/en/stable/related-tools.html#symbex)
* [CLI reference](https://llm.datasette.io/en/stable/help.html)
  * [llm --help](https://llm.datasette.io/en/stable/help.html#llm-help)
    * [llm prompt --help](https://llm.datasette.io/en/stable/help.html#llm-prompt-help)
    * [llm chat --help](https://llm.datasette.io/en/stable/help.html#llm-chat-help)
    * [llm keys --help](https://llm.datasette.io/en/stable/help.html#llm-keys-help)
    * [llm logs --help](https://llm.datasette.io/en/stable/help.html#llm-logs-help)
    * [llm models --help](https://llm.datasette.io/en/stable/help.html#llm-models-help)
    * [llm templates --help](https://llm.datasette.io/en/stable/help.html#llm-templates-help)
    * [llm schemas --help](https://llm.datasette.io/en/stable/help.html#llm-schemas-help)
    * [llm tools --help](https://llm.datasette.io/en/stable/help.html#llm-tools-help)
    * [llm aliases --help](https://llm.datasette.io/en/stable/help.html#llm-aliases-help)
    * [llm fragments --help](https://llm.datasette.io/en/stable/help.html#llm-fragments-help)
    * [llm plugins --help](https://llm.datasette.io/en/stable/help.html#llm-plugins-help)
    * [llm install --help](https://llm.datasette.io/en/stable/help.html#llm-install-help)
    * [llm uninstall --help](https://llm.datasette.io/en/stable/help.html#llm-uninstall-help)
    * [llm embed --help](https://llm.datasette.io/en/stable/help.html#llm-embed-help)
    * [llm embed-multi --help](https://llm.datasette.io/en/stable/help.html#llm-embed-multi-help)
    * [llm similar --help](https://llm.datasette.io/en/stable/help.html#llm-similar-help)
    * [llm embed-models --help](https://llm.datasette.io/en/stable/help.html#llm-embed-models-help)
    * [llm collections --help](https://llm.datasette.io/en/stable/help.html#llm-collections-help)
    * [llm openai --help](https://llm.datasette.io/en/stable/help.html#llm-openai-help)
* [Contributing](https://llm.datasette.io/en/stable/contributing.html)
  * [Updating recorded HTTP API interactions and associated snapshots](https://llm.datasette.io/en/stable/contributing.html#updating-recorded-http-api-interactions-and-associated-snapshots)
  * [Debugging tricks](https://llm.datasette.io/en/stable/contributing.html#debugging-tricks)
  * [Documentation](https://llm.datasette.io/en/stable/contributing.html#documentation)
  * [Release process](https://llm.datasette.io/en/stable/contributing.html#release-process)

* [Changelog](https://llm.datasette.io/en/stable/changelog.html)
<!-- [[[end]]] -->


================================================
FILE: docs/.gitignore
================================================
_build


================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# Usage:
#   make help            list the builders Sphinx offers
#   make <builder>       e.g. "make html"; forwarded to "sphinx-build -M"
#   make livehtml        auto-rebuilding HTML build via sphinx-autobuild
#
# Extra command-line options can be passed to any target with O=...,
# for example: make html O="-W"

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
# Project name; not referenced by any rule in this file.
SPHINXPROJ    = llm
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# None of these names correspond to files that the rules produce, so mark
# them phony to keep a stray file called "help" or "livehtml" from
# suppressing the recipe.
.PHONY: help livehtml Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# Explicit rule, so it takes precedence over the catch-all above.
# Uses $(O) (letter O) like the other targets so that O=... is honoured.
livehtml:
	sphinx-autobuild -b html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: docs/_templates/base.html
================================================
{%- extends "!base.html" %}
{#
  Overrides two blocks of the parent "base.html" template.
  NOTE(review): the "!" prefix is presumably Sphinx's syntax for "the
  theme's own template of this name" - confirm against the Sphinx
  templating docs.

  htmltitle: picks the <title> element.
    - no docstitle set        -> just the page title
    - page is the master doc  -> a fixed, descriptive project title
    - any other page          -> "<page title> - <docs title>"
#}

{%- block htmltitle -%}
{% if not docstitle %}
  <title>{{ title|striptags|e }}</title>
{% elif pagename == master_doc %}
  <title>LLM: A CLI utility and Python library for interacting with Large Language Models</title>
{% else %}
  <title>{{ title|striptags|e }} - {{ docstitle|striptags|e }}</title>
{% endif %}
{%- endblock -%}

{#
  site_meta: keeps whatever the parent block emits (super()) and appends
  the deferred plausible.io script, tagged with the llm.datasette.io domain.
#}
{% block site_meta %}
{{ super() }}
<script defer data-domain="llm.datasette.io" src="https://plausible.io/js/plausible.js"></script>
{% endblock %}


================================================
FILE: docs/aliases.md
================================================
(aliases)=
# Model aliases

LLM supports model aliases, which allow you to refer to a model by a short name instead of its full ID.

## Listing aliases

To list current aliases, run this:

```bash
llm aliases
```
Example output:

<!-- [[[cog
from click.testing import CliRunner
from llm.cli import cli
result = CliRunner().invoke(cli, ["aliases", "list"])
cog.out("```\n{}```".format(result.output))
]]] -->
```
4o                  : gpt-4o
chatgpt-4o          : chatgpt-4o-latest
4o-mini             : gpt-4o-mini
4.1                 : gpt-4.1
4.1-mini            : gpt-4.1-mini
4.1-nano            : gpt-4.1-nano
3.5                 : gpt-3.5-turbo
chatgpt             : gpt-3.5-turbo
chatgpt-16k         : gpt-3.5-turbo-16k
3.5-16k             : gpt-3.5-turbo-16k
4                   : gpt-4
gpt4                : gpt-4
4-32k               : gpt-4-32k
gpt-4-turbo-preview : gpt-4-turbo
4-turbo             : gpt-4-turbo
4t                  : gpt-4-turbo
gpt-4.5             : gpt-4.5-preview
3.5-instruct        : gpt-3.5-turbo-instruct
chatgpt-instruct    : gpt-3.5-turbo-instruct
ada                 : text-embedding-ada-002 (embedding)
ada-002             : text-embedding-ada-002 (embedding)
3-small             : text-embedding-3-small (embedding)
3-large             : text-embedding-3-large (embedding)
3-small-512         : text-embedding-3-small-512 (embedding)
3-large-256         : text-embedding-3-large-256 (embedding)
3-large-1024        : text-embedding-3-large-1024 (embedding)
```
<!-- [[[end]]] -->

Add `--json` to get that list back as JSON:

```bash
llm aliases list --json
```
Example output:
```json
{
    "3.5": "gpt-3.5-turbo",
    "chatgpt": "gpt-3.5-turbo",
    "4": "gpt-4",
    "gpt4": "gpt-4",
    "ada": "ada-002"
}
```

## Adding a new alias

The `llm aliases set <alias> <model-id>` command can be used to add a new alias:

```bash
llm aliases set mini gpt-4o-mini
```
You can also pass one or more `-q search` options to set an alias on the first model matching those search terms:
```bash
llm aliases set mini -q 4o -q mini
```
Now you can run the `gpt-4o-mini` model using the `mini` alias like this:
```bash
llm -m mini 'An epic Greek-style saga about a cheesecake that builds a SQL database from scratch'
```
Aliases can be set for both regular models and {ref}`embedding models <embeddings>` using the same command. To set an alias of `oai` for the OpenAI `ada-002` embedding model use this:
```bash
llm aliases set oai ada-002
```
Now you can embed a string using that model like so:
```bash
llm embed -c 'hello world' -m oai
```
Output:
```
[-0.014945968054234982, 0.0014304015785455704, ...]
```

## Removing an alias

The `llm aliases remove <alias>` command will remove the specified alias:

```bash
llm aliases remove mini
```

## Viewing the aliases file

Aliases are stored in an `aliases.json` file in the LLM configuration directory.

To see the path to that file, run this:

```bash
llm aliases path
```
To view the content of that file, run this:

```bash
cat "$(llm aliases path)"
```

================================================
FILE: docs/changelog.md
================================================
# Changelog

(v0_29)=
## 0.29 (2026-03-17)

- The `-t/--template` option now works correctly with the `-x/--extract` and `--xl/--extract-last` flags.
- `llm logs` now shows any additional model options in the Markdown output. [#1322](https://github.com/simonw/llm/issues/1322)
- New OpenAI models: `gpt-5.4`, `gpt-5.4-mini`, `gpt-5.4-nano`. [#1376](https://github.com/simonw/llm/issues/1376)

(v0_28)=
## 0.28 (2025-12-12)

- New OpenAI models: `gpt-5.1`, `gpt-5.1-chat-latest`, `gpt-5.2` and `gpt-5.2-chat-latest`. [#1300](https://github.com/simonw/llm/issues/1300), [#1317](https://github.com/simonw/llm/issues/1317)
- LLM now requires Python 3.10 or higher. Python 3.14 is now covered by the tests.
- When fetching URLs as fragments using `llm -f URL`, the request now includes a custom user-agent header: `llm/VERSION (https://llm.datasette.io/)`. [#1309](https://github.com/simonw/llm/issues/1309)
- Fixed a bug where fragments were not correctly registered with their source when using `llm chat`. Thanks, [Giuseppe Rota](https://github.com/grota). [#1316](https://github.com/simonw/llm/pull/1316)
- Fixed some file descriptor leak warnings. Thanks, [Eric Bloch](https://github.com/eedeebee). [#1313](https://github.com/simonw/llm/issues/1313)
- Fixed a deprecation warning for `asyncio.iscoroutinefunction`.
- Type annotations for the OpenAI Chat, AsyncChat and Completion `execute()` methods. Thanks, [Arjan Mossel](https://github.com/ar-jan). [#1315](https://github.com/simonw/llm/pull/1315)
- The project now uses `uv` and dependency groups for development. See the updated {ref}`contributing documentation <contributing>`. [#1318](https://github.com/simonw/llm/issues/1318)

(v0_27_1)=
## 0.27.1 (2025-08-11)

- `llm chat -t template` now correctly loads any tools that are included in that template. [#1239](https://github.com/simonw/llm/issues/1239)
- Fixed a bug where `llm -m gpt5 -o reasoning_effort minimal --save gm` saved a template containing invalid YAML. [#1237](https://github.com/simonw/llm/issues/1237)
- Fixed a bug where running `llm chat -t template` could cause prompts to be duplicated. [#1240](https://github.com/simonw/llm/issues/1240)
- Less confusing error message if a requested toolbox class is unavailable. [#1238](https://github.com/simonw/llm/issues/1238)

(v0_27)=
## 0.27 (2025-08-11)

This release adds support for the new **GPT-5 family** of models from OpenAI. It also enhances tool calling in a number of ways, including allowing {ref}`templates <prompt-templates>` to bundle pre-configured tools.

### New features

- New models: `gpt-5`, `gpt-5-mini` and `gpt-5-nano`. [#1229](https://github.com/simonw/llm/issues/1229)
- LLM {ref}`templates <prompt-templates>` can now include a list of tools. These can be named tools from plugins or arbitrary Python function blocks, see {ref}`Tools in templates <prompt-templates-tools>`. [#1009](https://github.com/simonw/llm/issues/1009)
- Tools {ref}`can now return attachments <python-api-tools-attachments>`, for models that support features such as image input. [#1014](https://github.com/simonw/llm/issues/1014)
- New methods on the `Toolbox` class: `.add_tool()`, `.prepare()` and `.prepare_async()`, described in {ref}`Dynamic toolboxes <python-api-tools-dynamic>`. [#1111](https://github.com/simonw/llm/issues/1111)
- New `model.conversation(before_call=x, after_call=y)` attributes for registering callback functions to run before and after tool calls. See {ref}`tool debugging hooks <python-api-tools-debug-hooks>` for details. [#1088](https://github.com/simonw/llm/issues/1088)
- Some model providers can serve different models from the same configured URL - [llm-llama-server](https://github.com/simonw/llm-llama-server) for example. Plugins for these providers can now record the resolved model ID of the model that was used to the LLM logs using the `response.set_resolved_model(model_id)` method. [#1117](https://github.com/simonw/llm/issues/1117)
- Raising `llm.CancelToolCall` now only cancels the current tool call, passing an error back to the model and allowing it to continue. [#1148](https://github.com/simonw/llm/issues/1148)
- New `-l/--latest` option for `llm logs -q searchterm` for searching logs ordered by date (most recent first) instead of the default relevance search. [#1177](https://github.com/simonw/llm/issues/1177)

### Bug fixes and documentation

- Fix for various bugs with different formats of streaming function responses for OpenAI-compatible models. Thanks, [James Sanford](https://github.com/jamessanford). [#1218](https://github.com/simonw/llm/pull/1218)
- The `register_embedding_models` hook is [now documented](https://llm.datasette.io/en/stable/plugins/plugin-hooks.html#register-embedding-models-register). [#1049](https://github.com/simonw/llm/issues/1049)
- Show visible stack trace for `llm templates show invalid-template-name`. [#1053](https://github.com/simonw/llm/issues/1053)
- Handle invalid tool names more gracefully in `llm chat`. [#1104](https://github.com/simonw/llm/issues/1104)
- Add a {ref}`Tool plugins <plugin-directory-tools>` section to the plugin directory. [#1110](https://github.com/simonw/llm/issues/1110)
- Error on `register(Klass)` if the passed class is not a subclass of `Toolbox`. [#1114](https://github.com/simonw/llm/issues/1114)
- Add `-h` for `--help` for all `llm` CLI commands. [#1134](https://github.com/simonw/llm/issues/1134)
- Add missing `dataclasses` to advanced model plugins docs. [#1137](https://github.com/simonw/llm/issues/1137)
- Fixed a bug where `llm logs -T llm_version "version" --async` incorrectly recorded just one single log entry when it should have recorded two. [#1150](https://github.com/simonw/llm/issues/1150)
- All extra OpenAI model keys in `extra-openai-models.yaml` are {ref}`now documented <openai-compatible-models>`. [#1228](https://github.com/simonw/llm/issues/1228)

(v0_26)=
## 0.26 (2025-05-27)

**Tool support** is finally here! This release adds support for exposing {ref}`tools <tools>` to LLMs, previously described in the release notes for {ref}`0.26a0 <v0_26_a0>` and {ref}`0.26a1 <v0_26_a1>`.

Read **[Large Language Models can run tools in your terminal with LLM 0.26](https://simonwillison.net/2025/May/27/llm-tools/)** for a detailed overview of the new features.

Also in this release:

- Two new {ref}`default tools <tools-default>`: `llm_version()` and `llm_time()`. [#1096](https://github.com/simonw/llm/issues/1096), [#1103](https://github.com/simonw/llm/issues/1103)
- Documentation on {ref}`how to add tool support to a model plugin <advanced-model-plugins-tools>`. [#1000](https://github.com/simonw/llm/issues/1000)
- Added a {ref}`prominent warning <tools-warning>` about the risk of prompt injection when using tools. [#1097](https://github.com/simonw/llm/issues/1097)
- Switched to using monotonic ULIDs for the response IDs in the logs, fixing some intermittent test failures. [#1099](https://github.com/simonw/llm/issues/1099)
- New `tool_instances` table records details of Toolbox instances created while executing a prompt. [#1089](https://github.com/simonw/llm/issues/1089)
- `llm.get_key()` is now a {ref}`documented utility function <plugin-utilities-get-key>`. [#1094](https://github.com/simonw/llm/issues/1094)

(v0_26_a1)=
## 0.26a1 (2025-05-25)

Hopefully the last alpha before a stable release that includes tool support.

### Features

*   **Plugin-provided tools can now be grouped into "Toolboxes".**
    *   Toolboxes (`llm.Toolbox` classes) allow plugins to expose multiple related tools that share state or configuration (e.g., a `Memory` tool or `Filesystem` tool). ([#1059](https://github.com/simonw/llm/issues/1059), [#1086](https://github.com/simonw/llm/issues/1086))
*   **Tool support for `llm chat`.**
    *   The `llm chat` command now accepts `--tool` and `--functions` arguments, allowing interactive chat sessions to use tools. ([#1004](https://github.com/simonw/llm/issues/1004), [#1062](https://github.com/simonw/llm/issues/1062))
*   **Tools can now execute asynchronously.**
    *   Models that implement `AsyncModel` can now run tools, including tool functions defined as `async def`. ([#1063](https://github.com/simonw/llm/issues/1063))
*   **`llm chat` now supports adding fragments during a session.**
    *   Use the new `!fragment <id>` command while chatting to insert content from a fragment. Initial fragments can also be passed to `llm chat` using `-f` or `--sf`. Thanks, [Dan Turkel](https://github.com/daturkel). ([#1044](https://github.com/simonw/llm/issues/1044), [#1048](https://github.com/simonw/llm/issues/1048))
*   **Filter `llm logs` by tools.**
    *   New `--tool <name>` option to filter logs to show only responses that involved a specific tool (e.g., `--tool simple_eval`).
    *   The `--tools` flag shows all responses that used any tool. ([#1013](https://github.com/simonw/llm/issues/1013), [#1072](https://github.com/simonw/llm/issues/1072))
*   **`llm schemas list` can output JSON.**
    *   Added `--json` and `--nl` (newline-delimited JSON) options to `llm schemas list` for programmatic access to saved schema definitions. ([#1070](https://github.com/simonw/llm/issues/1070))
*   **Filter `llm similar` results by ID prefix.**
    *   The new `--prefix` option for `llm similar` allows searching for similar items only within IDs that start with a specified string (e.g., `llm similar my-collection --prefix 'docs/'`). Thanks, [Dan Turkel](https://github.com/daturkel). ([#1052](https://github.com/simonw/llm/issues/1052))
*   **Control chained tool execution limit.**
    *   New `--chain-limit <N>` (or `--cl`) option for `llm prompt` and `llm chat` to specify the maximum number of consecutive tool calls allowed for a single prompt. Defaults to 5; set to 0 for unlimited. ([#1025](https://github.com/simonw/llm/issues/1025))
*   **`llm plugins --hook <NAME>` option.**
    *   Filter the list of installed plugins to only show those that implement a specific plugin hook. ([#1047](https://github.com/simonw/llm/issues/1047))
* `llm tools list` now shows toolboxes and their methods. ([#1013](https://github.com/simonw/llm/issues/1013))
* `llm prompt` and `llm chat` now automatically re-enable plugin-provided tools when continuing a conversation (`-c` or `--cid`). ([#1020](https://github.com/simonw/llm/issues/1020))
* The `--tools-debug` option now pretty-prints JSON tool results for improved readability. ([#1083](https://github.com/simonw/llm/issues/1083))
* New `LLM_TOOLS_DEBUG` environment variable to permanently enable `--tools-debug`. ([#1045](https://github.com/simonw/llm/issues/1045))
* `llm chat` sessions now correctly respect default model options configured with `llm models set-options`. Thanks, [André Arko](https://github.com/indirect). ([#985](https://github.com/simonw/llm/issues/985))
* New `--pre` option for `llm install` to allow installing pre-release packages. ([#1060](https://github.com/simonw/llm/issues/1060))
* OpenAI models (`gpt-4o`, `gpt-4o-mini`) now explicitly declare support for tools and vision. ([#1037](https://github.com/simonw/llm/issues/1037))
* The `supports_tools` parameter is now supported in `extra-openai-models.yaml`. Thanks, [Mahesh Hegde](https://github.com/mahesh-hegde). ([#1068](https://github.com/simonw/llm/issues/1068))

### Bug fixes

*   Fixed a bug where the `name` parameter in `register(function, name="name")` was ignored for tool plugins. ([#1032](https://github.com/simonw/llm/issues/1032))
*   Ensure `pathlib.Path` objects are cast to `str` before passing to `click.edit` in `llm templates edit`. Thanks, [Abizer Lokhandwala](https://github.com/abizer). ([#1031](https://github.com/simonw/llm/issues/1031))


(v0_26_a0)=
## 0.26a0 (2025-05-13)

This is the first alpha to introduce {ref}`support for tools<tools>`! Models with tool capability (which includes the default OpenAI model family) can now be granted access to execute Python functions as part of responding to a prompt.

Tools are supported by {ref}`the command-line interface <usage-tools>`:

```bash
llm --functions '
def multiply(x: int, y: int) -> int:
    """Multiply two numbers."""
    return x * y
' 'what is 34234 * 213345'
```
And in {ref}`the Python API <python-api-tools>`, using a new `model.chain()` method for executing multiple prompts in a sequence:
```python
import llm

def multiply(x: int, y: int) -> int:
    """Multiply two numbers."""
    return x * y

model = llm.get_model("gpt-4.1-mini")
response = model.chain(
    "What is 34234 * 213345?",
    tools=[multiply]
)
print(response.text())
```
New tools can also be defined using the {ref}`register_tools() plugin hook <plugin-hooks-register-tools>`. They can then be called by name from the command-line like this:
```bash
llm -T multiply 'What is 34234 * 213345?'
```
Tool support is currently under **active development**. Consult [this milestone](https://github.com/simonw/llm/milestone/12) for the latest status.

(v0_25)=
## 0.25 (2025-05-04)

- New plugin feature: {ref}`plugin-hooks-register-fragment-loaders` plugins can now return a mixture of fragments and attachments. The [llm-video-frames](https://github.com/simonw/llm-video-frames) plugin is the first to take advantage of this mechanism. [#972](https://github.com/simonw/llm/issues/972)
- New OpenAI models: `gpt-4.1`, `gpt-4.1-mini`, `gpt-4.1-nano`, `o3`, `o4-mini`. [#945](https://github.com/simonw/llm/issues/945), [#965](https://github.com/simonw/llm/issues/965), [#976](https://github.com/simonw/llm/issues/976).
- New environment variables: `LLM_MODEL` and `LLM_EMBEDDING_MODEL` for setting the model to use without needing to specify `-m model_id` every time. [#932](https://github.com/simonw/llm/issues/932)
- New command: `llm fragments loaders`, to list all currently available fragment loader prefixes provided by plugins. [#941](https://github.com/simonw/llm/issues/941)
- `llm fragments` command now shows fragments ordered by the date they were first used. [#973](https://github.com/simonw/llm/issues/973)
- `llm chat` now includes a `!edit` command for editing a prompt using your default terminal text editor. Thanks, [Benedikt Willi](https://github.com/Hopiu). [#969](https://github.com/simonw/llm/pull/969)
- Allow `-t` and `--system` to be used at the same time. [#916](https://github.com/simonw/llm/issues/916)
- Fixed a bug where accessing a model via its alias would fail to respect any default options set for that model. [#968](https://github.com/simonw/llm/issues/968)
- Improved documentation for {ref}`extra-openai-models.yaml <openai-compatible-models>`. Thanks, [Rahim Nathwani](https://github.com/rahimnathwani) and [Dan Guido](https://github.com/dguido). [#950](https://github.com/simonw/llm/pull/950), [#957](https://github.com/simonw/llm/pull/957)
- `llm -c/--continue` now works correctly with the `-d/--database` option. `llm chat` now accepts that `-d/--database` option. Thanks, [Sukhbinder Singh](https://github.com/sukhbinder). [#933](https://github.com/simonw/llm/issues/933)

(v0_25a0)=
## 0.25a0 (2025-04-10)

- `llm models --options` now shows keys and environment variables for models that use API keys. Thanks, [Steve Morin](https://github.com/smorin). [#903](https://github.com/simonw/llm/issues/903)
- Added `py.typed` marker file so LLM can now be used as a dependency in projects that use `mypy` without a warning. [#887](https://github.com/simonw/llm/issues/887)
- `$` characters can now be used in templates by escaping them as `$$`. Thanks, [@guspix](https://github.com/guspix). [#904](https://github.com/simonw/llm/issues/904)
- LLM now uses `pyproject.toml` instead of `setup.py`. [#908](https://github.com/simonw/llm/issues/908)

(v0_24_2)=
## 0.24.2 (2025-04-08)

- Fixed a bug on Windows with the new `llm -t path/to/file.yaml` feature. [#901](https://github.com/simonw/llm/issues/901)

(v0_24_1)=
## 0.24.1 (2025-04-08)

- Templates can now be specified as a path to a file on disk, using `llm -t path/to/file.yaml`. This makes them consistent with how `-f` fragments are loaded. [#897](https://github.com/simonw/llm/issues/897)
- `llm logs backup /tmp/backup.db` command for {ref}`backing up your <logging-backup>` `logs.db` database. [#879](https://github.com/simonw/llm/issues/879)

(v0_24)=
## 0.24 (2025-04-07)

Support for **fragments** to help assemble prompts for long context models. Improved support for **templates** to support attachments and fragments. New plugin hooks for providing custom loaders for both templates and fragments. See [Long context support in LLM 0.24 using fragments and template plugins](https://simonwillison.net/2025/Apr/7/long-context-llm/) for more on this release.

The new [llm-docs](https://github.com/simonw/llm-docs) plugin demonstrates these new features. Install it like this:

```bash
llm install llm-docs
```
Now you can ask questions of the LLM documentation like this:

```bash
llm -f docs: 'How do I save a new template?'
```
The `docs:` prefix is registered by the plugin. The plugin fetches the LLM documentation for your installed version (from the [docs-for-llms](https://github.com/simonw/docs-for-llms) repository) and uses that as a prompt fragment to help answer your question.

Two more new plugins are [llm-templates-github](https://github.com/simonw/llm-templates-github) and [llm-templates-fabric](https://github.com/simonw/llm-templates-fabric).

`llm-templates-github` lets you share and use templates on GitHub. You can run my [Pelican riding a bicycle](https://simonwillison.net/tags/pelican-riding-a-bicycle/) benchmark against a model like this:

```bash
llm install llm-templates-github
llm -t gh:simonw/pelican-svg -m o3-mini
```
This executes [this pelican-svg.yaml](https://github.com/simonw/llm-templates/blob/main/pelican-svg.yaml) template stored in my [simonw/llm-templates](https://github.com/simonw/llm-templates) repository, using a new repository naming convention.

To share your own templates, create a repository on GitHub under your user account called `llm-templates` and start saving `.yaml` files to it.

[llm-templates-fabric](https://github.com/simonw/llm-templates-fabric) provides a similar mechanism for loading templates from Daniel Miessler's [fabric collection](https://github.com/danielmiessler/fabric):

```bash
llm install llm-templates-fabric
curl https://simonwillison.net/2025/Apr/6/only-miffy/ | \
  llm -t f:extract_main_idea
```

Major new features:

- New {ref}`fragments feature <fragments>`. Fragments can be used to assemble long prompts from multiple existing pieces - URLs, file paths or previously used fragments. These will be stored de-duplicated in the database avoiding wasting space storing multiple long context pieces. Example usage: `llm -f https://llm.datasette.io/robots.txt 'explain this file'`. [#617](https://github.com/simonw/llm/issues/617)
- The `llm logs` command now accepts `-f` fragment references too, and will show just logged prompts that used those fragments.
- {ref}`register_template_loaders() plugin hook <plugin-hooks-register-template-loaders>` allowing plugins to register new `prefix:value` custom template loaders. [#809](https://github.com/simonw/llm/issues/809)
- {ref}`register_fragment_loaders() plugin hook <plugin-hooks-register-fragment-loaders>` allowing plugins to register new `prefix:value` custom fragment loaders. [#886](https://github.com/simonw/llm/issues/886)
- {ref}`llm fragments <fragments-browsing>` family of commands for browsing fragments that have been previously logged to the database.
- The new [llm-openai plugin](https://github.com/simonw/llm-openai-plugin) provides support for **o1-pro** (which is not supported by the OpenAI mechanism used by LLM core). Future OpenAI features will migrate to this plugin instead of LLM core itself.

Improvements to templates:

- `llm -t $URL` option can now take a URL to a YAML template. [#856](https://github.com/simonw/llm/issues/856)
- Templates can now store default model options. [#845](https://github.com/simonw/llm/issues/845)
- Executing a template that does not use the `$input` variable no longer blocks LLM waiting for input, so prompt templates can now be used to try different models using `llm -t pelican-svg -m model_id`. [#835](https://github.com/simonw/llm/issues/835)
- `llm templates` command no longer crashes if one of the listed template files contains invalid YAML. [#880](https://github.com/simonw/llm/issues/880)
- Attachments can now be stored in templates. [#826](https://github.com/simonw/llm/issues/826)

Other changes:

- New {ref}`llm models options <usage-executing-default-options>` family of commands for setting default options for particular models. [#829](https://github.com/simonw/llm/issues/829)
- `llm logs list`, `llm schemas list` and `llm schemas show` all now take a `-d/--database` option with an optional path to a SQLite database. They used to take `-p/--path` but that was inconsistent with other commands. `-p/--path` still works but is excluded from `--help` and will be removed in a future LLM release. [#857](https://github.com/simonw/llm/issues/857)
- `llm logs -e/--expand` option for expanding fragments. [#881](https://github.com/simonw/llm/issues/881)
- `llm prompt -d path-to-sqlite.db` option can now be used to write logs to a custom SQLite database. [#858](https://github.com/simonw/llm/issues/858)
- `llm similar -p/--plain` option providing more human-readable output than the default JSON. [#853](https://github.com/simonw/llm/issues/853)
- `llm logs -s/--short` now truncates to include the end of the prompt too. Thanks, [Sukhbinder Singh](https://github.com/sukhbinder). [#759](https://github.com/simonw/llm/issues/759)
- Set the `LLM_RAISE_ERRORS=1` environment variable to raise errors during prompts rather than suppressing them, which means you can run `python -i -m llm 'prompt'` and then drop into a debugger on errors with `import pdb; pdb.pm()`. [#817](https://github.com/simonw/llm/issues/817)
- Improved [--help output](https://llm.datasette.io/en/stable/help.html#llm-embed-multi-help) for `llm embed-multi`. [#824](https://github.com/simonw/llm/issues/824)
- `llm models -m X` option which can be passed multiple times with model IDs to see the details of just those models. [#825](https://github.com/simonw/llm/issues/825)
- OpenAI models now accept PDF attachments. [#834](https://github.com/simonw/llm/issues/834)
- `llm prompt -q gpt -q 4o` option - pass `-q searchterm` one or more times to execute a prompt against the first model that matches all of those strings - useful for if you can't remember the full model ID. [#841](https://github.com/simonw/llm/issues/841)
- {ref}`OpenAI compatible models <openai-compatible-models>` configured using `extra-openai-models.yaml` now support `supports_schema: true`, `vision: true` and `audio: true` options. Thanks, [@adaitche](https://github.com/adaitche) and [@giuli007](https://github.com/giuli007). [#819](https://github.com/simonw/llm/pull/819), [#843](https://github.com/simonw/llm/pull/843)


(v0_24a1)=
## 0.24a1 (2025-04-06)

- New Fragments feature. [#617](https://github.com/simonw/llm/issues/617)
- `register_fragment_loaders()` plugin hook. [#886](https://github.com/simonw/llm/issues/886)

(v0_24a0)=
## 0.24a0 (2025-02-28)

- Alpha release with experimental `register_template_loaders()` plugin hook. [#809](https://github.com/simonw/llm/issues/809)

(v0_23)=
## 0.23 (2025-02-28)

Support for **schemas**, for getting supported models to output JSON that matches a specified JSON schema. See also [Structured data extraction from unstructured content using LLM schemas](https://simonwillison.net/2025/Feb/28/llm-schemas/) for background on this feature. [#776](https://github.com/simonw/llm/issues/776)

- New `llm prompt --schema '{JSON schema goes here}'` option for specifying a schema that should be used for the output from the model. The {ref}`schemas documentation <schemas>` has more details and a tutorial.
- Schemas can also be defined using a {ref}`concise schema specification <schemas-dsl>`, for example `llm prompt --schema 'name, bio, age int'`. [#790](https://github.com/simonw/llm/issues/790)
- Schemas can also be specified by passing a filename and through {ref}`several other methods <schemas-specify>`. [#780](https://github.com/simonw/llm/issues/780)
- New {ref}`llm schemas family of commands <help-schemas>`: `llm schemas list`, `llm schemas show`, and `llm schemas dsl` for debugging the new concise schema language. [#781](https://github.com/simonw/llm/issues/781)
- Schemas can now be saved to templates using `llm --schema X --save template-name` or through modifying the {ref}`template YAML <prompt-templates-yaml>`. [#778](https://github.com/simonw/llm/issues/778)
- The {ref}`llm logs <logging>` command now has new options for extracting data collected using schemas: `--data`, `--data-key`, `--data-array`, `--data-ids`. [#782](https://github.com/simonw/llm/issues/782)
- New `llm logs --id-gt X` and `--id-gte X` options. [#801](https://github.com/simonw/llm/issues/801)
- New `llm models --schemas` option for listing models that support schemas. [#797](https://github.com/simonw/llm/issues/797)
- `model.prompt(..., schema={...})` parameter for specifying a schema from Python. This accepts either a dictionary JSON schema definition or a Pydantic `BaseModel` subclass, see {ref}`schemas in the Python API docs <python-api-schemas>`.
- The default OpenAI plugin now enables schemas across all supported models. Run `llm models --schemas` for a list of these.
- The [llm-anthropic](https://github.com/simonw/llm-anthropic) and [llm-gemini](https://github.com/simonw/llm-gemini) plugins have been upgraded to add schema support for those models. Here's documentation on how to {ref}`add schema support to a model plugin <advanced-model-plugins-schemas>`.

Other smaller changes:

- [GPT-4.5 preview](https://openai.com/index/introducing-gpt-4-5/) is now a supported model: `llm -m gpt-4.5 'a joke about a pelican and a wolf'` [#795](https://github.com/simonw/llm/issues/795)
- The prompt string is now optional when calling `model.prompt()` from the Python API, so `model.prompt(attachments=llm.Attachment(url=url))` now works. [#784](https://github.com/simonw/llm/issues/784)
- `extra-openai-models.yaml` now supports a `reasoning: true` option. Thanks, [Kasper Primdal Lauritzen](https://github.com/KPLauritzen). [#766](https://github.com/simonw/llm/pull/766)
- LLM now depends on Pydantic v2 or higher. Pydantic v1 is no longer supported. [#520](https://github.com/simonw/llm/issues/520)


(v0_22)=
## 0.22 (2025-02-16)

See also [LLM 0.22, the annotated release notes](https://simonwillison.net/2025/Feb/17/llm/).

- Plugins that provide models that use API keys can now subclass the new `llm.KeyModel` and `llm.AsyncKeyModel` classes. This results in the API key being passed as a new `key` parameter to their `.execute()` methods, and means that Python users can pass a key as the `model.prompt(..., key=)` - see {ref}`Passing an API key <python-api-models-api-keys>`. Plugin developers should consult the new documentation on writing {ref}`Models that accept API keys <advanced-model-plugins-api-keys>`. [#744](https://github.com/simonw/llm/issues/744)
- New OpenAI model: `chatgpt-4o-latest`. This model ID accesses the current model being used to power ChatGPT, which can change without warning. [#752](https://github.com/simonw/llm/issues/752)
- New `llm logs -s/--short` flag, which returns a greatly shortened version of the matching log entries in YAML format with a truncated prompt and without including the response. [#737](https://github.com/simonw/llm/issues/737)
- Both `llm models` and `llm embed-models` now take multiple `-q` search fragments. You can now search for all models matching "gemini" and "exp" using `llm models -q gemini -q exp`. [#748](https://github.com/simonw/llm/issues/748)
- New `llm embed-multi --prepend X` option for prepending a string to each value before it is embedded - useful for models such as [nomic-embed-text-v2-moe](https://huggingface.co/nomic-ai/nomic-embed-text-v2-moe) that require passages to start with a string like `"search_document: "`. [#745](https://github.com/simonw/llm/issues/745)
- The `response.json()` and `response.usage()` methods are {ref}`now documented <python-api-underlying-json>`.
- Fixed a bug where conversations that were loaded from the database could not be continued using `asyncio` prompts. [#742](https://github.com/simonw/llm/issues/742)
- New plugin for macOS users: [llm-mlx](https://github.com/simonw/llm-mlx), which provides [extremely high performance access](https://simonwillison.net/2025/Feb/15/llm-mlx/) to a wide range of local models using Apple's MLX framework.
- The `llm-claude-3` plugin has been renamed to [llm-anthropic](https://github.com/simonw/llm-anthropic).

(v0_21)=
## 0.21 (2025-01-31)

- New model: `o3-mini`. [#728](https://github.com/simonw/llm/issues/728)
- The `o3-mini` and `o1` models now support a `reasoning_effort` option which can be set to `low`, `medium` or `high`.
- `llm prompt` and `llm logs` now have a `--xl/--extract-last` option for extracting the last fenced code block in the response - a complement to the existing `-x/--extract` option. [#717](https://github.com/simonw/llm/issues/717)

(v0_20)=
## 0.20 (2025-01-22)

- New model, `o1`. This model does not yet support streaming. [#676](https://github.com/simonw/llm/issues/676)
- `o1-preview` and `o1-mini` models now support streaming.
- New models, `gpt-4o-audio-preview` and `gpt-4o-mini-audio-preview`. [#677](https://github.com/simonw/llm/issues/677)
- `llm prompt -x/--extract` option, which returns just the content of the first fenced code block in the response. Try `llm prompt -x 'Python function to reverse a string'`. [#681](https://github.com/simonw/llm/issues/681)
  - Creating a template using `llm ... --save x` now supports the `-x/--extract` option, which is saved to the template. YAML templates can set this option using `extract: true`.
  - New `llm logs -x/--extract` option extracts the first fenced code block from matching logged responses.
- New `llm models -q 'search'` option returning models that case-insensitively match the search query. [#700](https://github.com/simonw/llm/issues/700)
- Installation documentation now also includes `uv`. Thanks, [Ariel Marcus](https://github.com/ajmarcus). [#690](https://github.com/simonw/llm/pull/690) and [#702](https://github.com/simonw/llm/issues/702)
- `llm models` command now shows the current default model at the bottom of the listing. Thanks, [Amjith Ramanujam](https://github.com/amjith). [#688](https://github.com/simonw/llm/pull/688)
- {ref}`Plugin directory <plugin-directory>` now includes `llm-venice`, `llm-bedrock`, `llm-deepseek` and `llm-cmd-comp`.
- Fixed bug where some dependency version combinations could cause a `Client.__init__() got an unexpected keyword argument 'proxies'` error. [#709](https://github.com/simonw/llm/issues/709)
- OpenAI embedding models are now available using their full names of `text-embedding-ada-002`, `text-embedding-3-small` and `text-embedding-3-large` - the previous names are still supported as aliases. Thanks, [web-sst](https://github.com/web-sst). [#654](https://github.com/simonw/llm/pull/654)

(v0_19_1)=
## 0.19.1 (2024-12-05)

- Fixed bug where `llm.get_models()` and `llm.get_async_models()` returned the same model multiple times. [#667](https://github.com/simonw/llm/issues/667)

(v0_19)=
## 0.19 (2024-12-01)

- Tokens used by a response are now logged to new `input_tokens` and `output_tokens` integer columns and a `token_details` JSON string column, for the default OpenAI models and models from other plugins that {ref}`implement this feature <advanced-model-plugins-usage>`. [#610](https://github.com/simonw/llm/issues/610)
- `llm prompt` now takes a `-u/--usage` flag to display token usage at the end of the response.
- `llm logs -u/--usage` shows token usage information for logged responses.
- `llm prompt ... --async` responses are now logged to the database. [#641](https://github.com/simonw/llm/issues/641)
- `llm.get_models()` and `llm.get_async_models()` functions, {ref}`documented here <python-api-listing-models>`. [#640](https://github.com/simonw/llm/issues/640)
- `response.usage()` and async response `await response.usage()` methods, returning a `Usage(input=2, output=1, details=None)` dataclass. [#644](https://github.com/simonw/llm/issues/644)
- `response.on_done(callback)` and `await response.on_done(callback)` methods for specifying a callback to be executed when a response has completed, {ref}`documented here <python-api-response-on-done>`. [#653](https://github.com/simonw/llm/issues/653)
- Fix for bug running `llm chat` on Windows 11. Thanks, [Sukhbinder Singh](https://github.com/sukhbinder). [#495](https://github.com/simonw/llm/issues/495)

(v0_19a2)=
## 0.19a2 (2024-11-20)

- `llm.get_models()` and `llm.get_async_models()` functions, {ref}`documented here <python-api-listing-models>`. [#640](https://github.com/simonw/llm/issues/640)

(v0_19a1)=
## 0.19a1 (2024-11-19)

- `response.usage()` and async response `await response.usage()` methods, returning a `Usage(input=2, output=1, details=None)` dataclass. [#644](https://github.com/simonw/llm/issues/644)

(v0_19a0)=
## 0.19a0 (2024-11-19)

- Tokens used by a response are now logged to new `input_tokens` and `output_tokens` integer columns and a `token_details` JSON string column, for the default OpenAI models and models from other plugins that {ref}`implement this feature <advanced-model-plugins-usage>`. [#610](https://github.com/simonw/llm/issues/610)
- `llm prompt` now takes a `-u/--usage` flag to display token usage at the end of the response.
- `llm logs -u/--usage` shows token usage information for logged responses.
- `llm prompt ... --async` responses are now logged to the database. [#641](https://github.com/simonw/llm/issues/641)

(v0_18)=
## 0.18 (2024-11-17)

- Initial support for async models. Plugins can now provide an `AsyncModel` subclass that can be accessed in the Python API using the new `llm.get_async_model(model_id)` method. See {ref}`async models in the Python API docs<python-api-async>` and {ref}`implementing async models in plugins <advanced-model-plugins-async>`. [#507](https://github.com/simonw/llm/issues/507)
- OpenAI models all now include async models, so function calls such as `llm.get_async_model("gpt-4o-mini")` will return an async model.
- `gpt-4o-audio-preview` model can be used to send audio attachments to the GPT-4o audio model. [#608](https://github.com/simonw/llm/issues/608)
- Attachments can now be sent without requiring a prompt. [#611](https://github.com/simonw/llm/issues/611)
- `llm models --options` now includes information on whether a model supports attachments. [#612](https://github.com/simonw/llm/issues/612)
- `llm models --async` shows available async models.
- Custom OpenAI-compatible models can now be marked as `can_stream: false` in the YAML if they do not support streaming. Thanks, [Chris Mungall](https://github.com/cmungall). [#600](https://github.com/simonw/llm/pull/600)
- Fixed bug where OpenAI usage data was incorrectly serialized to JSON. [#614](https://github.com/simonw/llm/issues/614)
- Standardized on `audio/wav` MIME type for audio attachments rather than `audio/wave`. [#603](https://github.com/simonw/llm/issues/603)

(v0_18a1)=
## 0.18a1 (2024-11-14)

- Fixed bug where conversations did not work for async OpenAI models. [#632](https://github.com/simonw/llm/issues/632)
- `__repr__` methods for `Response` and `AsyncResponse`.

(v0_18a0)=
## 0.18a0 (2024-11-13)

Alpha support for **async models**. [#507](https://github.com/simonw/llm/issues/507)

Multiple [smaller changes](https://github.com/simonw/llm/compare/0.17.1...0.18a0).

(v0_17)=
## 0.17 (2024-10-29)

Support for **attachments**, allowing multi-modal models to accept images, audio, video and other formats. [#578](https://github.com/simonw/llm/issues/578)

The default OpenAI `gpt-4o` and `gpt-4o-mini` models can both now be prompted with JPEG, GIF, PNG and WEBP images.

Attachments {ref}`in the CLI <usage-attachments>` can be URLs:

```bash
llm -m gpt-4o "describe this image" \
  -a https://static.simonwillison.net/static/2024/pelicans.jpg
```
Or file paths:
```bash
llm -m gpt-4o-mini "extract text" -a image1.jpg -a image2.jpg
```
Or binary data, which may need to use `--attachment-type` to specify the MIME type:
```bash
cat image | llm -m gpt-4o-mini "extract text" --attachment-type - image/jpeg
```

Attachments are also available {ref}`in the Python API <python-api-attachments>`:

```python
model = llm.get_model("gpt-4o-mini")
response = model.prompt(
    "Describe these images",
    attachments=[
        llm.Attachment(path="pelican.jpg"),
        llm.Attachment(url="https://static.simonwillison.net/static/2024/pelicans.jpg"),
    ]
)
```
Plugins that provide alternative models can support attachments, see {ref}`advanced-model-plugins-attachments` for details.

The latest **[llm-claude-3](https://github.com/simonw/llm-claude-3)** plugin now supports attachments for Anthropic's Claude 3 and 3.5 models. The **[llm-gemini](https://github.com/simonw/llm-gemini)** plugin supports attachments for Google's Gemini 1.5 models.

Also in this release: OpenAI models now record their `"usage"` data in the database even when the response was streamed. These records can be viewed using `llm logs --json`. [#591](https://github.com/simonw/llm/issues/591)

(v0_17a0)=
## 0.17a0 (2024-10-28)

Alpha support for **attachments**. [#578](https://github.com/simonw/llm/issues/578)

(v0_16)=
## 0.16 (2024-09-12)

- OpenAI models now use the internal `self.get_key()` mechanism, which means they can be used from Python code in a way that will pick up keys that have been configured using `llm keys set` or the `OPENAI_API_KEY` environment variable. [#552](https://github.com/simonw/llm/issues/552). This code now works correctly:
    ```python
    import llm
    print(llm.get_model("gpt-4o-mini").prompt("hi"))
    ```
- New documented API methods: `llm.get_default_model()`, `llm.set_default_model(alias)`, `llm.get_default_embedding_model()`, `llm.set_default_embedding_model(alias)`. [#553](https://github.com/simonw/llm/issues/553)
- Support for OpenAI's new [o1 family](https://openai.com/o1/) of preview models, `llm -m o1-preview "prompt"` and `llm -m o1-mini "prompt"`. These models are currently only available to [tier 5](https://platform.openai.com/docs/guides/rate-limits/usage-tiers?context=tier-five) OpenAI API users, though this may change in the future. [#570](https://github.com/simonw/llm/issues/570)

(v0_15)=
## 0.15 (2024-07-18)

- Support for OpenAI's [new GPT-4o mini](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) model: `llm -m gpt-4o-mini 'rave about pelicans in French'` [#536](https://github.com/simonw/llm/issues/536)
- `gpt-4o-mini` is now the default model if you do not {ref}`specify your own default <setup-default-model>`, replacing GPT-3.5 Turbo. GPT-4o mini is both cheaper and better than GPT-3.5 Turbo.
- Fixed a bug where `llm logs -q 'flourish' -m haiku` could not combine both the `-q` search query and the `-m` model specifier. [#515](https://github.com/simonw/llm/issues/515)

(v0_14)=
## 0.14 (2024-05-13)

- Support for OpenAI's [new GPT-4o](https://openai.com/index/hello-gpt-4o/) model: `llm -m gpt-4o 'say hi in Spanish'` [#490](https://github.com/simonw/llm/issues/490)
- The `gpt-4-turbo` alias is now a model ID, which indicates the latest version of OpenAI's GPT-4 Turbo text and image model. Your existing `logs.db` database may contain records under the previous model ID of `gpt-4-turbo-preview`. [#493](https://github.com/simonw/llm/issues/493)
- New `llm logs -r/--response` option for outputting just the last captured response, without wrapping it in Markdown and accompanying it with the prompt. [#431](https://github.com/simonw/llm/issues/431)
- Nine new {ref}`plugins <plugin-directory>` since version 0.13:
  - **[llm-claude-3](https://github.com/simonw/llm-claude-3)** supporting Anthropic's [Claude 3 family](https://www.anthropic.com/news/claude-3-family) of models.
  - **[llm-command-r](https://github.com/simonw/llm-command-r)** supporting Cohere's Command R and [Command R Plus](https://txt.cohere.com/command-r-plus-microsoft-azure/) API models.
  - **[llm-reka](https://github.com/simonw/llm-reka)** supports the [Reka](https://www.reka.ai/) family of models via their API.
  - **[llm-perplexity](https://github.com/hex/llm-perplexity)** by Alexandru Geana supporting the [Perplexity Labs](https://docs.perplexity.ai/) API models, including `llama-3-sonar-large-32k-online` which can search for things online and `llama-3-70b-instruct`.
  - **[llm-groq](https://github.com/angerman/llm-groq)** by Moritz Angermann providing access to fast models hosted by [Groq](https://console.groq.com/docs/models).
  - **[llm-fireworks](https://github.com/simonw/llm-fireworks)** supporting models hosted by [Fireworks AI](https://fireworks.ai/).
  - **[llm-together](https://github.com/wearedevx/llm-together)** adds support for the [Together AI](https://www.together.ai/) extensive family of hosted openly licensed models.
  - **[llm-embed-onnx](https://github.com/simonw/llm-embed-onnx)** provides seven embedding models that can be executed using the ONNX model framework.
  - **[llm-cmd](https://github.com/simonw/llm-cmd)** accepts a prompt for a shell command, runs that prompt and populates the result in your shell so you can review it, edit it and then hit `<enter>` to execute or `ctrl+c` to cancel, see [this post for details](https://simonwillison.net/2024/Mar/26/llm-cmd/).

(v0_13_1)=
## 0.13.1 (2024-01-26)

- Fix for `No module named 'readline'` error on Windows. [#407](https://github.com/simonw/llm/issues/407)

(v0_13)=
## 0.13 (2024-01-26)

See also [LLM 0.13: The annotated release notes](https://simonwillison.net/2024/Jan/26/llm/).

- Added support for new OpenAI embedding models: `3-small` and `3-large` and three variants of those with different dimension sizes, `3-small-512`, `3-large-256` and `3-large-1024`. See {ref}`OpenAI embedding models <openai-models-embedding>` for details. [#394](https://github.com/simonw/llm/issues/394)
- The default `gpt-4-turbo` model alias now points to `gpt-4-turbo-preview`, which uses the most recent OpenAI GPT-4 turbo model (currently `gpt-4-0125-preview`). [#396](https://github.com/simonw/llm/issues/396)
- New OpenAI model aliases `gpt-4-1106-preview` and `gpt-4-0125-preview`.
- OpenAI models now support a `-o json_object 1` option which will cause their output to be returned as a valid JSON object. [#373](https://github.com/simonw/llm/issues/373)
- New {ref}`plugins <plugin-directory>` since the last release include [llm-mistral](https://github.com/simonw/llm-mistral), [llm-gemini](https://github.com/simonw/llm-gemini), [llm-ollama](https://github.com/taketwo/llm-ollama) and [llm-bedrock-meta](https://github.com/flabat/llm-bedrock-meta).
- The `keys.json` file for storing API keys is now created with `600` file permissions. [#351](https://github.com/simonw/llm/issues/351)
- Documented {ref}`a pattern <homebrew-warning>` for installing plugins that depend on PyTorch using the Homebrew version of LLM, despite Homebrew using Python 3.12 when PyTorch has not yet released a stable package for that Python version. [#397](https://github.com/simonw/llm/issues/397)
- Underlying OpenAI Python library has been upgraded to `>1.0`. It is possible this could cause compatibility issues with LLM plugins that also depend on that library. [#325](https://github.com/simonw/llm/issues/325)
- Arrow keys now work inside the `llm chat` command. [#376](https://github.com/simonw/llm/issues/376)
- `LLM_OPENAI_SHOW_RESPONSES=1` environment variable now outputs much more detailed information about the HTTP request and response made to OpenAI (and OpenAI-compatible) APIs. [#404](https://github.com/simonw/llm/issues/404)
- Dropped support for Python 3.7.

(v0_12)=
## 0.12 (2023-11-06)

- Support for the [new GPT-4 Turbo model](https://openai.com/blog/new-models-and-developer-products-announced-at-devday) from OpenAI. Try it using `llm chat -m gpt-4-turbo` or `llm chat -m 4t`. [#323](https://github.com/simonw/llm/issues/323)
- New `-o seed 1` option for OpenAI models which sets a seed that can attempt to evaluate the prompt deterministically. [#324](https://github.com/simonw/llm/issues/324)

(v0_11_2)=
## 0.11.2 (2023-11-06)

- Pin to version of OpenAI Python library prior to 1.0 to avoid breaking. [#327](https://github.com/simonw/llm/issues/327)

(v0_11_1)=
## 0.11.1 (2023-10-31)

- Fixed a bug where `llm embed -c "text"` did not correctly pick up the configured {ref}`default embedding model <embeddings-cli-embed-models-default>`. [#317](https://github.com/simonw/llm/issues/317)
- New plugins: [llm-python](https://github.com/simonw/llm-python), [llm-bedrock-anthropic](https://github.com/sblakey/llm-bedrock-anthropic) and [llm-embed-jina](https://github.com/simonw/llm-embed-jina) (described in [Execute Jina embeddings with a CLI using llm-embed-jina](https://simonwillison.net/2023/Oct/26/llm-embed-jina/)).
- [llm-gpt4all](https://github.com/simonw/llm-gpt4all) now uses the new GGUF model format. [simonw/llm-gpt4all#16](https://github.com/simonw/llm-gpt4all/issues/16)

(v0_11)=
## 0.11 (2023-09-18)

LLM now supports the new OpenAI `gpt-3.5-turbo-instruct` model, and OpenAI completion (as opposed to chat completion) models in general. [#284](https://github.com/simonw/llm/issues/284)

```bash
llm -m gpt-3.5-turbo-instruct 'Reasons to tame a wild beaver:'
```
OpenAI completion models like this support a `-o logprobs 3` option, which accepts a number between 1 and 5 and will include the log probabilities (for each produced token, what were the top 3 options considered by the model) in the logged response.

```bash
llm -m gpt-3.5-turbo-instruct 'Say hello succinctly' -o logprobs 3
```
You can then view the `logprobs` that were recorded in the SQLite logs database like this:
```bash
sqlite-utils "$(llm logs path)" \
  'select * from responses order by id desc limit 1' | \
  jq '.[0].response_json' -r | jq
```
Truncated output looks like this:
```
  [
    {
      "text": "Hi",
      "top_logprobs": [
        {
          "Hi": -0.13706253,
          "Hello": -2.3714375,
          "Hey": -3.3714373
        }
      ]
    },
    {
      "text": " there",
      "top_logprobs": [
        {
          " there": -0.96057636,
          "!\"": -0.5855763,
          ".\"": -3.2574513
        }
      ]
    }
  ]
```
Also in this release:

- The `llm.user_dir()` function, used by plugins, now ensures the directory exists before returning it. [#275](https://github.com/simonw/llm/issues/275)
- New `LLM_OPENAI_SHOW_RESPONSES=1` environment variable for displaying the full HTTP response returned by OpenAI compatible APIs. [#286](https://github.com/simonw/llm/issues/286)
- The `llm embed-multi` command now has a `--batch-size X` option for setting the batch size to use when processing embeddings - useful if you have limited memory available. [#273](https://github.com/simonw/llm/issues/273)
- The `collection.embed_multi()` method also now accepts an optional `batch_size=int` argument.
- Fixed two bugs with `llm embed-multi --files` relating to handling of directories. Thanks, [ealvar3z](https://github.com/ealvar3z). [#274](https://github.com/simonw/llm/issues/274), [#280](https://github.com/simonw/llm/issues/280)

(v0_10)=
## 0.10 (2023-09-12)

The two major features in this release are the **{ref}`llm chat command <usage-chat>`** and support for **embedding binary data**.

See [Build an image search engine with llm-clip, chat with models with llm chat](https://simonwillison.net/2023/Sep/12/llm-clip-and-chat/) for more background on these features.

### llm chat

The new `llm chat` command starts an ongoing chat conversation with a model in your terminal. It works with all models supported by LLM and its {ref}`plugins <plugins>`, including locally installed models such as Llama 2. [#231](https://github.com/simonw/llm/issues/231)

This offers a big performance boost for local models, since they don't need to be freshly loaded into memory for each prompt.

Here's an example chat with Llama 2 13B, provided by the [llm-mlc](https://github.com/simonw/llm-mlc) plugin.

```bash
llm chat -m mlc-chat-Llama-2-13b-chat-hf-q4f16_1
```
This starts a chat session:
```
Type 'exit' or 'quit' to exit
Type '!multi' to enter multiple lines, then '!end' to finish
Type '!edit' to open your default editor and modify the prompt.
> Who are you?
Hello! I'm just an AI, here to assist you with any questions you may have.
My name is LLaMA, and I'm a large language model trained to provide helpful
and informative responses to a wide range of questions and topics. I'm here
to help you find the information you need, while ensuring a safe and
respectful conversation. Please feel free to ask me anything, and I'll do my
best to provide a helpful and accurate response.
> Tell me a joke about otters
Sure, here's a joke about otters:

Why did the otter go to the party?

Because he heard it was a "whale" of a time!

(Get it? Whale, like a big sea mammal, but also a "wild" or "fun" time.
Otters are known for their playful and social nature, so it's a lighthearted
and silly joke.)

I hope that brought a smile to your face! Do you have any other questions or
topics you'd like to discuss?
> exit
```
Chat sessions are {ref}`logged to SQLite <logging>` - use `llm logs` to view them. They can accept system prompts, templates and model options - consult {ref}`the chat documentation <usage-chat>` for details.

### Binary embedding support

LLM's {ref}`embeddings feature <embeddings>` has been expanded to provide support for embedding binary data, in addition to text. [#254](https://github.com/simonw/llm/pull/254)

This enables models like [CLIP](https://openai.com/research/clip), supported by the new **[llm-clip](https://github.com/simonw/llm-clip)** plugin.

CLIP is a multi-modal embedding model which can embed images and text into the same vector space. This means you can use it to create an embedding index of photos, and then search for the embedding vector for "a happy dog" and get back images that are semantically closest to that string.

To create embeddings for every JPEG in a directory stored in a `photos` collection, run:

```bash
llm install llm-clip
llm embed-multi photos --files photos/ '*.jpg' --binary -m clip
```
Now you can search for photos of raccoons using:
```
llm similar photos -c 'raccoon'
```
This spits out a list of images, ranked by how similar they are to the string "raccoon":
```
{"id": "IMG_4801.jpeg", "score": 0.28125139257127457, "content": null, "metadata": null}
{"id": "IMG_4656.jpeg", "score": 0.26626441704164294, "content": null, "metadata": null}
{"id": "IMG_2944.jpeg", "score": 0.2647445926996852, "content": null, "metadata": null}
...
```

### Also in this release

- The {ref}`LLM_LOAD_PLUGINS environment variable <llm-load-plugins>` can be used to control which plugins are loaded when `llm` starts running. [#256](https://github.com/simonw/llm/issues/256)
- The `llm plugins --all` option includes builtin plugins in the list of plugins. [#259](https://github.com/simonw/llm/issues/259)
- The `llm embed-db` family of commands has been renamed to `llm collections`. [#229](https://github.com/simonw/llm/issues/229)
- `llm embed-multi --files` now has an `--encoding` option and defaults to falling back to `latin-1` if a file cannot be processed as `utf-8`. [#225](https://github.com/simonw/llm/issues/225)

(v0_10_a1)=
## 0.10a1 (2023-09-11)

- Support for embedding binary data. [#254](https://github.com/simonw/llm/pull/254)
- `llm chat` now works for models with API keys. [#247](https://github.com/simonw/llm/issues/247)
- `llm chat -o` for passing options to a model. [#244](https://github.com/simonw/llm/issues/244)
- `llm chat --no-stream` option. [#248](https://github.com/simonw/llm/issues/248)
- `LLM_LOAD_PLUGINS` environment variable. [#256](https://github.com/simonw/llm/issues/256)
- `llm plugins --all` option for including builtin plugins. [#259](https://github.com/simonw/llm/issues/259)
- `llm embed-db` has been renamed to `llm collections`. [#229](https://github.com/simonw/llm/issues/229)
- Fixed bug where `llm embed -c` option was treated as a filepath, not a string. Thanks, [mhalle](https://github.com/mhalle). [#263](https://github.com/simonw/llm/pull/263)

(v0_10_a0)=
## 0.10a0 (2023-09-04)

- New {ref}`llm chat <usage-chat>` command for starting an interactive terminal chat with a model. [#231](https://github.com/simonw/llm/issues/231)
- `llm embed-multi --files` now has an `--encoding` option and defaults to falling back to `latin-1` if a file cannot be processed as `utf-8`. [#225](https://github.com/simonw/llm/issues/225)

(v0_9)=
## 0.9 (2023-09-03)

The big new feature in this release is support for **embeddings**. See [LLM now provides tools for working with embeddings](https://simonwillison.net/2023/Sep/4/llm-embeddings/) for additional details.

{ref}`Embedding models <embeddings>` take a piece of text - a word, sentence, paragraph or even a whole article, and convert that into an array of floating point numbers. [#185](https://github.com/simonw/llm/issues/185)

This embedding vector can be thought of as representing a position in many-dimensional space, where the distance between two vectors represents how semantically similar they are to each other within the context of a language model.

Embeddings can be used to find **related documents**, and also to implement **semantic search** - where a user can search for a phrase and get back results that are semantically similar to that phrase even if they do not share any exact keywords.

LLM now provides both CLI and Python APIs for working with embeddings. Embedding models are defined by plugins, so you can install additional models using the {ref}`plugins mechanism <installing-plugins>`.

The first two embedding models supported by LLM are:

- OpenAI's [ada-002](https://platform.openai.com/docs/guides/embeddings) embedding model, available via an inexpensive API if you set an OpenAI key using `llm keys set openai`.
- The [sentence-transformers](https://www.sbert.net/) family of models, available via the new [llm-sentence-transformers](https://github.com/simonw/llm-sentence-transformers) plugin.

See {ref}`embeddings-cli` for detailed instructions on working with embeddings using LLM.

The new commands for working with embeddings are:

- **{ref}`llm embed <embeddings-cli-embed>`** - calculate embeddings for content and return them to the console or store them in a SQLite database.
- **{ref}`llm embed-multi <embeddings-cli-embed-multi>`** - run bulk embeddings for multiple strings, using input from a CSV, TSV or JSON file, data from a SQLite database or data found by scanning the filesystem. [#215](https://github.com/simonw/llm/issues/215)
- **{ref}`llm similar <embeddings-cli-similar>`** - run similarity searches against your stored embeddings - starting with a search phrase or finding content related to a previously stored vector. [#190](https://github.com/simonw/llm/issues/190)
- **{ref}`llm embed-models <embeddings-cli-embed-models>`** - list available embedding models.
- `llm embed-db` - commands for inspecting and working with the default embeddings SQLite database.

There's also a new {ref}`llm.Collection <embeddings-python-collections>` class for creating and searching collections of embeddings from Python code, and a {ref}`llm.get_embedding_model() <embeddings-python-api>` interface for embedding strings directly. [#191](https://github.com/simonw/llm/issues/191)

(v0_8_1)=
## 0.8.1 (2023-08-31)

- Fixed bug where first prompt would show an error if the `io.datasette.llm` directory had not yet been created. [#193](https://github.com/simonw/llm/issues/193)
- Updated documentation to recommend a different `llm-gpt4all` model since the one we were using is no longer available. [#195](https://github.com/simonw/llm/issues/195)

(v0_8)=
## 0.8 (2023-08-20)

- The output format for `llm logs` has changed. Previously it was JSON - it's now a much more readable Markdown format suitable for pasting into other documents. [#160](https://github.com/simonw/llm/issues/160)
  - The new `llm logs --json` option can be used to get the old JSON format.
  - Pass `llm logs --conversation ID` or `--cid ID` to see the full logs for a specific conversation.
- You can now combine piped input and a prompt in a single command: `cat script.py | llm 'explain this code'`. This works even for models that do not support {ref}`system prompts <usage-system-prompts>`. [#153](https://github.com/simonw/llm/issues/153)
- Additional {ref}`openai-compatible-models` can now be configured with custom HTTP headers. This enables platforms such as [openrouter.ai](https://openrouter.ai/) to be used with LLM, which can provide Claude access even without an Anthropic API key.
- Keys set in `keys.json` are now used in preference to environment variables. [#158](https://github.com/simonw/llm/issues/158)
- The documentation now includes a {ref}`plugin directory <plugin-directory>` listing all available plugins for LLM. [#173](https://github.com/simonw/llm/issues/173)
- New {ref}`related tools <related-tools>` section in the documentation describing `ttok`, `strip-tags` and `symbex`. [#111](https://github.com/simonw/llm/issues/111)
- The `llm models`, `llm aliases` and `llm templates` commands now default to running the same command as `llm models list`, `llm aliases list` and `llm templates list` respectively. [#167](https://github.com/simonw/llm/issues/167)
- New `llm keys` (aka `llm keys list`) command for listing the names of all configured keys. [#174](https://github.com/simonw/llm/issues/174)
- Two new Python API functions, `llm.set_alias(alias, model_id)` and `llm.remove_alias(alias)` can be used to configure aliases from within Python code. [#154](https://github.com/simonw/llm/pull/154)
- LLM is now compatible with both Pydantic 1 and Pydantic 2. This means you can install `llm` as a Python dependency in a project that depends on Pydantic 1 without running into dependency conflicts. Thanks, [Chris Mungall](https://github.com/cmungall). [#147](https://github.com/simonw/llm/pull/147)
- `llm.get_model(model_id)` is now documented as raising `llm.UnknownModelError` if the requested model does not exist. [#155](https://github.com/simonw/llm/issues/155)

(v0_7_1)=
## 0.7.1 (2023-08-19)

- Fixed a bug where some users would see an `AlterError: No such column: log.id` error when attempting to use this tool, after upgrading to the latest [sqlite-utils 3.35 release](https://sqlite-utils.datasette.io/en/stable/changelog.html#v3-35). [#162](https://github.com/simonw/llm/issues/162)

(v0_7)=
## 0.7 (2023-08-12)

The new {ref}`aliases` commands can be used to configure additional aliases for models, for example:

```bash
llm aliases set turbo gpt-3.5-turbo-16k
```
Now you can run the 16,000 token `gpt-3.5-turbo-16k` model like this:

```bash
llm -m turbo 'An epic Greek-style saga about a cheesecake that builds a SQL database from scratch'
```
Use `llm aliases list` to see a list of aliases and `llm aliases remove turbo` to remove one again. [#151](https://github.com/simonw/llm/issues/151)

### Notable new plugins

- **[llm-mlc](https://github.com/simonw/llm-mlc)** can run local models released by the [MLC project](https://mlc.ai/mlc-llm/), including models that can take advantage of the GPU on Apple Silicon M1/M2 devices.
- **[llm-llama-cpp](https://github.com/simonw/llm-llama-cpp)** uses [llama.cpp](https://github.com/ggerganov/llama.cpp) to run models published in the GGML format. See [Run Llama 2 on your own Mac using LLM and Homebrew](https://simonwillison.net/2023/Aug/1/llama-2-mac/) for more details.

### Also in this release

- OpenAI models now have min and max validation on their floating point options. Thanks, Pavel Král. [#115](https://github.com/simonw/llm/issues/115)
- Fix for bug where `llm templates list` raised an error if a template had an empty prompt. Thanks, Sherwin Daganato. [#132](https://github.com/simonw/llm/pull/132)
- Fixed bug in `llm install --editable` option which prevented installation of `.[test]`. [#136](https://github.com/simonw/llm/issues/136)
- `llm install --no-cache-dir` and `--force-reinstall` options. [#146](https://github.com/simonw/llm/issues/146)

(v0_6_1)=
## 0.6.1 (2023-07-24)

- LLM can now be installed directly from Homebrew core: `brew install llm`. [#124](https://github.com/simonw/llm/issues/124)
- Python API documentation now covers {ref}`python-api-system-prompts`.
- Fixed incorrect example in the {ref}`prompt-templates` documentation. Thanks, Jorge Cabello. [#125](https://github.com/simonw/llm/pull/125)

(v0_6)=
## 0.6 (2023-07-18)

- Models hosted on [Replicate](https://replicate.com/) can now be accessed using the [llm-replicate](https://github.com/simonw/llm-replicate) plugin, including the new Llama 2 model from Meta AI. More details here: [Accessing Llama 2 from the command-line with the llm-replicate plugin](https://simonwillison.net/2023/Jul/18/accessing-llama-2/).
- Model providers that expose an API that is compatible with the OpenAI API format, including self-hosted model servers such as [LocalAI](https://github.com/go-skynet/LocalAI), can now be accessed using {ref}`additional configuration <openai-compatible-models>` for the default OpenAI plugin. [#106](https://github.com/simonw/llm/issues/106)
- OpenAI models that are not yet supported by LLM can also {ref}`be configured <openai-extra-models>` using the new `extra-openai-models.yaml` configuration file. [#107](https://github.com/simonw/llm/issues/107)
- The {ref}`llm logs command <logging-view>` now accepts a `-m model_id` option to filter logs to a specific model. Aliases can be used here in addition to model IDs. [#108](https://github.com/simonw/llm/issues/108)
- Logs now have a SQLite full-text search index against their prompts and responses, and the `llm logs -q SEARCH` option can be used to return logs that match a search term. [#109](https://github.com/simonw/llm/issues/109)

(v0_5)=
## 0.5 (2023-07-12)

LLM now supports **additional language models**, thanks to a new {ref}`plugins mechanism <installing-plugins>` for installing additional models.

Plugins are available for 19 models in addition to the default OpenAI ones:

- [llm-gpt4all](https://github.com/simonw/llm-gpt4all) adds support for 17 models that can download and run on your own device, including Vicuna, Falcon and wizardLM.
- [llm-mpt30b](https://github.com/simonw/llm-mpt30b) adds support for the MPT-30B model, a 19GB download.
- [llm-palm](https://github.com/simonw/llm-palm) adds support for Google's PaLM 2 via the Google API.

A comprehensive tutorial, {ref}`writing a plugin to support a new model <tutorial-model-plugin>`, describes in detail how to add new models by building plugins.

### New features

- {ref}`python-api` documentation for using LLM models, including models from plugins, directly from Python. [#75](https://github.com/simonw/llm/issues/75)
- Messages are now logged to the database by default - no need to run the `llm init-db` command any more, which has been removed. Instead, you can toggle this behavior off using `llm logs off` or turn it on again using `llm logs on`. The `llm logs status` command shows the current status of the log database. If logging is turned off, passing `--log` to the `llm prompt` command will cause that prompt to be logged anyway. [#98](https://github.com/simonw/llm/issues/98)
- New database schema for logged messages, with `conversations` and `responses` tables. If you have previously used the old `logs` table it will continue to exist but will no longer be written to. [#91](https://github.com/simonw/llm/issues/91)
- New `-o/--option name value` syntax for setting options for models, such as temperature. Available options differ for different models. [#63](https://github.com/simonw/llm/issues/63)
- `llm models list --options` command for viewing all available model options. [#82](https://github.com/simonw/llm/issues/82)
- `llm "prompt" --save template` option for saving a prompt directly to a template. [#55](https://github.com/simonw/llm/issues/55)
- Prompt templates can now specify {ref}`default values <prompt-default-parameters>` for parameters. Thanks, Chris Mungall. [#57](https://github.com/simonw/llm/pull/57)
- `llm openai models` command to list all available OpenAI models from their API. [#70](https://github.com/simonw/llm/issues/70)
- `llm models default MODEL_ID` to set a different model as the default to be used when `llm` is run without the `-m/--model` option. [#31](https://github.com/simonw/llm/issues/31)

### Smaller improvements

- `llm -s` is now a shortcut for `llm --system`. [#69](https://github.com/simonw/llm/issues/69)
- `llm -m 4-32k` alias for `gpt-4-32k`.
- `llm install -e directory` command for installing a plugin from a local directory.
- The `LLM_USER_PATH` environment variable now controls the location of the directory in which LLM stores its data. This replaces the old `LLM_KEYS_PATH` and `LLM_LOG_PATH` and `LLM_TEMPLATES_PATH` variables. [#76](https://github.com/simonw/llm/issues/76)
- Documentation covering {ref}`plugin-utilities`.
- Documentation site now uses Plausible for analytics. [#79](https://github.com/simonw/llm/issues/79)

(v0_4_1)=
## 0.4.1 (2023-06-17)

- LLM can now be installed using Homebrew: `brew install simonw/llm/llm`. [#50](https://github.com/simonw/llm/issues/50)
- `llm` is now styled LLM in the documentation. [#45](https://github.com/simonw/llm/issues/45)
- Examples in documentation now include a copy button. [#43](https://github.com/simonw/llm/issues/43)
- `llm templates` command no longer has its display disrupted by newlines. [#42](https://github.com/simonw/llm/issues/42)
- `llm templates` command now includes system prompt, if set. [#44](https://github.com/simonw/llm/issues/44)

(v0_4)=
## 0.4 (2023-06-17)

This release includes some backwards-incompatible changes:

- The `-4` option for GPT-4 is now `-m 4`.
- The `--code` option has been removed.
- The `-s` option has been removed as streaming is now the default. Use `--no-stream` to opt out of streaming.

### Prompt templates

{ref}`prompt-templates` is a new feature that allows prompts to be saved as templates and re-used with different variables.

Templates can be created using the `llm templates edit` command:

```bash
llm templates edit summarize
```
Templates are YAML - the following template defines summarization using a system prompt:

```yaml
system: Summarize this text
```
The template can then be executed like this:
```bash
cat myfile.txt | llm -t summarize
```
Templates can include system prompts and regular prompts, and can indicate the model they should use. They can reference variables such as `$input` for content piped to the tool, or other variables that are passed using the new `-p/--param` option.

This example adds a `voice` parameter:

```yaml
system: Summarize this text in the voice of $voice
```
Then to run it (via [strip-tags](https://github.com/simonw/strip-tags) to remove HTML tags from the input):
```bash
curl -s 'https://til.simonwillison.net/macos/imovie-slides-and-audio' | \
  strip-tags -m | llm -t summarize -p voice GlaDOS
```
Example output:

> My previous test subject seemed to have learned something new about iMovie. They exported keynote slides as individual images [...] Quite impressive for a human.

The {ref}`prompt-templates` documentation provides more detailed examples.

### Continue previous chat

You can now use `llm` to continue a previous conversation with the OpenAI chat models (`gpt-3.5-turbo` and `gpt-4`). This will include your previous prompts and responses in the prompt sent to the API, allowing the model to continue within the same context.

Use the new `-c/--continue` option to continue from the previous message thread:

```bash
llm "Pretend to be a witty gerbil, say hi briefly"
```
> Greetings, dear human! I am a clever gerbil, ready to entertain you with my quick wit and endless energy.
```bash
llm "What do you think of snacks?" -c
```
> Oh, how I adore snacks, dear human! Crunchy carrot sticks, sweet apple slices, and chewy yogurt drops are some of my favorite treats. I could nibble on them all day long!

The `-c` option will continue from the most recent logged message.

To continue a different chat, pass an integer ID to the `--chat` option. This should be the ID of a previously logged message. You can find these IDs using the `llm logs` command.

Thanks [Amjith Ramanujam](https://github.com/amjith) for contributing to this feature. [#6](https://github.com/simonw/llm/issues/6)

### New mechanism for storing API keys

API keys for language models such as those by OpenAI can now be saved using the new `llm keys` family of commands.

To set the default key to be used for the OpenAI APIs, run this:

```bash
llm keys set openai
```
Then paste in your API key.

Keys can also be passed using the new `--key` command line option - this can be a full key or the alias of a key that has been previously stored.

See {ref}`api-keys` for more. [#13](https://github.com/simonw/llm/issues/13)

### New location for the logs.db database

The `logs.db` database that stores a history of executed prompts no longer lives at `~/.llm/log.db` - it can now be found in a location that better fits the host operating system, which can be seen using:

```bash
llm logs path
```
On macOS this is `~/Library/Application Support/io.datasette.llm/logs.db`.

To open that database using Datasette, run this:

```bash
datasette "$(llm logs path)"
```
You can upgrade your existing installation by copying your database to the new location like this:
```bash
cp ~/.llm/log.db "$(llm logs path)"
rm -rf ~/.llm # To tidy up the now obsolete directory
```
The database schema has changed, and will be updated automatically the first time you run the command.

That schema is [included in the documentation](https://llm.datasette.io/en/stable/logging.html#sql-schema). [#35](https://github.com/simonw/llm/issues/35)

### Other changes

- New `llm logs --truncate` option (shortcut `-t`) which truncates the displayed prompts to make the log output easier to read. [#16](https://github.com/simonw/llm/issues/16)
- Documentation now spans multiple pages and lives at <https://llm.datasette.io/> [#21](https://github.com/simonw/llm/issues/21)
- Default `llm chatgpt` command has been renamed to `llm prompt`. [#17](https://github.com/simonw/llm/issues/17)
- Removed `--code` option in favour of new prompt templates mechanism. [#24](https://github.com/simonw/llm/issues/24)
- Responses are now streamed by default, if the model supports streaming. The `-s/--stream` option has been removed. A new `--no-stream` option can be used to opt-out of streaming. [#25](https://github.com/simonw/llm/issues/25)
- The `-4/--gpt4` option has been removed in favour of `-m 4` or `-m gpt4`, using a new mechanism that allows models to have additional short names.
- The new `gpt-3.5-turbo-16k` model with a 16,000 token context length can now also be accessed using `-m chatgpt-16k` or `-m 3.5-16k`. Thanks, Benjamin Kirkbride. [#37](https://github.com/simonw/llm/issues/37)
- Improved display of error messages from OpenAI. [#15](https://github.com/simonw/llm/issues/15)

(v0_3)=
## 0.3 (2023-05-17)

- `llm logs` command for browsing logs of previously executed completions. [#3](https://github.com/simonw/llm/issues/3)
- `llm "Python code to output factorial 10" --code` option which sets a system prompt designed to encourage code to be output without any additional explanatory text. [#5](https://github.com/simonw/llm/issues/5)
- Tool can now accept a prompt piped directly to standard input. [#11](https://github.com/simonw/llm/issues/11)

(v0_2)=
## 0.2 (2023-04-01)

- If a SQLite database exists in `~/.llm/log.db` all prompts and responses are logged to that file. The `llm init-db` command can be used to create this file. [#2](https://github.com/simonw/llm/issues/2)

(v0_1)=
## 0.1 (2023-04-01)

- Initial prototype release. [#1](https://github.com/simonw/llm/issues/1)


================================================
FILE: docs/conf.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from subprocess import PIPE, Popen

# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))


# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Sphinx extensions to load, listed as module-name strings. Entries may be
# bundled with Sphinx (the 'sphinx.ext.*' names) or come from other packages.
extensions = [
    "myst_parser",
    "sphinx_copybutton",
    "sphinx_markdown_builder",
    "sphinx.ext.autodoc",
]

# Optional MyST syntax features to switch on.
myst_enable_extensions = ["colon_fence"]

# NOTE(review): the markdown_* settings are presumably read by
# sphinx_markdown_builder - confirm against that extension's documentation.
markdown_uri_doc_suffix = ".html"
markdown_http_base = "https://llm.datasette.io/en/stable"

# Directories searched for templates, given relative to this directory.
templates_path = ["_templates"]

# Suffix of source filenames. A list of strings is also accepted when there
# are several, for example:
#
# source_suffix = ['.rst', '.md']
#
# NOTE(review): the docs tree is Markdown; presumably myst_parser registers
# the .md suffix on its own - confirm before changing this value.
source_suffix = ".rst"

# Name of the document that holds the master toctree.
master_doc = "index"

# Project-wide metadata.
project = "LLM"
author = "Simon Willison"
copyright = "2025, " + author

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# Both values are derived from `git describe --tags --always`. The command is
# run through the shell so that a missing `git` executable (or a checkout with
# no git metadata) simply yields empty output instead of raising here.
pipe = Popen("git describe --tags --always", stdout=PIPE, shell=True)
# communicate() drains stdout and waits for the child to exit, so no pipe or
# process is left dangling. strip() removes the trailing newline that git
# prints, which would otherwise end up inside |release| (and |version|).
git_version = pipe.communicate()[0].decode("utf8").strip()

if git_version:
    # version: the describe string with its final "-"-separated component
    # removed (unchanged when the string contains no "-").
    version = git_version.rsplit("-", 1)[0]
    # release: the full describe string.
    release = git_version
else:
    # No output from git: fall back to empty version strings.
    version = ""
    release = ""

# Language of the text Sphinx generates on its own; see the Sphinx
# documentation for the supported language codes.
#
# The same setting drives gettext-based content translation, in which case
# it is normally supplied on the command line instead.
language = "en"

# Patterns (relative to the source directory) for files and directories that
# must not be treated as source files. These patterns also apply to
# html_static_path and html_extra_path.
exclude_patterns = [
    "_build",
    "Thumbs.db",
    ".DS_Store",
]

# Pygments style name used for syntax highlighting.
pygments_style = "sphinx"

# When true, `todo` and `todoList` directives render output; when false they
# render nothing.
todo_include_todos = False


# -- Options for HTML output ----------------------------------------------

# Theme applied to the HTML and HTML Help pages. The Sphinx documentation
# lists the builtin themes.
#
html_theme = "furo"

# Per-theme options that adjust the look and feel; the available keys are
# described in each theme's documentation. Nothing is overridden here.

html_theme_options = dict()
html_title = "LLM"

# Directories (relative to this file) holding custom static files such as
# style sheets. They are copied after the builtin static files, so a file
# called "default.css" replaces the builtin "default.css".
html_static_path = list()


# -- Options for HTMLHelp output ------------------------------------------

# Base name of the file produced by the HTML help builder.
htmlhelp_basename = "llm-doc"


# -- Options for LaTeX output ---------------------------------------------

# No LaTeX settings are overridden; the commented-out keys below show the
# options that can be set.
latex_elements = {
    # Paper size ('letterpaper' or 'a4paper'):
    #
    # 'papersize': 'letterpaper',
    # Font size ('10pt', '11pt' or '12pt'):
    #
    # 'pointsize': '10pt',
    # Extra content for the LaTeX preamble:
    #
    # 'preamble': '',
    # Figure (float) alignment:
    #
    # 'figure_align': 'htbp',
}

# How the document tree is grouped into LaTeX files. Each entry is a tuple of
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, "llm.tex", "LLM documentation", "Simon Willison", "manual"),
]


# -- Options for manual page output ---------------------------------------

# Manual pages to build, one tuple per page:
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, "llm", "LLM documentation", [author], 1),
]


# -- Options for Texinfo output -------------------------------------------

# How the document tree is grouped into Texinfo files, one tuple per file.
# The field order is annotated on the single entry below.
texinfo_documents = [
    (
        master_doc,  # source start file
        "llm",  # target name
        "LLM documentation",  # title
        author,  # author
        "llm",  # dir menu entry
        " Access large language models from the command-line ",  # description
        "Miscellaneous",  # category
    ),
]


================================================
FILE: docs/contributing.md
================================================
(contributing)=
# Contributing

To contribute to this tool, first check out the code. Then run the tests with `uv run`:
```bash
cd llm
uv run pytest
```
You can run your development copy of `llm` using `uv run` as well:
```bash
uv run llm --help
```

## Updating recorded HTTP API interactions and associated snapshots

This project uses [pytest-recording](https://github.com/kiwicom/pytest-recording) to record OpenAI API responses for some of the tests, and [syrupy](https://github.com/syrupy-project/syrupy) to capture snapshots of their results.

If you add a new test that calls the API you can capture the API response and snapshot like this:
```bash
PYTEST_OPENAI_API_KEY="$(llm keys get openai)" uv run pytest --record-mode once --snapshot-update
```
Then review the new snapshots in `tests/__snapshots__/` to make sure they look correct.

## Debugging tricks

The default OpenAI plugin has a debugging mechanism for showing the exact requests and responses that were sent to the OpenAI API.

Set the `LLM_OPENAI_SHOW_RESPONSES` environment variable like this:
```bash
LLM_OPENAI_SHOW_RESPONSES=1 uv run llm -m chatgpt 'three word slogan for an otter-run bakery'
```
This will output details of the API requests and responses to the console.

Use `--no-stream` to see a more readable version of the body that avoids streaming the response:

```bash
LLM_OPENAI_SHOW_RESPONSES=1 uv run llm -m chatgpt --no-stream \
  'three word slogan for an otter-run bakery'
```

## Documentation

Documentation for this project uses [MyST](https://myst-parser.readthedocs.io/) - it is written in Markdown and rendered using Sphinx.

To build the documentation locally, run the following:
```bash
just docs
```
This will start a live preview server, using [sphinx-autobuild](https://pypi.org/project/sphinx-autobuild/).

The CLI `--help` examples in the documentation are managed using [Cog](https://github.com/nedbat/cog). Update those files like this:
```bash
just cog
```
You'll need [Just](https://github.com/casey/just) installed to run these commands.

## Release process

To release a new version:

1. Update `docs/changelog.md` with the new changes.
2. Update the version number in `pyproject.toml`
3. Run `just cog` to update `docs/fragments.md` with the new version number.
4. [Create a GitHub release](https://github.com/simonw/llm/releases/new) for the new version.
5. Wait for the package to push to PyPI and then...
6. Run the [regenerate.yaml](https://github.com/simonw/homebrew-llm/actions/workflows/regenerate.yaml) workflow to update the Homebrew tap to the latest version.


================================================
FILE: docs/embeddings/cli.md
================================================
(embeddings-cli)=
# Embedding with the CLI

LLM provides command-line utilities for calculating and storing embeddings for pieces of content.

(embeddings-cli-embed)=
## llm embed

The `llm embed` command can be used to calculate embedding vectors for a string of content. These can be returned directly to the terminal, stored in a SQLite database, or both.

### Returning embeddings to the terminal

The simplest way to use this command is to pass content to it using the `-c/--content` option, like this:

```bash
llm embed -c 'This is some content' -m 3-small
```
`-m 3-small` specifies the OpenAI `text-embedding-3-small` model. You will need to have set an OpenAI API key using `llm keys set openai` for this to work.

You can install plugins to access other models. The [llm-sentence-transformers](https://github.com/simonw/llm-sentence-transformers) plugin can be used to run models on your own laptop, such as the [MiniLM-L6](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) model:

```bash
llm install llm-sentence-transformers
llm embed -c 'This is some content' -m sentence-transformers/all-MiniLM-L6-v2
```

The `llm embed` command returns a JSON array of floating point numbers directly to the terminal:

```json
[0.123, 0.456, 0.789...]
```
You can omit the `-m/--model` option if you set a {ref}`default embedding model <embeddings-cli-embed-models-default>`.

You can also set the `LLM_EMBEDDING_MODEL` environment variable to set a default model for all `llm embed` commands in the current shell session:

```bash
export LLM_EMBEDDING_MODEL=3-small
llm embed -c 'This is some content'
```

LLM also offers a binary storage format for embeddings, described in {ref}`embeddings storage format <embeddings-storage>`.

You can output embeddings using that format as raw bytes using `--format blob`, or in hexadecimal using `--format hex`, or in Base64 using `--format base64`:

```bash
llm embed -c 'This is some content' -m 3-small --format base64
```
This outputs:
```
8NGzPFtdgTqHcZw7aUT6u+++WrwwpZo8XbSxv...
```
Some models such as [llm-clip](https://github.com/simonw/llm-clip) can run against binary data. You can pass in binary data using the `-i` and `--binary` options:

```bash
llm embed --binary -m clip -i image.jpg
```
Or from standard input like this:
```bash
cat image.jpg | llm embed --binary -m clip -i -
```

(embeddings-collections)=
### Storing embeddings in SQLite

Embeddings are much more useful if you store them somewhere, so you can calculate similarity scores between different embeddings later on.

LLM includes the concept of a **collection** of embeddings. A collection groups together a set of stored embeddings created using the same model, each with a unique ID within that collection.

Embeddings also store a hash of the content that was embedded. This hash is later used to avoid calculating duplicate embeddings for the same content.

First, we'll set a default model so we don't have to keep repeating it:
```bash
llm embed-models default 3-small
```

The `llm embed` command can store results directly in a named collection like this:

```bash
llm embed quotations philkarlton-1 -c \
  'There are only two hard things in Computer Science: cache invalidation and naming things'
```
This stores the given text in the `quotations` collection under the key `philkarlton-1`.

You can also pipe content to standard input, like this:
```bash
cat one.txt | llm embed files one
```
This will store the embedding for the contents of `one.txt` in the `files` collection under the key `one`.

A collection will be created the first time you mention it.

Collections have a fixed embedding model, which is the model that was used for the first embedding stored in that collection.

In the above example this would have been the default embedding model at the time that the command was run.

The following example stores the embedding for the string "my happy hound" in a collection called `phrases` under the key `hound` and using the model `3-small`:

```bash
llm embed phrases hound -m 3-small -c 'my happy hound'
```
By default, the SQLite database used to store embeddings is the `embeddings.db` in the user content directory managed by LLM.

You can see the path to this directory by running `llm collections path`.

You can store embeddings in a different SQLite database by passing a path to it using the `-d/--database` option to `llm embed`. If this file does not exist yet the command will create it:

```bash
llm embed phrases hound -d my-embeddings.db -c 'my happy hound'
```
This creates a database file called `my-embeddings.db` in the current directory.

(embeddings-collections-content-metadata)=
#### Storing content and metadata

By default, only the entry ID and the embedding vector are stored in the database table.

You can store a copy of the original text in the `content` column by passing the `--store` option:

```bash
llm embed phrases hound -c 'my happy hound' --store
```
You can also store a JSON object containing arbitrary metadata in the `metadata` column by passing the `--metadata` option. This example uses both `--store` and `--metadata` options:

```bash
llm embed phrases hound \
  -m 3-small \
  -c 'my happy hound' \
  --metadata '{"name": "Hound"}' \
  --store
```
Data stored in this way will be returned by calls to `llm similar`, for example:
```bash
llm similar phrases -c 'hound'
```
```
{"id": "hound", "score": 0.8484683588631485, "content": "my happy hound", "metadata": {"name": "Hound"}}
```

(embeddings-cli-embed-multi)=
## llm embed-multi

The `llm embed` command embeds a single string at a time.

`llm embed-multi` can be used to embed multiple strings at once, taking advantage of any efficiencies that the embedding model may provide when processing multiple strings.

This command can be called in one of three ways:

1. With a CSV, TSV, JSON or newline-delimited JSON file
2. With a SQLite database and a SQL query
3. With one or more paths to directories, each accompanied by a glob pattern

All three mechanisms support these options:

- `-m model_id` to specify the embedding model to use
- `-d database.db` to specify a different database file to store the embeddings in
- `--store` to store the original content in the embeddings table in addition to the embedding vector
- `--prefix` to prepend a prefix to the stored ID of each item
- `--prepend` to prepend a string to the content before embedding 
- `--batch-size SIZE` to process embeddings in batches of the specified size

The `--prepend` option is useful for embedding models that require you to prepend a special token to the content before embedding it. [nomic-embed-text-v2-moe](https://huggingface.co/nomic-ai/nomic-embed-text-v2-moe) for example requires documents to be prepended with `'search_document: '` and search queries to be prepended with `'search_query: '`.

(embeddings-cli-embed-multi-csv-etc)=
### Embedding data from a CSV, TSV or JSON file

You can embed data from a CSV, TSV or JSON file by passing that file to the command as the second option, after the collection name.

Your file must contain at least two columns. The first one is expected to contain the ID of the item, and any subsequent columns will be treated as containing content to be embedded.

An example CSV file might look like this:

```
id,content
one,This is the first item
two,This is the second item
```
TSV would use tabs instead of commas.

JSON files can be structured like this:

```json
[
  {"id": "one", "content": "This is the first item"},
  {"id": "two", "content": "This is the second item"}
]
```
Or as newline-delimited JSON like this:
```json
{"id": "one", "content": "This is the first item"}
{"id": "two", "content": "This is the second item"}
```
In each of these cases the file can be passed to `llm embed-multi` like this:
```bash
llm embed-multi items mydata.csv
```
The first argument is the name of the collection, the second is the filename.

You can also pipe content to standard input of the tool using `-`:

```bash
cat mydata.json | llm embed-multi items -
```
LLM will attempt to detect the format of your data automatically. If this doesn't work you can specify the format using the `--format` option. This is required if you are piping newline-delimited JSON to standard input.

```bash
cat mydata.json | llm embed-multi items - --format nl
```
Other supported `--format` options are `csv`, `tsv` and `json`.

This example embeds the data from a JSON file in a collection called `items` in a database called `docs.db` using the `3-small` model and stores the original content in the `embeddings` table as well, adding a prefix of `my-items/` to each ID:

```bash
llm embed-multi items mydata.json \
  -d docs.db \
  -m 3-small \
  --prefix my-items/ \
  --store
```

(embeddings-cli-embed-multi-sqlite)=
### Embedding data from a SQLite database

You can embed data from a SQLite database using `--sql`, optionally combined with `--attach` to attach an additional database.

If you are storing embeddings in the same database as the source data, you can do this:

```bash
llm embed-multi docs \
  -d docs.db \
  --sql 'select id, title, content from documents' \
  -m 3-small
```
The `docs.db` database here contains a `documents` table, and we want to embed the `title` and `content` columns from that table and store the results back in the same database.

To load content from a database other than the one you are using to store embeddings, attach it with the `--attach` option and use `alias.table` in your SQLite query:

```bash
llm embed-multi docs \
  -d embeddings.db \
  --attach other other.db \
  --sql 'select id, title, content from other.documents' \
  -m 3-small
```

(embeddings-cli-embed-multi-directories)=
### Embedding data from files in directories

LLM can embed the content of every text file in a specified directory, using the file's path and name as the ID.

Consider a directory structure like this:
```
docs/aliases.md
docs/contributing.md
docs/embeddings/binary.md
docs/embeddings/cli.md
docs/embeddings/index.md
docs/index.md
docs/logging.md
docs/plugins/directory.md
docs/plugins/index.md
```
To embed all of those documents, you can run the following:

```bash
llm embed-multi documentation \
  -m 3-small \
  --files docs '**/*.md' \
  -d documentation.db \
  --store
```
Here `--files docs '**/*.md'` specifies that the `docs` directory should be scanned for files matching the `**/*.md` glob pattern - which will match Markdown files in any nested directory.

The result of the above command is an `embeddings` table with the following IDs:

```
aliases.md
contributing.md
embeddings/binary.md
embeddings/cli.md
embeddings/index.md
index.md
logging.md
plugins/directory.md
plugins/index.md
```
Each corresponding to embedded content for the file in question.

The `--prefix` option can be used to add a prefix to each ID:

```bash
llm embed-multi documentation \
  -m 3-small \
  --files docs '**/*.md' \
  -d documentation.db \
  --store \
  --prefix llm-docs/
```
This will result in the following IDs instead:

```
llm-docs/aliases.md
llm-docs/contributing.md
llm-docs/embeddings/binary.md
llm-docs/embeddings/cli.md
llm-docs/embeddings/index.md
llm-docs/index.md
llm-docs/logging.md
llm-docs/plugins/directory.md
llm-docs/plugins/index.md
```
Files are assumed to be `utf-8`, but LLM will fall back to `latin-1` if it encounters an encoding error. You can specify a different set of encodings using the `--encoding` option.

This example will try `utf-16` first and then `mac_roman` before falling back to `latin-1`:
```bash
llm embed-multi documentation \
  -m 3-small \
  --files docs '**/*.md' \
  -d documentation.db \
  --encoding utf-16 \
  --encoding mac_roman \
  --encoding latin-1
```
If a file cannot be read it will be logged to standard error but the script will keep on running.

If you are embedding binary content such as images for use with CLIP, add the `--binary` option:
```bash
llm embed-multi photos \
  -m clip \
  --files photos/ '*.jpeg' --binary
```

(embeddings-cli-similar)=
## llm similar

The `llm similar` command searches a collection of embeddings for the items that are most similar to a given string or item ID, based on [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity).

This currently uses a slow brute-force approach which does not scale well to large collections. See [issue 216](https://github.com/simonw/llm/issues/216) for plans to add a more scalable approach via vector indexes provided by plugins.

To search the `quotations` collection for items that are semantically similar to `'computer science'`:

```bash
llm similar quotations -c 'computer science'
```
This embeds the provided string and returns a newline-delimited list of JSON objects like this:
```json
{"id": "philkarlton-1", "score": 0.8323904531677017, "content": null, "metadata": null}
```
Use `-p/--plain` to get back results in plain text instead of JSON:
```bash
llm similar quotations -c 'computer science' -p
```
Example output:
```
philkarlton-1 (0.8323904531677017)
```
You can compare against text stored in a file using `-i filename`:
```bash
llm similar quotations -i one.txt
```
Or feed text to standard input using `-i -`:
```bash
echo 'computer science' | llm similar quotations -i -
```
When using a model like CLIP, you can find images similar to an input image using `-i filename` with `--binary`:
```bash
llm similar photos -i image.jpg --binary
```

You can filter results to only show IDs that begin with a specific prefix using `--prefix`:

```bash
llm similar quotations --prefix 'movies/' -c 'star wars'
```

(embeddings-cli-embed-models)=
## llm embed-models

To list all available embedding models, including those provided by plugins, run this command:

```bash
llm embed-models
```
The output should look something like this:
```
OpenAIEmbeddingModel: text-embedding-ada-002 (aliases: ada, ada-002)
OpenAIEmbeddingModel: text-embedding-3-small (aliases: 3-small)
OpenAIEmbeddingModel: text-embedding-3-large (aliases: 3-large)
...
```
Add `-q` one or more times to search for models matching those terms:
```bash
llm embed-models -q 3-small
```

(embeddings-cli-embed-models-default)=
### llm embed-models default

This command can be used to get and set the default embedding model.

This will return the name of the current default model:
```bash
llm embed-models default
```
You can set a different default like this:
```bash
llm embed-models default 3-small
```
This will set the default model to OpenAI's `3-small` model.

Any of the supported aliases for a model can be passed to this command.

You can unset the default model using `--remove-default`:

```bash
llm embed-models default --remove-default
```
When no default model is set, the `llm embed` and `llm embed-multi` commands will require that a model is specified using `-m/--model`.

## llm collections list

To list all of the collections in the embeddings database, run this command:

```bash
llm collections list
```
Add `--json` for JSON output:
```bash
llm collections list --json
```
Add `-d/--database` to specify a different database file:
```bash
llm collections list -d my-embeddings.db
```
## llm collections delete

To delete a collection from the database, run this:
```bash
llm collections delete collection-name
```
Pass `-d` to specify a different database file:
```bash
llm collections delete collection-name -d my-embeddings.db
```


================================================
FILE: docs/embeddings/index.md
================================================
(embeddings)=
# Embeddings

Embedding models allow you to take a piece of text - a word, sentence, paragraph or even a whole article, and convert that into an array of floating point numbers.

This floating point array is called an "embedding vector", and works as a numerical representation of the semantic meaning of the content in a many-multi-dimensional space.

By calculating the distance between embedding vectors, we can identify which content is semantically "nearest" to other content.

This can be used to build features like related article lookups. It can also be used to build semantic search, where a user can search for a phrase and get back results that are semantically similar to that phrase even if they do not share any exact keywords.

Some embedding models like [CLIP](https://github.com/simonw/llm-clip) can even work against binary files such as images. These can be used to search for images that are similar to other images, or to search for images that are semantically similar to a piece of text.

LLM supports multiple embedding models through {ref}`plugins <plugins>`. Once installed, an embedding model can be used on the command-line or via the Python API to calculate and store embeddings for content, and then to perform similarity searches against those embeddings.

See [LLM now provides tools for working with embeddings](https://simonwillison.net/2023/Sep/4/llm-embeddings/) for an extended explanation of embeddings, why they are useful and what you can do with them.

```{toctree}
---
maxdepth: 3
---
cli
python-api
writing-plugins
storage
```


================================================
FILE: docs/embeddings/python-api.md
================================================
(embeddings-python-api)=
# Using embeddings from Python

You can load an embedding model using its model ID or alias like this:
```python
import llm

embedding_model = llm.get_embedding_model("3-small")
```
To embed a string, returning a Python list of floating point numbers, use the `.embed()` method:
```python
vector = embedding_model.embed("my happy hound")
```
If the embedding model can handle binary input, you can call `.embed()` with a byte string instead. You can check the `supports_binary` property to see if this is supported:
```python
if embedding_model.supports_binary:
    vector = embedding_model.embed(open("my-image.jpg", "rb").read())
```
The `embedding_model.supports_text` property indicates if the model supports text input.

Many embedding models are more efficient when you embed multiple strings or binary strings at once. To embed multiple strings at once, use the `.embed_multi()` method:
```python
vectors = list(embedding_model.embed_multi(["my happy hound", "my dissatisfied cat"]))
```
This returns a generator that yields one embedding vector per string.

Embeddings are calculated in batches. By default all items will be processed in a single batch, unless the underlying embedding model has defined its own preferred batch size. You can pass a custom batch size using `batch_size=N`, for example:

```python
vectors = list(embedding_model.embed_multi(lines_from_file, batch_size=20))
```

(embeddings-python-collections)=
## Working with collections

The `llm.Collection` class can be used to work with **collections** of embeddings from Python code.

A collection is a named group of embedding vectors, each stored along with their IDs in a SQLite database table.

To work with embeddings in this way you will need an instance of a [sqlite-utils Database](https://sqlite-utils.datasette.io/en/stable/python-api.html#connecting-to-or-creating-a-database) object. You can then pass that to the `llm.Collection` constructor along with the unique string name of the collection and the ID of the embedding model you will be using with that collection:

```python
import sqlite_utils
import llm

# This collection will use an in-memory database that will be
# discarded when the Python process exits
collection = llm.Collection("entries", model_id="3-small")

# Or you can persist the database to disk like this:
db = sqlite_utils.Database("my-embeddings.db")
collection = llm.Collection("entries", db, model_id="3-small")

# You can pass a model directly using model= instead of model_id=
embedding_model = llm.get_embedding_model("3-small")
collection = llm.Collection("entries", db, model=embedding_model)
```
If the collection already exists in the database you can omit the `model` or `model_id` argument - the model ID will be read from the `collections` table.

To embed a single string and store it in the collection, use the `embed()` method:

```python
collection.embed("hound", "my happy hound")
```
This stores the embedding for the string "my happy hound" in the `entries` collection under the key `hound`.

Add `store=True` to store the text content itself in the database table along with the embedding vector.

To attach additional metadata to an item, pass a JSON-compatible dictionary as the `metadata=` argument:

```python
collection.embed("hound", "my happy hound", metadata={"name": "Hound"}, store=True)
```
This additional metadata will be stored as JSON in the `metadata` column of the embeddings database table.

(embeddings-python-bulk)=
### Storing embeddings in bulk

The `collection.embed_multi()` method can be used to store embeddings for multiple items at once. This can be more efficient for some embedding models.

```python
collection.embed_multi(
    [
        ("hound", "my happy hound"),
        ("cat", "my dissatisfied cat"),
    ],
    # Add this to store the strings in the content column:
    store=True,
)
```
To include metadata to be stored with each item, call `embed_multi_with_metadata()`:

```python
collection.embed_multi_with_metadata(
    [
        ("hound", "my happy hound", {"name": "Hound"}),
        ("cat", "my dissatisfied cat", {"name": "Cat"}),
    ],
    # This can also take the store=True argument:
    store=True,
)
```
The `batch_size=` argument defaults to 100, and will be used unless the embedding model itself defines a lower batch size. You can adjust this if you are having trouble with memory while embedding large collections:

```python
collection.embed_multi(
    (
        (i, line)
        for i, line in enumerate(lines_in_file)
    ),
    batch_size=10
)
```

(embeddings-python-collection-class)=
### Collection class reference

A collection instance has the following properties and methods:

- `id` - the integer ID of the collection in the database
- `name` - the string name of the collection (unique in the database)
- `model_id` - the string ID of the embedding model used for this collection
- `model()` - returns the `EmbeddingModel` instance, based on that `model_id`
- `count()` - returns the integer number of items in the collection
- `embed(id: str, text: str, metadata: dict=None, store: bool=False)` - embeds the given string and stores it in the collection under the given ID. Can optionally include metadata (stored as JSON) and store the text content itself in the database table.
- `embed_multi(entries: Iterable, store: bool=False, batch_size: int=100)` - see above
- `embed_multi_with_metadata(entries: Iterable, store: bool=False, batch_size: int=100)` - see above
- `similar(query: str, number: int=10)` - returns a list of entries that are most similar to the embedding of the given query string
- `similar_by_id(id: str, number: int=10)` - returns a list of entries that are most similar to the embedding of the item with the given ID
- `similar_by_vector(vector: List[float], number: int=10, skip_id: str=None)` - returns a list of entries that are most similar to the given embedding vector, optionally skipping the entry with the given ID
- `delete()` - deletes the collection and its embeddings from the database

There is also a `Collection.exists(db, name)` class method which returns a boolean value and can be used to determine if a collection exists or not in a database:

```python
if Collection.exists(db, "entries"):
    print("The entries collection exists")
```

(embeddings-python-similar)=
## Retrieving similar items

Once you have populated a collection of embeddings you can retrieve the entries that are most similar to a given string using the `similar()` method.

This method uses a brute force approach, calculating distance scores against every document. This is fine for small collections, but will not scale to large collections. See [issue 216](https://github.com/simonw/llm/issues/216) for plans to add a more scalable approach via vector indexes provided by plugins.

```python
for entry in collection.similar("hound"):
    print(entry.id, entry.score)
```
The string will first be embedded using the model for the collection.

The `entry` object returned is an object with the following properties:

- `id` - the string ID of the item
- `score` - the floating point similarity score between the item and the query string
- `content` - the string text content of the item, if it was stored - or `None`
- `metadata` - the dictionary (from JSON) metadata for the item, if it was stored - or `None`

This defaults to returning the 10 most similar items. You can change this by passing a different `number=` argument:
```python
for entry in collection.similar("hound", number=5):
    print(entry.id, entry.score)
```
The `similar_by_id()` method takes the ID of another item in the collection and returns the most similar items to that one, based on the embedding that has already been stored for it:

```python
for entry in collection.similar_by_id("cat"):
    print(entry.id, entry.score)
```
The item itself is excluded from the results.

(embeddings-sql-schema)=
## SQL schema

Here's the SQL schema used by the embeddings database:

<!-- [[[cog
import cog
from llm.embeddings_migrations import embeddings_migrations
import sqlite_utils
import re
db = sqlite_utils.Database(memory=True)
embeddings_migrations.apply(db)

cog.out("```sql\n")
for table in ("collections", "embeddings"):
    schema = db[table].schema
    cog.out(format(schema))
    cog.out("\n")
cog.out("```\n")
]]] -->
```sql
CREATE TABLE [collections] (
   [id] INTEGER PRIMARY KEY,
   [name] TEXT,
   [model] TEXT
)
CREATE TABLE "embeddings" (
   [collection_id] INTEGER REFERENCES [collections]([id]),
   [id] TEXT,
   [embedding] BLOB,
   [content] TEXT,
   [content_blob] BLOB,
   [content_hash] BLOB,
   [metadata] TEXT,
   [updated] INTEGER,
   PRIMARY KEY ([collection_id], [id])
)
```
<!-- [[[end]]] -->


================================================
FILE: docs/embeddings/storage.md
================================================
(embeddings-storage)=
# Embedding storage format

The default output format of the `llm embed` command is a JSON array of floating point numbers.

LLM stores embeddings in a space-efficient format: a little-endian binary sequence of 32-bit floating point numbers, each represented using 4 bytes.

These are stored in a `BLOB` column in a SQLite database.

The following Python functions can be used to convert between this format and an array of floating point numbers:

```python
import struct

def encode(values):
    return struct.pack("<" + "f" * len(values), *values)

def decode(binary):
    return struct.unpack("<" + "f" * (len(binary) // 4), binary)
```

These functions are available as `llm.encode()` and `llm.decode()`.

If you are using [NumPy](https://numpy.org/) you can decode one of these binary values like this:

```python
import numpy as np

numpy_array = np.frombuffer(value, "<f4")
```
The `<f4` format string here ensures NumPy will treat the data as a little-endian sequence of 32-bit floats.

================================================
FILE: docs/embeddings/writing-plugins.md
================================================
(embeddings-writing-plugins)=
# Writing plugins to add new embedding models

Read the {ref}`plugin tutorial <tutorial-model-plugin>` for details on how to develop and package a plugin.

This page shows an example plugin that implements and registers a new embedding model.

There are two components to an embedding model plugin:

1. An implementation of the `register_embedding_models()` hook, which takes a `register` callback function and calls it to register the new model with the LLM plugin system.
2. A class that extends the `llm.EmbeddingModel` abstract base class.

    The only required method on this class is `embed_batch(texts)`, which takes an iterable of strings and returns an iterator over lists of floating point numbers.

The following example uses the [sentence-transformers](https://github.com/UKPLab/sentence-transformers) package to provide access to the [MiniLM-L6](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) embedding model.

```python
import llm
from sentence_transformers import SentenceTransformer


@llm.hookimpl
def register_embedding_models(register):
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    register(SentenceTransformerModel(model_id, model_id), aliases=("all-MiniLM-L6-v2",))


class SentenceTransformerModel(llm.EmbeddingModel):
    def __init__(self, model_id, model_name):
        self.model_id = model_id
        self.model_name = model_name
        self._model = None

    def embed_batch(self, texts):
        if self._model is None:
            self._model = SentenceTransformer(self.model_name)
        results = self._model.encode(texts)
        return (list(map(float, result)) for result in results)
```
Once installed, the model provided by this plugin can be used with the {ref}`llm embed <embeddings-cli-embed>` command like this:

```bash
cat file.txt | llm embed -m sentence-transformers/all-MiniLM-L6-v2
```
Or via its registered alias like this:
```bash
cat file.txt | llm embed -m all-MiniLM-L6-v2
```
[llm-sentence-transformers](https://github.com/simonw/llm-sentence-transformers) is a complete example of a plugin that provides an embedding model.

[Execute Jina embeddings with a CLI using llm-embed-jina](https://simonwillison.net/2023/Oct/26/llm-embed-jina/#how-i-built-the-plugin) talks through a similar process to add support for the [Jina embeddings models](https://jina.ai/news/jina-ai-launches-worlds-first-open-source-8k-text-embedding-rivaling-openai/).

## Embedding binary content

If your model can embed binary content, use the `supports_binary` property to indicate that:

```python
class ClipEmbeddingModel(llm.EmbeddingModel):
    model_id = "clip"
    supports_binary = True
    supports_text= True
```

`supports_text` defaults to `True` and so is not necessary here. You can set it to `False` if your model only supports binary data.

If your model accepts binary, your `.embed_batch()` method may be called with a list of Python bytestrings. These may be mixed with regular strings if the model accepts both types of input.

[llm-clip](https://github.com/simonw/llm-clip) is an example of a model that can embed both binary and text content.


================================================
FILE: docs/fragments.md
================================================
(fragments)=
# Fragments

LLM prompts can optionally be composed out of **fragments** - reusable pieces of text that are logged just once to the database and can then be attached to multiple prompts.

These are particularly useful when you are working with long context models, which support feeding large amounts of text in as part of your prompt.

Fragments primarily exist to save space in the database, but may be used to support other features such as vendor prompt caching as well.

Fragments can be specified using several different mechanisms:

- URLs to text files online
- Paths to text files on disk
- Aliases that have been attached to a specific fragment
- Hash IDs of stored fragments, where the ID is the SHA256 hash of the fragment content
- Fragments that are provided by custom plugins - these look like `plugin-name:argument`

(fragments-usage)=
## Using fragments in a prompt

Use the `-f/--fragment` option to specify one or more fragments to be used as part of your prompt:

```bash
llm -f https://llm.datasette.io/robots.txt "Explain this robots.txt file in detail"
```
Here we are specifying a fragment using a URL. The contents of that URL will be included in the prompt that is sent to the model, placed before the prompt text.

<!--[[[cog
from importlib.metadata import version
llm_version = version("llm")
cog.out(f'The URL will be fetched with the user-agent `llm/{llm_version} (https://llm.datasette.io/)`.')
]]]-->
The URL will be fetched with the user-agent `llm/0.28 (https://llm.datasette.io/)`.
<!--[[[end]]]-->

The `-f` option can be used multiple times to combine together multiple fragments.

Fragments can also be files on disk, for example:
```bash
llm -f setup.py 'extract the metadata'
```
Use `-` to specify a fragment that is read from standard input:
```bash
llm -f - 'extract the metadata' < setup.py
```
This will read the contents of `setup.py` from standard input and use it as a fragment.

Fragments can also be used as part of your system prompt. Use `--sf value` or `--system-fragment value` instead of `-f`.

## Using fragments in chat

The `chat` command also supports the `-f` and `--sf` arguments to start a chat with fragments.

```bash
llm chat -f my_doc.txt
Chatting with gpt-4
Type 'exit' or 'quit' to exit
Type '!multi' to enter multiple lines, then '!end' to finish
Type '!edit' to open your default editor and modify the prompt.
Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments
> Explain this document to me
```

Fragments can also be added *during* a chat conversation using the `!fragment <my_fragment>` command.

```bash
Chatting with gpt-4
Type 'exit' or 'quit' to exit
Type '!multi' to enter multiple lines, then '!end' to finish
Type '!edit' to open your default editor and modify the prompt.
Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments
> !fragment https://llm.datasette.io/en/stable/fragments.html
```

This can be combined with `!multi`:

```bash
> !multi
Explain the difference between fragments and templates to me
!fragment https://llm.datasette.io/en/stable/fragments.html https://llm.datasette.io/en/stable/templates.html
!end
```

Any `!fragment` lines found in a prompt created with `!edit` will not be parsed.

(fragments-browsing)=
## Browsing fragments

You can view a truncated version of the fragments you have previously stored in your database with the `llm fragments` command:

```bash
llm fragments
```
The output from that command looks like this:

```yaml
- hash: 0d6e368f9bc21f8db78c01e192ecf925841a957d8b991f5bf9f6239aa4d81815
  aliases: []
  datetime_utc: '2025-04-06 07:36:53'
  source: https://raw.githubusercontent.com/simonw/llm-docs/refs/heads/main/llm/0.22.txt
  content: |-
    <documents>
    <document index="1">
    <source>docs/aliases.md</source>
    <document_content>
    (aliases)=
    #...
- hash: 16b686067375182573e2aa16b5bfc1e64d48350232535d06444537e51f1fd60c
  aliases: []
  datetime_utc: '2025-04-06 23:03:47'
  source: simonw/files-to-prompt/pyproject.toml
  content: |-
    [project]
    name = "files-to-prompt"
    version = "0.6"
    description = "Concatenate a directory full of...
```
Those long `hash` values are IDs that can be used to reference a fragment in the future:
```bash
llm -f 16b686067375182573e2aa16b5bfc1e64d48350232535d06444537e51f1fd60c 'Extract metadata'
```
Use `-q searchterm` one or more times to search for fragments that match a specific set of search terms.

To view the full content of a fragment use `llm fragments show`:
```bash
llm fragments show 0d6e368f9bc21f8db78c01e192ecf925841a957d8b991f5bf9f6239aa4d81815
```

(fragments-aliases)=
## Setting aliases for fragments

You can assign aliases to fragments that you use often using the `llm fragments set` command:
```bash
llm fragments set mydocs ./docs.md
```
To remove an alias, use `llm fragments remove`:
```bash
llm fragments remove mydocs
```
While an alias is set, you can use it in place of the fragment hash ID:
```bash
llm -f mydocs 'How do I access metadata?'
```
Use `llm fragments --aliases` to see a full list of fragments that have been assigned aliases:
```bash
llm fragments --aliases
```

(fragments-logs)=
## Viewing fragments in your logs

The `llm logs` command lists the fragments that were used for a prompt. By default these are listed as fragment hash IDs, but you can use the `--expand` option to show the full content of each fragment.

This command will show the expanded fragments for your most recent conversation:

```bash
llm logs -c --expand
```
You can filter for logs that used a specific fragment using the `-f/--fragment` option:
```bash
llm logs -c -f 0d6e368f9bc21f8db78c01e192ecf925841a957d8b991f5bf9f6239aa4d81815
```
This accepts URLs, file paths, aliases, and hash IDs.

Multiple `-f` options will return responses that used **all** of the specified fragments.

Fragments are returned by `llm logs --json` as well. By default these are truncated but you can add the `-e/--expand` option to show the full content of each fragment.

```bash
llm logs -c --json --expand
```

(fragments-plugins)=
## Using fragments from plugins

LLM plugins can provide custom fragment loaders which do useful things.

One example is the [llm-fragments-github plugin](https://github.com/simonw/llm-fragments-github). This can convert the files from a public GitHub repository into a list of fragments, allowing you to ask questions about the full repository.

Here's how to try that out:

```bash
llm install llm-fragments-github
llm -f github:simonw/s3-credentials 'Suggest new features for this tool'
```
This plugin turns a single call to `-f github:simonw/s3-credentials` into multiple fragments, one for every text file in the [simonw/s3-credentials](https://github.com/simonw/s3-credentials) GitHub repository.

Running `llm logs -c` will show that this prompt incorporated 26 fragments, one for each file.

Running `llm logs -c --usage --expand` (shortcut: `llm logs -cue`) includes token usage information and turns each fragment ID into a full copy of that file. [Here's the output of that command](https://gist.github.com/simonw/c9bbbc5f6560b01f4b7882ac0194fb25).

Fragment plugins can return {ref}`attachments <usage-attachments>` (such as images) as well.

See the {ref}`register_fragment_loaders() plugin hook <plugin-hooks-register-fragment-loaders>` documentation for details on writing your own custom fragment plugin.

(fragments-loaders)=
## Listing available fragment prefixes

The `llm fragments loaders` command shows all prefixes that have been installed by plugins, along with their documentation:

```bash
llm install llm-fragments-github
llm fragments loaders
```
Example output:
```
github:
  Load files from a GitHub repository as fragments

  Argument is a GitHub repository URL or username/repository

issue:
  Fetch GitHub issue and comments as Markdown

  Argument is either "owner/repo/NUMBER"
  or "https://github.com/owner/repo/issues/NUMBER"
```


================================================
FILE: docs/help.md
================================================
# CLI reference

This page lists the `--help` output for all of the `llm` commands.

<!-- [[[cog
from click.testing import CliRunner
from llm.cli import cli
def all_help(cli):
    "Return all help for Click command and its subcommands"
    # First find all commands and subcommands
    # List will be [["command"], ["command", "subcommand"], ...]
    commands = []
    def find_commands(command, path=None):
        path = path or []
        commands.append(path + [command.name])
        if hasattr(command, 'commands'):
            for subcommand in command.commands.values():
                find_commands(subcommand, path + [command.name])
    find_commands(cli)
    # Remove first item of each list (it is 'cli')
    commands = [command[1:] for command in commands]
    # Now generate help for each one, with appropriate heading level
    output = []
    for command in commands:
        heading_level = len(command) + 2
        result = CliRunner().invoke(cli, command + ["--help"])
        hyphenated = "-".join(command)
        if hyphenated:
            hyphenated = "-" + hyphenated
        output.append(f"\n(help{hyphenated})=")
        output.append("#" * heading_level + " llm " + " ".join(command) + " --help")
        output.append("```")
        output.append(result.output.replace("Usage: cli", "Usage: llm").strip())
        output.append("```")
    return "\n".join(output)
cog.out(all_help(cli))
]]] -->

(help)=
## llm  --help
```
Usage: llm [OPTIONS] COMMAND [ARGS]...

  Access Large Language Models from the command-line

  Documentation: https://llm.datasette.io/

  LLM can run models from many different providers. Consult the plugin directory
  for a list of available models:

  https://llm.datasette.io/en/stable/plugins/directory.html

  To get started with OpenAI, obtain an API key from them and:

      $ llm keys set openai
      Enter key: ...

  Then execute a prompt like this:

      llm 'Five outrageous names for a pet pelican'

  For a full list of prompting options run:

      llm prompt --help

Options:
  --version   Show the version and exit.
  -h, --help  Show this message and exit.

Commands:
  prompt*       Execute a prompt
  aliases       Manage model aliases
  chat          Hold an ongoing chat with a model.
  collections   View and manage collections of embeddings
  embed         Embed text and store or return the result
  embed-models  Manage available embedding models
  embed-multi   Store embeddings for multiple strings at once in the...
  fragments     Manage fragments that are stored in the database
  install       Install packages from PyPI into the same environment as LLM
  keys          Manage stored API keys for different models
  logs          Tools for exploring logged prompts and responses
  models        Manage available models
  openai        Commands for working directly with the OpenAI API
  plugins       List installed plugins
  schemas       Manage stored schemas
  similar       Return top N similar IDs from a collection using cosine...
  templates     Manage stored prompt templates
  tools         Manage tools that can be made available to LLMs
  uninstall     Uninstall Python packages from the LLM environment
```

(help-prompt)=
### llm prompt --help
```
Usage: llm prompt [OPTIONS] [PROMPT]

  Execute a prompt

  Documentation: https://llm.datasette.io/en/stable/usage.html

  Examples:

      llm 'Capital of France?'
      llm 'Capital of France?' -m gpt-4o
      llm 'Capital of France?' -s 'answer in Spanish'

  Multi-modal models can be called with attachments like this:

      llm 'Extract text from this image' -a image.jpg
      llm 'Describe' -a https://static.simonwillison.net/static/2024/pelicans.jpg
      cat image | llm 'describe image' -a -
      # With an explicit mimetype:
      cat image | llm 'describe image' --at - image/jpeg

  The -x/--extract option returns just the content of the first ``` fenced code
  block, if one is present. If none are present it returns the full response.

      llm 'JavaScript function for reversing a string' -x

Options:
  -s, --system TEXT               System prompt to use
  -m, --model TEXT                Model to use
  -d, --database FILE             Path to log database
  -q, --query TEXT                Use first model matching these strings
  -a, --attachment ATTACHMENT     Attachment path or URL or -
  --at, --attachment-type <TEXT TEXT>...
                                  Attachment with explicit mimetype,
                                  --at image.jpg image/jpeg
  -T, --tool TEXT                 Name of a tool to make available to the model
  --functions TEXT                Python code block or file path defining
                                  functions to register as tools
  --td, --tools-debug             Show full details of tool executions
  --ta, --tools-approve           Manually approve every tool execution
  --cl, --chain-limit INTEGER     How many chained tool responses to allow,
                                  default 5, set 0 for unlimited
  -o, --option <TEXT TEXT>...     key/value options for the model
  --schema TEXT                   JSON schema, filepath or ID
  --schema-multi TEXT             JSON schema to use for multiple results
  -f, --fragment TEXT             Fragment (alias, URL, hash or file path) to
                                  add to the prompt
  --sf, --system-fragment TEXT    Fragment to add to system prompt
  -t, --template TEXT             Template to use
  -p, --param <TEXT TEXT>...      Parameters for template
  --no-stream                     Do not stream output
  -n, --no-log                    Don't log to database
  --log                           Log prompt and response to the database
  -c, --continue                  Continue the most recent conversation.
  --cid, --conversation TEXT      Continue the conversation with the given ID.
  --key TEXT                      API key to use
  --save TEXT                     Save prompt with this template name
  --async                         Run prompt asynchronously
  -u, --usage                     Show token usage
  -x, --extract                   Extract first fenced code block
  --xl, --extract-last            Extract last fenced code block
  -h, --help                      Show this message and exit.
```

(help-chat)=
### llm chat --help
```
Usage: llm chat [OPTIONS]

  Hold an ongoing chat with a model.

Options:
  -s, --system TEXT             System prompt to use
  -m, --model TEXT              Model to use
  -c, --continue                Continue the most recent conversation.
  --cid, --conversation TEXT    Continue the conversation with the given ID.
  -f, --fragment TEXT           Fragment (alias, URL, hash or file path) to add
                                to the prompt
  --sf, --system-fragment TEXT  Fragment to add to system prompt
  -t, --template TEXT           Template to use
  -p, --param <TEXT TEXT>...    Parameters for template
  -o, --option <TEXT TEXT>...   key/value options for the model
  -d, --database FILE           Path to log database
  --no-stream                   Do not stream output
  --key TEXT                    API key to use
  -T, --tool TEXT               Name of a tool to make available to the model
  --functions TEXT              Python code block or file path defining
                                functions to register as tools
  --td, --tools-debug           Show full details of tool executions
  --ta, --tools-approve         Manually approve every tool execution
  --cl, --chain-limit INTEGER   How many chained tool responses to allow,
                                default 5, set 0 for unlimited
  -h, --help                    Show this message and exit.
```

(help-keys)=
### llm keys --help
```
Usage: llm keys [OPTIONS] COMMAND [ARGS]...

  Manage stored API keys for different models

Options:
  -h, --help  Show this message and exit.

Commands:
  list*  List names of all stored keys
  get    Return the value of a stored key
  path   Output the path to the keys.json file
  set    Save a key in the keys.json file
```

(help-keys-list)=
#### llm keys list --help
```
Usage: llm keys list [OPTIONS]

  List names of all stored keys

Options:
  -h, --help  Show this message and exit.
```

(help-keys-path)=
#### llm keys path --help
```
Usage: llm keys path [OPTIONS]

  Output the path to the keys.json file

Options:
  -h, --help  Show this message and exit.
```

(help-keys-get)=
#### llm keys get --help
```
Usage: llm keys get [OPTIONS] NAME

  Return the value of a stored key

  Example usage:

      export OPENAI_API_KEY=$(llm keys get openai)

Options:
  -h, --help  Show this message and exit.
```

(help-keys-set)=
#### llm keys set --help
```
Usage: llm keys set [OPTIONS] NAME

  Save a key in the keys.json file

  Example usage:

      $ llm keys set openai
      Enter key: ...

Options:
  --value TEXT  Value to set
  -h, --help    Show this message and exit.
```

(help-logs)=
### llm logs --help
```
Usage: llm logs [OPTIONS] COMMAND [ARGS]...

  Tools for exploring logged prompts and responses

Options:
  -h, --help  Show this message and exit.

Commands:
  list*   Show logged prompts and their responses
  backup  Backup your logs database to this file
  off     Turn off logging for all prompts
  on      Turn on logging for all prompts
  path    Output the path to the logs.db file
  status  Show current status of database logging
```

(help-logs-path)=
#### llm logs path --help
```
Usage: llm logs path [OPTIONS]

  Output the path to the logs.db file

Options:
  -h, --help  Show this message and exit.
```

(help-logs-status)=
#### llm logs status --help
```
Usage: llm logs status [OPTIONS]

  Show current status of database logging

Options:
  -h, --help  Show this message and exit.
```

(help-logs-backup)=
#### llm logs backup --help
```
Usage: llm logs backup [OPTIONS] PATH

  Backup your logs database to this file

Options:
  -h, --help  Show this message and exit.
```

(help-logs-on)=
#### llm logs on --help
```
Usage: llm logs on [OPTIONS]

  Turn on logging for all prompts

Options:
  -h, --help  Show this message and exit.
```

(help-logs-off)=
#### llm logs off --help
```
Usage: llm logs off [OPTIONS]

  Turn off logging for all prompts

Options:
  -h, --help  Show this message and exit.
```

(help-logs-list)=
#### llm logs list --help
```
Usage: llm logs list [OPTIONS]

  Show logged prompts and their responses

Options:
  -n, --count INTEGER         Number of entries to show - defaults to 3, use 0
                              for all
  -d, --database FILE         Path to log database
  -m, --model TEXT            Filter by model or model alias
  -q, --query TEXT            Search for logs matching this string
  -f, --fragment TEXT         Filter for prompts using these fragments
  -T, --tool TEXT             Filter for prompts with results from these tools
  --tools                     Filter for prompts with results from any tools
  --schema TEXT               JSON schema, filepath or ID
  --schema-multi TEXT         JSON schema used for multiple results
  -l, --latest                Return latest results matching search query
  --data                      Output newline-delimited JSON data for schema
  --data-array                Output JSON array of data for schema
  --data-key TEXT             Return JSON objects from array in this key
  --data-ids                  Attach corresponding IDs to JSON objects
  -t, --truncate              Truncate long strings in output
  -s, --short                 Shorter YAML output with truncated prompts
  -u, --usage                 Include token usage
  -r, --response              Just output the last response
  -x, --extract               Extract first fenced code block
  --xl, --extract-last        Extract last fenced code block
  -c, --current               Show logs from the current conversation
  --cid, --conversation TEXT  Show logs for this conversation ID
  --id-gt TEXT                Return responses with ID > this
  --id-gte TEXT               Return responses with ID >= this
  --json                      Output logs as JSON
  -e, --expand                Expand fragments to show their content
  -h, --help                  Show this message and exit.
```

(help-models)=
### llm models --help
```
Usage: llm models [OPTIONS] COMMAND [ARGS]...

  Manage available models

Options:
  -h, --help  Show this message and exit.

Commands:
  list*    List available models
  default  Show or set the default model
  options  Manage default options for models
```

(help-models-list)=
#### llm models list --help
```
Usage: llm models list [OPTIONS]

  List available models

Options:
  --options         Show options for each model, if available
  --async           List async models
  --schemas         List models that support schemas
  --tools           List models that support tools
  -q, --query TEXT  Search for models matching these strings
  -m, --model TEXT  Specific model IDs
  -h, --help        Show this message and exit.
```

(help-models-default)=
#### llm models default --help
```
Usage: llm models default [OPTIONS] [MODEL]

  Show or set the default model

Options:
  -h, --help  Show this message and exit.
```

(help-models-options)=
#### llm models options --help
```
Usage: llm models options [OPTIONS] COMMAND [ARGS]...

  Manage default options for models

Options:
  -h, --help  Show this message and exit.

Commands:
  list*  List default options for all models
  clear  Clear default option(s) for a model
  set    Set a default option for a model
  show   List default options set for a specific model
```

(help-models-options-list)=
##### llm models options list --help
```
Usage: llm models options list [OPTIONS]

  List default options for all models

  Example usage:

      llm models options list

Options:
  -h, --help  Show this message and exit.
```

(help-models-options-show)=
##### llm models options show --help
```
Usage: llm models options show [OPTIONS] MODEL

  List default options set for a specific model

  Example usage:

      llm models options show gpt-4o

Options:
  -h, --help  Show this message and exit.
```

(help-models-options-set)=
##### llm models options set --help
```
Usage: llm models options set [OPTIONS] MODEL KEY VALUE

  Set a default option for a model

  Example usage:

      llm models options set gpt-4o temperature 0.5

Options:
  -h, --help  Show this message and exit.
```

(help-models-options-clear)=
##### llm models options clear --help
```
Usage: llm models options clear [OPTIONS] MODEL [KEY]

  Clear default option(s) for a model

  Example usage:

      llm models options clear gpt-4o
      # Or for a single option
      llm models options clear gpt-4o temperature

Options:
  -h, --help  Show this message and exit.
```

(help-templates)=
### llm templates --help
```
Usage: llm templates [OPTIONS] COMMAND [ARGS]...

  Manage stored prompt templates

Options:
  -h, --help  Show this message and exit.

Commands:
  list*    List available prompt templates
  edit     Edit the specified prompt template using the default $EDITOR
  loaders  Show template loaders registered by plugins
  path     Output the path to the templates directory
  show     Show the specified prompt template
```

(help-templates-list)=
#### llm templates list --help
```
Usage: llm templates list [OPTIONS]

  List available prompt templates

Options:
  -h, --help  Show this message and exit.
```

(help-templates-show)=
#### llm templates show --help
```
Usage: llm templates show [OPTIONS] NAME

  Show the specified prompt template

Options:
  -h, --help  Show this message and exit.
```

(help-templates-edit)=
#### llm templates edit --help
```
Usage: llm templates edit [OPTIONS] NAME

  Edit the specified prompt template using the default $EDITOR

Options:
  -h, --help  Show this message and exit.
```

(help-templates-path)=
#### llm templates path --help
```
Usage: llm templates path [OPTIONS]

  Output the path to the templates directory

Options:
  -h, --help  Show this message and exit.
```

(help-templates-loaders)=
#### llm templates loaders --help
```
Usage: llm templates loaders [OPTIONS]

  Show template loaders registered by plugins

Options:
  -h, --help  Show this message and exit.
```

(help-schemas)=
### llm schemas --help
```
Usage: llm schemas [OPTIONS] COMMAND [ARGS]...

  Manage stored schemas

Options:
  -h, --help  Show this message and exit.

Commands:
  list*  List stored schemas
  dsl    Convert LLM's schema DSL to a JSON schema
  show   Show a stored schema
```

(help-schemas-list)=
#### llm schemas list --help
```
Usage: llm schemas list [OPTIONS]

  List stored schemas

Options:
  -d, --database FILE  Path to log database
  -q, --query TEXT     Search for schemas matching this string
  --full               Output full schema contents
  --json               Output as JSON
  --nl                 Output as newline-delimited JSON
  -h, --help           Show this message and exit.
```

(help-schemas-show)=
#### llm schemas show --help
```
Usage: llm schemas show [OPTIONS] SCHEMA_ID

  Show a stored schema

Options:
  -d, --database FILE  Path to log database
  -h, --help           Show this message and exit.
```

(help-schemas-dsl)=
#### llm schemas dsl --help
```
Usage: llm schemas dsl [OPTIONS] INPUT

  Convert LLM's schema DSL to a JSON schema

      llm schema dsl 'name, age int, bio: their bio'

Options:
  --multi     Wrap in an array
  -h, --help  Show this message and exit.
```

(help-tools)=
### llm tools --help
```
Usage: llm tools [OPTIONS] COMMAND [ARGS]...

  Manage tools that can be made available to LLMs

Options:
  -h, --help  Show this message and exit.

Commands:
  list*  List available tools that have been provided by plugins
```

(help-tools-list)=
#### llm tools list --help
```
Usage: llm tools list [OPTIONS] [TOOL_DEFS]...

  List available tools that have been provided by plugins

Options:
  --json            Output as JSON
  --functions TEXT  Python code block or file path defining functions to
                    register as tools
  -h, --help        Show this message and exit.
```

(help-aliases)=
### llm aliases --help
```
Usage: llm aliases [OPTIONS] COMMAND [ARGS]...

  Manage model aliases

Options:
  -h, --help  Show this message and exit.

Commands:
  list*   List current aliases
  path    Output the path to the aliases.json file
  remove  Remove an alias
  set     Set an alias for a model
```

(help-aliases-list)=
#### llm aliases list --help
```
Usage: llm aliases list [OPTIONS]

  List current aliases

Options:
  --json      Output as JSON
  -h, --help  Show this message and exit.
```

(help-aliases-set)=
#### llm aliases set --help
```
Usage: llm aliases set [OPTIONS] ALIAS [MODEL_ID]

  Set an alias for a model

  Example usage:

      llm aliases set mini gpt-4o-mini

  Alternatively you can omit the model ID and specify one or more -q options.
  The first model matching all of those query strings will be used.

      llm aliases set mini -q 4o -q mini

Options:
  -q, --query TEXT  Set alias for model matching these strings
  -h, --help        Show this message and exit.
```

(help-aliases-remove)=
#### llm aliases remove --help
```
Usage: llm aliases remove [OPTIONS] ALIAS

  Remove an alias

  Example usage:

      $ llm aliases remove turbo

Options:
  -h, --help  Show this message and exit.
```

(help-aliases-path)=
#### llm aliases path --help
```
Usage: llm aliases path [OPTIONS]

  Output the path to the aliases.json file

Options:
  -h, --help  Show this message and exit.
```

(help-fragments)=
### llm fragments --help
```
Usage: llm fragments [OPTIONS] COMMAND [ARGS]...

  Manage fragments that are stored in the database

  Fragments are reusable snippets of text that are shared across multiple
  prompts.

Options:
  -h, --help  Show this message and exit.

Commands:
  list*    List current fragments
  loaders  Show fragment loaders registered by plugins
  remove   Remove a fragment alias
  set      Set an alias for a fragment
  show     Display the fragment stored under an alias or hash
```

(help-fragments-list)=
#### llm fragments list --help
```
Usage: llm fragments list [OPTIONS]

  List current fragments

Options:
  -q, --query TEXT  Search for fragments matching these strings
  --aliases         Show only fragments with aliases
  --json            Output as JSON
  -h, --help        Show this message and exit.
```

(help-fragments-set)=
#### llm fragments set --help
```
Usage: llm fragments set [OPTIONS] ALIAS FRAGMENT

  Set an alias for a fragment

  Accepts an alias and a file path, URL, hash or '-' for stdin

  Example usage:

      llm fragments set mydocs ./docs.md

Options:
  -h, --help  Show this message and exit.
```

(help-fragments-show)=
#### llm fragments show --help
```
Usage: llm fragments show [OPTIONS] ALIAS_OR_HASH

  Display the fragment stored under an alias or hash

      llm fragments show mydocs

Options:
  -h, --help  Show this message and exit.
```

(help-fragments-remove)=
#### llm fragments remove --help
```
Usage: llm fragments remove [OPTIONS] ALIAS

  Remove a fragment alias

  Example usage:

      llm fragments remove docs

Options:
  -h, --help  Show this message and exit.
```

(help-fragments-loaders)=
#### llm fragments loaders --help
```
Usage: llm fragments loaders [OPTIONS]

  Show fragment loaders registered by plugins

Options:
  -h, --help  Show this message and exit.
```

(help-plugins)=
### llm plugins --help
```
Usage: llm plugins [OPTIONS]

  List installed plugins

Options:
  --all        Include built-in default plugins
  --hook TEXT  Filter for plugins that implement this hook
  -h, --help   Show this message and exit.
```

(help-install)=
### llm install --help
```
Usage: llm install [OPTIONS] [PACKAGES]...

  Install packages from PyPI into the same environment as LLM

Options:
  -U, --upgrade        Upgrade packages to latest version
  -e, --editable TEXT  Install a project in editable mode from this path
  --force-reinstall    Reinstall all packages even if they are already up-to-
                       date
  --no-cache-dir       Disable the cache
  --pre                Include pre-release and development versions
  -h, --help           Show this message and exit.
```

(help-uninstall)=
### llm uninstall --help
```
Usage: llm uninstall [OPTIONS] PACKAGES...

  Uninstall Python packages from the LLM environment

Options:
  -y, --yes   Don't ask for confirmation
  -h, --help  Show this message and exit.
```

(help-embed)=
### llm embed --help
```
Usage: llm embed [OPTIONS] [COLLECTION] [ID]

  Embed text and store or return the result

Options:
  -i, --input PATH                File to embed
  -m, --model TEXT                Embedding model to use
  --store                         Store the text itself in the database
  -d, --database FILE
  -c, --content TEXT              Content to embed
  --binary                        Treat input as binary data
  --metadata TEXT                 JSON object metadata to store
  -f, --format [json|blob|base64|hex]
                                  Output format
  -h, --help                      Show this message and exit.
```

(help-embed-multi)=
### llm embed-multi --help
```
Usage: llm embed-multi [OPTIONS] COLLECTION [INPUT_PATH]

  Store embeddings for multiple strings at once in the specified collection.

  Input data can come from one of three sources:

  1. A CSV, TSV, JSON or JSONL file:
     - CSV/TSV: First column is ID, remaining columns concatenated as content
     - JSON: Array of objects with "id" field and content fields
     - JSONL: Newline-delimited JSON objects

     Examples:
       llm embed-multi docs input.csv
       cat data.json | llm embed-multi docs -
       llm embed-multi docs input.json --format json

  2. A SQL query against a SQLite database:
     - First column returned is used as ID
     - Other columns concatenated to form content

     Examples:
       llm embed-multi docs --sql "SELECT id, title, body FROM posts"
       llm embed-multi docs --attach blog blog.db --sql "SELECT id, content FROM blog.posts"

  3. Files in directories matching glob patterns:
     - Each file becomes one embedding
     - Relative file paths become IDs

     Examples:
       llm embed-multi docs --files docs '**/*.md'
       llm embed-multi images --files photos '*.jpg' --binary
       llm embed-multi texts --files texts '*.txt' --encoding utf-8 --encoding latin-1

Options:
  --format [json|csv|tsv|nl]   Format of input file - defaults to auto-detect
  --files <DIRECTORY TEXT>...  Embed files in this directory - specify directory
                               and glob pattern
  --encoding TEXT              Encodings to try when reading --files
  --binary                     Treat --files as binary data
  --sql TEXT                   Read input using this SQL query
  --attach <TEXT FILE>...      Additional databases to attach - specify alias
                               and file path
  --batch-size INTEGER         Batch size to use when running embeddings
  --prefix TEXT                Prefix to add to the IDs
  -m, --model TEXT             Embedding model to use
  --prepend TEXT               Prepend this string to all content before
                               embedding
  --store                      Store the text itself in the database
  -d, --database FILE
  -h, --help                   Show this message and exit.
```

(help-similar)=
### llm similar --help
```
Usage: llm similar [OPTIONS] COLLECTION [ID]

  Return top N similar IDs from a collection using cosine similarity.

  Example usage:

      llm similar my-collection -c "I like cats"

  Or to find content similar to a specific stored ID:

      llm similar my-collection 1234

Options:
  -i, --input PATH      File to embed for comparison
  -c, --content TEXT    Content to embed for comparison
  --binary              Treat input as binary data
  -n, --number INTEGER  Number of results to return
  -p, --plain           Output in plain text format
  -d, --database FILE
  --prefix TEXT         Just IDs with this prefix
  -h, --help            Show this message and exit.
```

(help-embed-models)=
### llm embed-models --help
```
Usage: llm embed-models [OPTIONS] COMMAND [ARGS]...

  Manage available embedding models

Options:
  -h, --help  Show this message and exit.

Commands:
  list*    List available embedding models
  default  Show or set the default embedding model
```

(help-embed-models-list)=
#### llm embed-models list --help
```
Usage: llm embed-models list [OPTIONS]

  List available embedding models

Options:
  -q, --query TEXT  Search for embedding models matching these strings
  -h, --help        Show this message and exit.
```

(help-embed-models-default)=
#### llm embed-models default --help
```
Usage: llm embed-models default [OPTIONS] [MODEL]

  Show or set the default embedding model

Options:
  --remove-default  Reset to specifying no default model
  -h, --help        Show this message and exit.
```

(help-collections)=
### llm collections --help
```
Usage: llm collections [OPTIONS] COMMAND [ARGS]...

  View and manage collections of embeddings

Options:
  -h, --help  Show this message and exit.

Commands:
  list*   View a list of collections
  delete  Delete the specified collection
  path    Output the path to the embeddings database
```

(help-collections-path)=
#### llm collections path --help
```
Usage: llm collections path [OPTIONS]

  Output the path to the embeddings database

Options:
  -h, --help  Show this message and exit.
```

(help-collections-list)=
#### llm collections list --help
```
Usage: llm collections list [OPTIONS]

  View a list of collections

Options:
  -d, --database FILE  Path to embeddings database
  --json               Output as JSON
  -h, --help           Show this message and exit.
```

(help-collections-delete)=
#### llm collections delete --help
```
Usage: llm collections delete [OPTIONS] COLLECTION

  Delete the specified collection

  Example usage:

      llm collections delete my-collection

Options:
  -d, --database FILE  Path to embeddings database
  -h, --help           Show this message and exit.
```

(help-openai)=
### llm openai --help
```
Usage: llm openai [OPTIONS] COMMAND [ARGS]...

  Commands for working directly with the OpenAI API

Options:
  -h, --help  Show this message and exit.

Commands:
  models  List models available to you from the OpenAI API
```

(help-openai-models)=
#### llm openai models --help
```
Usage: llm openai models [OPTIONS]

  List models available to you from the OpenAI API

Options:
  --json      Output as JSON
  --key TEXT  OpenAI API key
  -h, --help  Show this message and exit.
```
<!-- [[[end]]] -->

================================================
FILE: docs/index.md
================================================
# LLM

[![GitHub repo](https://img.shields.io/badge/github-repo-green)](https://github.com/simonw/llm)
[![PyPI](https://img.shields.io/pypi/v/llm.svg)](https://pypi.org/project/llm/)
[![Changelog](https://img.shields.io/github/v/release/simonw/llm?include_prereleases&label=changelog)](https://llm.datasette.io/en/stable/changelog.html)
[![Tests](https://github.com/simonw/llm/workflows/Test/badge.svg)](https://github.com/simonw/llm/actions?query=workflow%3ATest)
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/llm/blob/main/LICENSE)
[![Discord](https://img.shields.io/discord/823971286308356157?label=discord)](https://datasette.io/discord-llm)
[![Homebrew](https://img.shields.io/homebrew/installs/dy/llm?color=yellow&label=homebrew&logo=homebrew)](https://formulae.brew.sh/formula/llm)

A CLI tool and Python library for interacting with **OpenAI**, **Anthropic's Claude**, **Google's Gemini**, **Meta's Llama** and dozens of other Large Language Models, both via remote APIs and with models that can be installed and run on your own machine.

Watch **[Language models on the command-line](https://www.youtube.com/watch?v=QUXQNi6jQ30)** on YouTube for a demo or [read the accompanying detailed notes](https://simonwillison.net/2024/Jun/17/cli-language-models/).

With LLM you can:
- {ref}`Run prompts from the command-line <usage-executing-prompts>`
- {ref}`Store prompts and responses in SQLite <logging>`
- {ref}`Generate and store embeddings <embeddings>`
- {ref}`Extract structured content from text and images <schemas>`
- {ref}`Grant models the ability to execute tools <tools>`
- ... and much, much more

## Quick start

First, install LLM using `pip` or Homebrew or `pipx` or `uv`:

```bash
pip install llm
```
Or with Homebrew (see {ref}`warning note <homebrew-warning>`):
```bash
brew install llm
```
Or with [pipx](https://pypa.github.io/pipx/):
```bash
pipx install llm
```
Or with [uv](https://docs.astral.sh/uv/guides/tools/):
```bash
uv tool install llm
```
If you have an [OpenAI API key](https://platform.openai.com/api-keys) you can run this:
```bash
# Paste your OpenAI API key into this
llm keys set openai

# Run a prompt (with the default gpt-4o-mini model)
llm "Ten fun names for a pet pelican"

# Extract text from an image
llm "extract text" -a scanned-document.jpg

# Use a system prompt against a file
cat myfile.py | llm -s "Explain this code"
```
Run prompts against [Gemini](https://aistudio.google.com/apikey) or [Anthropic](https://console.anthropic.com/) with their respective plugins:
```bash
llm install llm-gemini
llm keys set gemini
# Paste Gemini API key here
llm -m gemini-2.0-flash 'Tell me fun facts about Mountain View'

llm install llm-anthropic
llm keys set anthropic
# Paste Anthropic API key here
llm -m claude-4-opus 'Impress me with wild facts about turnips'
```
You can also {ref}`install a plugin <installing-plugins>` to access models that can run on your local device. If you use [Ollama](https://ollama.com/):
```bash
# Install the plugin
llm install llm-ollama

# Download the Llama 3.2 model and run a prompt against it
ollama pull llama3.2:latest
llm -m llama3.2:latest 'What is the capital of France?'
```
To start {ref}`an interactive chat <usage-chat>` with a model, use `llm chat`:
```bash
llm chat -m gpt-4.1
```
```
Chatting with gpt-4.1
Type 'exit' or 'quit' to exit
Type '!multi' to enter multiple lines, then '!end' to finish
Type '!edit' to open your default editor and modify the prompt.
Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments
> Tell me a joke about a pelican
Why don't pelicans like to tip waiters?

Because they always have a big bill!
```

More background on this project:

- [llm, ttok and strip-tags—CLI tools for working with ChatGPT and other LLMs](https://simonwillison.net/2023/May/18/cli-tools-for-llms/)
- [The LLM CLI tool now supports self-hosted language models via plugins](https://simonwillison.net/2023/Jul/12/llm/)
- [LLM now provides tools for working with embeddings](https://simonwillison.net/2023/Sep/4/llm-embeddings/)
- [Build an image search engine with llm-clip, chat with models with llm chat](https://simonwillison.net/2023/Sep/12/llm-clip-and-chat/)
- [You can now run prompts against images, audio and video in your terminal using LLM](https://simonwillison.net/2024/Oct/29/llm-multi-modal/)
- [Structured data extraction from unstructured content using LLM schemas](https://simonwillison.net/2025/Feb/28/llm-schemas/)
- [Long context support in LLM 0.24 using fragments and template plugins](https://simonwillison.net/2025/Apr/7/long-context-llm/)

See also [the llm tag](https://simonwillison.net/tags/llm/) on my blog.

## Contents

```{toctree}
---
maxdepth: 3
---
setup
usage
openai-models
other-models
tools
schemas
templates
fragments
aliases
embeddings/index
plugins/index
python-api
logging
related-tools
help
contributing
```
```{toctree}
---
maxdepth: 1
---
changelog
```


================================================
FILE: docs/logging.md
================================================
(logging)=
# Logging to SQLite

`llm` defaults to logging all prompts and responses to a SQLite database.

You can find the location of that database using the `llm logs path` command:

```bash
llm logs path
```
On my Mac that outputs:
```
/Users/simon/Library/Application Support/io.datasette.llm/logs.db
```
This will differ for other operating systems.

To avoid logging an individual prompt, pass `--no-log` or `-n` to the command:
```bash
llm 'Ten names for cheesecakes' -n
```

To turn off logging by default:

```bash
llm logs off
```
If you've turned off logging you can still log an individual prompt and response by adding `--log`:
```bash
llm 'Five ambitious names for a pet pterodactyl' --log
```
To turn logging by default back on again:

```bash
llm logs on
```
To see the status of the logs database, run this:
```bash
llm logs status
```
Example output:
```
Logging is ON for all prompts
Found log database at /Users/simon/Library/Application Support/io.datasette.llm/logs.db
Number of conversations logged: 33
Number of responses logged:     48
Database file size:             19.96MB
```

(logging-view)=

## Viewing the logs

You can view the logs using the `llm logs` command:
```bash
llm logs
```
This will output the three most recent logged items in Markdown format, showing both the prompt and the response formatted using Markdown.

To get back just the most recent prompt response as plain text, add `-r/--response`:

```bash
llm logs -r
```
Use `-x/--extract` to extract and return the first fenced code block from the selected log entries:

```bash
llm logs --extract
```
Or `--xl/--extract-last` for the last fenced code block:
```bash
llm logs --extract-last
```

Add `--json` to get the log messages in JSON instead:

```bash
llm logs --json
```

Add `-n 10` to see the ten most recent items:
```bash
llm logs -n 10
```
Or `-n 0` to see everything that has ever been logged:
```bash
llm logs -n 0
```
You can truncate the display of the prompts and responses using the `-t/--truncate` option. This can help make the JSON output more readable - though the `--short` option is usually better.
```bash
llm logs -n 1 -t --json
```
Example output:
```json
[
  {
    "id": "01jm8ec74wxsdatyn5pq1fp0s5",
    "model": "anthropic/claude-3-haiku-20240307",
    "prompt": "hi",
    "system": null,
    "prompt_json": null,
    "response": "Hello! How can I assist you today?",
    "conversation_id": "01jm8ec74taftdgj2t4zra9z0j",
    "duration_ms": 560,
    "datetime_utc": "2025-02-16T22:34:30.374882+00:00",
    "input_tokens": 8,
    "output_tokens": 12,
    "token_details": null,
    "conversation_name": "hi",
    "conversation_model": "anthropic/claude-3-haiku-20240307",
    "attachments": []
  }
]
```

(logging-short)=

### -s/--short mode

Use `-s/--short` to see a shortened YAML log with truncated prompts and no responses:
```bash
llm logs -n 2 --short
```
Example output:
```yaml
- model: deepseek-reasoner
  datetime: '2025-02-02T06:39:53'
  conversation: 01jk2pk05xq3d0vgk0202zrsg1
  prompt:  H01 There are five huts. H02 The Scotsman lives in the purple hut. H03 The Welshman owns the parrot. H04 Kombucha is...
- model: o3-mini
  datetime: '2025-02-02T19:03:05'
  conversation: 01jk40qkxetedzpf1zd8k9bgww
  system: Formatting re-enabled. Write a detailed README with extensive usage examples.
  prompt: <documents> <document index="1"> <source>./Cargo.toml</source> <document_content> [package] name = "py-limbo" version...
```
Include `-u/--usage` to include token usage information:

```bash
llm logs -n 1 --short --usage
```
Example output:
```yaml
- model: o3-mini
  datetime: '2025-02-16T23:00:56'
  conversation: 01jm8fxxnef92n1663c6ays8xt
  system: Produce Python code that demonstrates every possible usage of yaml.dump
    with all of the arguments it can take, especi...
  prompt: <documents> <document index="1"> <source>./setup.py</source> <document_content>
    NAME = 'PyYAML' VERSION = '7.0.0.dev0...
  usage:
    input: 74793
    output: 3550
    details:
      completion_tokens_details:
        reasoning_tokens: 2240
```

(logging-conversation)=

### Logs for a conversation

To view the logs for the most recent {ref}`conversation <usage-conversation>` you have had with a model, use `-c`:

```bash
llm logs -c
```
To see logs for a specific conversation based on its ID, use `--cid ID` or `--conversation ID`:

```bash
llm logs --cid 01h82n0q9crqtnzmf13gkyxawg
```

(logging-search)=

### Searching the logs

You can search the logs for a search term in the `prompt` or the `response` columns.
```bash
llm logs -q 'cheesecake'
```
The most relevant results will be shown first.

To switch to sorting with most recent first, add `-l/--latest`. This can be combined with `-n` to limit the number of results shown:
```bash
llm logs -q 'cheesecake' -l -n 3
```

(logging-filter-id)=

### Filtering past a specific ID

If you want to retrieve all of the logs that were recorded since a specific response ID you can do so using these options:

- `--id-gt $ID` - every record with an ID greater than $ID
- `--id-gte $ID` - every record with an ID greater than or equal to $ID

IDs are always issued in ascending order by time, so this provides a useful way to see everything that has happened since a particular record.

This can be particularly useful when {ref}`working with schema data <schemas-logs>`, where you might want to access every record that you have created using a specific `--schema` but exclude records you have previously processed.

(logging-filter-model)=

### Filtering by model

You can filter to logs just for a specific model (or model alias) using `-m/--model`:
```bash
llm logs -m chatgpt
```

(logging-filter-fragments)=

### Filtering by prompts that used specific fragments

The `-f/--fragment X` option will filter for just responses that were created using the specified {ref}`fragment <usage-fragments>` hash or alias or URL or filename.

Fragments are displayed in the logs as their hash ID. Add `-e/--expand` to display fragments as their full content - this option works for both the default Markdown and the `--json` mode:

```bash
llm logs -f https://llm.datasette.io/robots.txt --expand
```
You can display just the content for a specific fragment hash ID (or alias) using the `llm fragments show` command:

```bash
llm fragments show 993fd38d898d2b59fd2d16c811da5bdac658faa34f0f4d411edde7c17ebb0680
```
If you provide multiple fragments you will get back responses that used _all_ of those fragments.

(logging-filter-tools)=

### Filtering by prompts that used specific tools

You can filter for responses that used specific tools with the `--tool/-T` option:

```bash
llm logs -T simple_eval
```
This will match responses that involved a _result_ from that tool. If the tool was not executed it will not be included in the filtered responses.

Pass `--tool/-T` multiple times for responses that used all of the specified tools.

Use the `llm logs --tools` flag to see _all_ responses that involved at least one tool result, including from `--functions`:

```bash
llm logs --tools
```

(logging-filter-schemas)=

### Browsing data collected using schemas

The `--schema X` option can be used to view responses that used the specified schema, using any of the {ref}`ways to specify a schema <schemas-specify>`:

```bash
llm logs --schema 'name, age int, bio'
```

This can be combined with `--data` and `--data-array` and `--data-key` to extract just the returned JSON data - consult the {ref}`schemas documentation <schemas-logs>` for details.

(logging-datasette)=

## Browsing logs using Datasette

You can also use [Datasette](https://datasette.io/) to browse your logs like this:

```bash
datasette "$(llm logs path)"
```

(logging-backup)=

## Backing up your database

You can back up your logs to another file using the `llm logs backup` command:

```bash
llm logs backup /tmp/backup.db
```
This uses SQLite [VACUUM INTO](https://sqlite.org/lang_vacuum.html#vacuum_with_an_into_clause) under the hood.

(logging-sql-schema)=

## SQL schema

Here's the SQL schema used by the `logs.db` database:

<!-- [[[cog
import cog
from llm.migrations import migrate
import sqlite_utils
import re
# Create a throwaway in-memory database and run llm.migrations.migrate on it,
# so the table schemas read below come from the migration code itself.
db = sqlite_utils.Database(memory=True)
migrate(db)

def cleanup_sql(sql):
    """Re-indent a CREATE TABLE statement with one comma-separated item per line.

    Everything before the first "(" is kept as the header. The text between
    the first "(" and the last ")" is split on every comma, each piece is
    stripped, and the pieces are re-joined with a two-space indent. The
    result is terminated with ");".

    The split is purely textual, so commas nested inside parentheses (for
    example a composite PRIMARY KEY clause) are split onto separate lines too.
    """
    header = sql.partition('(')[0]
    # Greedy match with DOTALL: spans from the first "(" to the last ")".
    body = re.search(r'\((.*)\)', sql, re.DOTALL).group(1)
    pieces = map(str.strip, body.split(','))
    return "".join([header, '(\n  ', ',\n  '.join(pieces), '\n);'])

# Emit a fenced sql code block containing the reformatted schema of each
# table named below, in this fixed order, one statement after another.
cog.out("```sql\n")
for table in (
    "conversations", "schemas", "responses", "responses_fts", "attachments", "prompt_attachments",
    "fragments", "fragment_aliases", "prompt_fragments", "system_fragments", "tools",
    "tool_responses", "tool_calls", "tool_results", "tool_instances"
):
    schema = db[table].schema
    # format() is called with no format spec; presumably the builtin, since
    # nothing in this script rebinds the name.
    cog.out(format(cleanup_sql(schema)))
    cog.out("\n")
# Close the fenced code block.
cog.out("```\n")
]]] -->
```sql
CREATE TABLE [conversations] (
  [id] TEXT PRIMARY KEY,
  [name] TEXT,
  [model] TEXT
);
CREATE TABLE [schemas] (
  [id] TEXT PRIMARY KEY,
  [content] TEXT
);
CREATE TABLE "responses" (
  [id] TEXT PRIMARY KEY,
  [model] TEXT,
  [prompt] TEXT,
  [system] TEXT,
  [prompt_json] TEXT,
  [options_json] TEXT,
  [response] TEXT,
  [response_json] TEXT,
  [conversation_id] TEXT REFERENCES [conversations]([id]),
  [duration_ms] INTEGER,
  [datetime_utc] TEXT,
  [input_tokens] INTEGER,
  [output_tokens] INTEGER,
  [token_details] TEXT,
  [schema_id] TEXT REFERENCES [schemas]([id]),
  [resolved_model] TEXT
);
CREATE VIRTUAL TABLE [responses_fts] USING FTS5 (
  [prompt],
  [response],
  content=[responses]
);
CREATE TABLE [attachments] (
  [id] TEXT PRIMARY KEY,
  [type] TEXT,
  [path] TEXT,
  [url] TEXT,
  [content] BLOB
);
CREATE TABLE [prompt_attachments] (
  [response_id] TEXT REFERENCES [responses]([id]),
  [attachment_id] TEXT REFERENCES [attachments]([id]),
  [order] INTEGER,
  PRIMARY KEY ([response_id],
  [attachment_id])
);
CREATE TABLE [fragments] (
  [id] INTEGER PRIMARY KEY,
  [hash] TEXT,
  [content] TEXT,
  [datetime_utc] TEXT,
  [source] TEXT
);
CREATE TABLE [fragment_aliases] (
  [alias] TEXT PRIMARY KEY,
  [fragment_id] INTEGER REFERENCES [fragments]([id])
);
CREATE TABLE "prompt_fragments" (
  [response_id] TEXT REFERENCES [responses]([id]),
  [fragment_id] INTEGER REFERENCES [fragments]([id]),
  [order] INTEGER,
  PRIMARY KEY ([response_id],
  [fragment_id],
  [order])
);
CREATE TABLE "system_fragments" (
  [response_id] TEXT REFERENCES [responses]([id]),
  [fragment_id] INTEGER REFERENCES [fragments]([id]),
  [order] INTEGER,
  PRIMARY KEY ([response_id],
  [fragment_id],
  [order])
);
CREATE TABLE [tools] (
  [id] INTEGER PRIMARY KEY,
  [hash] TEXT,
  [name] TEXT,
  [description] TEXT,
  [input_schema] TEXT,
  [plugin] TEXT
);
CREATE TABLE [tool_responses] (
  [tool_id] INTEGER REFERENCES [tools]([id]),
  [response_id] TEXT REFERENCES [responses]([id]),
  PRIMARY KEY ([tool_id],
  [response_id])
);
CREATE TABLE [tool_calls] (
  [id] INTEGER PRIMARY KEY,
  [response_id] TEXT REFERENCES [responses]([id]),
  [tool_id] INTEGER REFERENCES [tools]([id]),
  [name] TEXT,
  [arguments] TEXT,
  [tool_call_id] TEXT
);
CREATE TABLE "tool_results" (
  [id] INTEGER PRIMARY KEY,
  [response_id] TEXT REFERENCES [responses]([id]),
  [tool_id] INTEGER REFERENCES [tools]([id]),
  [name] TEXT,
  [output] TEXT,
  [tool_call_id] TEXT,
  [instance_id] INTEGER REFERENCES [tool_instances]([id]),
  [exception] TEXT
);
CREATE TABLE [tool_instances] (
  [id] INTEGER PRIMARY KEY,
  [plugin] TEXT,
  [name] TEXT,
  [arguments] TEXT
);
```
<!-- [[[end]]] -->
`responses_fts` configures [SQLite full-text search](https://www.sqlite.org/fts5.html) against the `prompt` and `response` columns in the `responses` table.


================================================
FILE: docs/openai-models.md
================================================
(openai-models)=

# OpenAI models

LLM ships with a default plugin for talking to OpenAI's API. OpenAI offer both language models and embedding models, and LLM can access both types.

(openai-models-configuration)=

## Configuration

All OpenAI models are accessed using an API key. You can obtain one from [the API keys page](https://platform.openai.com/api-keys) on their site.

Once you have created a key, configure LLM to use it by running:

```bash
llm keys set openai
```
Then paste in the API key.

(openai-models-language)=

## OpenAI language models

Run `llm models` for a full list of available models. The OpenAI models supported by LLM are:

<!-- [[[cog
from click.testing import CliRunner
from llm.cli import cli
result = CliRunner().invoke(cli, ["models", "list"])
models = [line for line in result.output.split("\n") if line.startswith("OpenAI ")]
cog.out("```\n{}\n```".format("\n".join(models)))
]]] -->
```
OpenAI Chat: gpt-4o (aliases: 4o)
OpenAI Chat: chatgpt-4o-latest (aliases: chatgpt-4o)
OpenAI Chat: gpt-4o-mini (aliases: 4o-mini)
OpenAI Chat: gpt-4o-audio-preview
OpenAI Chat: gpt-4o-audio-preview-2024-12-17
OpenAI Chat: gpt-4o-audio-preview-2024-10-01
OpenAI Chat: gpt-4o-mini-audio-preview
OpenAI Chat: gpt-4o-mini-audio-preview-2024-12-17
OpenAI Chat: gpt-4.1 (aliases: 4.1)
OpenAI Chat: gpt-4.1-mini (aliases: 4.1-mini)
OpenAI Chat: gpt-4.1-nano (aliases: 4.1-nano)
OpenAI Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt)
OpenAI Chat: gpt-3.5-turbo-16k (aliases: chatgpt-16k, 3.5-16k)
OpenAI Chat: gpt-4 (aliases: 4, gpt4)
OpenAI Chat: gpt-4-32k (aliases: 4-32k)
OpenAI Chat: gpt-4-1106-preview
OpenAI Chat: gpt-4-0125-preview
OpenAI Chat: gpt-4-turbo-2024-04-09
OpenAI Chat: gpt-4-turbo (aliases: gpt-4-turbo-preview, 4-turbo, 4t)
OpenAI Chat: gpt-4.5-preview-2025-02-27
OpenAI Chat: gpt-4.5-preview (aliases: gpt-4.5)
OpenAI Chat: o1
OpenAI Chat: o1-2024-12-17
OpenAI Chat: o1-preview
OpenAI Chat: o1-mini
OpenAI Chat: o3-mini
OpenAI Chat: o3
OpenAI Chat: o4-mini
OpenAI Chat: gpt-5
OpenAI Chat: gpt-5-mini
OpenAI Chat: gpt-5-nano
OpenAI Chat: gpt-5-2025-08-07
OpenAI Chat: gpt-5-mini-2025-08-07
OpenAI Chat: gpt-5-nano-2025-08-07
OpenAI Chat: gpt-5.1
OpenAI Chat: gpt-5.1-chat-latest
OpenAI Chat: gpt-5.2
OpenAI Chat: gpt-5.2-chat-latest
OpenAI Chat: gpt-5.4
OpenAI Chat: gpt-5.4-2026-03-05
OpenAI Chat: gpt-5.4-mini
OpenAI Chat: gpt-5.4-mini-2026-03-17
OpenAI Chat: gpt-5.4-nano
OpenAI Chat: gpt-5.4-nano-2026-03-17
OpenAI Completion: gpt-3.5-turbo-instruct (aliases: 3.5-instruct, chatgpt-instruct)
```
<!-- [[[end]]] -->

See [the OpenAI models documentation](https://platform.openai.com/docs/models) for details of each of these.

`gpt-4o-mini` (aliased to `4o-mini`) is the least expensive model, and is the default if you don't specify a model at all. Consult [OpenAI's model documentation](https://platform.openai.com/docs/models) for details of the other models.

[o1-pro](https://platform.openai.com/docs/models/o1-pro) is not available through the Chat Completions API used by LLM's default OpenAI plugin. You can install the new [llm-openai-plugin](https://github.com/simonw/llm-openai-plugin) plugin to access that model.

## Model features

The following features work with OpenAI models:

- {ref}`System prompts <usage-system-prompts>` can be used to provide instructions that have a higher weight than the prompt itself.
- {ref}`Attachments <usage-attachments>`. Many OpenAI models support image inputs - check which ones using `llm models --options`. Any model that accepts images can also accept PDFs.
- {ref}`Schemas <usage-schemas>` can be used to influence the JSON structure of the model output.
- {ref}`Model options <usage-model-options>` can be used to set parameters like `temperature`. Use `llm models --options` for a full list of supported options.

(openai-models-embedding)=

## OpenAI embedding models

Run `llm embed-models` for a list of {ref}`embedding models <embeddings>`. The following OpenAI embedding models are supported by LLM:

```
ada-002 (aliases: ada, oai)
3-small
3-large
3-small-512
3-large-256
3-large-1024
```

The `3-small` model is currently the most inexpensive. `3-large` costs more but is more capable - see [New embedding models and API updates](https://openai.com/blog/new-embedding-models-and-api-updates) on the OpenAI blog for details and benchmarks.

An important characteristic of any embedding model is the size of the vector it returns. Smaller vectors cost less to store and query, but may be less accurate.

OpenAI `3-small` and `3-large` vectors can be safely truncated to lower dimensions without losing too much accuracy. The models provided by LLM whose names end in an integer suffix are pre-configured to do this, so `3-large-256` is the `3-large` model truncated to 256 dimensions.

The vector sizes of the supported OpenAI embedding models are as follows:

| Model | Size |
| --- | --- |
| ada-002 | 1536 |
| 3-small | 1536 |
| 3-large | 3072 |
| 3-small-512 | 512 |
| 3-large-256 | 256 |
| 3-large-1024 | 1024 |

(openai-completion-models)=

## OpenAI completion models

The `gpt-3.5-turbo-instruct` model is a little different - it is a completion model rather than a chat model, described in [the OpenAI completions documentation](https://platform.openai.com/docs/api-reference/completions/create).

Completion models can be called with the `-o logprobs 3` option (not supported by chat models) which will cause LLM to store 3 log probabilities for each returned token in the SQLite database. Consult [this issue](https://github.com/simonw/llm/issues/284#issuecomment-1724772704) for details on how to read these values.

(openai-extra-models)=

## Adding more OpenAI models

OpenAI occasionally release new models with new names. LLM aims to ship new releases to support these, but you can also configure them directly, by adding them to an `extra-openai-models.yaml` configuration file.

Run this command to find the directory in which this file should be created:

```bash
dirname "$(llm logs path)"
```
On my Mac laptop I get this:
```
~/Library/Application Support/io.datasette.llm
```
Create a file in that directory called `extra-openai-models.yaml`.

Let's say OpenAI have just released the `gpt-3.5-turbo-0613` model and you want to use it, despite LLM not yet shipping support. You could configure that by adding this to the file:

```yaml
- model_id: gpt-3.5-turbo-0613
  model_name: gpt-3.5-turbo-0613
  aliases: ["0613"]
```
The `model_id` is the identifier that will be recorded in the LLM logs. You can use this to specify the model, or you can optionally include a list of aliases for that model. The `model_name` is the actual model identifier that will be passed to the API, which must match exactly what the API expects.

If the model is a completion model (such as `gpt-3.5-turbo-instruct`) add `completion: true` to the configuration.

If the model supports structured extraction using json_schema, add `supports_schema: true` to the configuration.

For reasoning models like `o1` or `o3-mini` add `reasoning: true`.

With this configuration in place, the following command should run a prompt against the new model:

```bash
llm -m 0613 'What is the capital of France?'
```
Run `llm models` to confirm that the new model is now available:
```bash
llm models
```
Example output:
```
OpenAI Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt)
OpenAI Chat: gpt-3.5-turbo-16k (aliases: chatgpt-16k, 3.5-16k)
OpenAI Chat: gpt-4 (aliases: 4, gpt4)
OpenAI Chat: gpt-4-32k (aliases: 4-32k)
OpenAI Chat: gpt-3.5-turbo-0613 (aliases: 0613)
```
Running `llm logs -n 1` should confirm that the prompt and response have been correctly logged to the database.


================================================
FILE: docs/other-models.md
================================================
(other-models)=
# Other models

LLM supports OpenAI models by default. You can install {ref}`plugins <plugins>` to add support for other models. You can also add additional OpenAI-API-compatible models {ref}`using a configuration file <openai-extra-models>`.

## Installing and using a local model

{ref}`LLM plugins <plugins>` can provide local models that run on your machine.

To install **[llm-gpt4all](https://github.com/simonw/llm-gpt4all)**, providing 17 models from the [GPT4All](https://gpt4all.io/) project, run this:

```bash
llm install llm-gpt4all
```
Run `llm models` to see the expanded list of available models.

To run a prompt through one of the models from GPT4All specify it using `-m/--model`:
```bash
llm -m orca-mini-3b-gguf2-q4_0 'What is the capital of France?'
```
The model will be downloaded and cached the first time you use it.

Check the {ref}`plugin directory <plugin-directory>` for the latest list of available plugins for other models.

(openai-compatible-models)=

## OpenAI-compatible models

Projects such as [LocalAI](https://localai.io/) offer a REST API that imitates the OpenAI API but can be used to run other models, including models that can be installed on your own machine. These can be added using the same configuration mechanism.

The `model_id` is the name LLM will use for the model. The `model_name` is the name which needs to be passed to the API - this might differ from the `model_id`, especially if the `model_id` could potentially clash with other installed models.

The `api_base` key can be used to point the OpenAI client library at a different API endpoint.

To add the `orca-mini-3b` model hosted by a local installation of [LocalAI](https://localai.io/), add this to your `extra-openai-models.yaml` file:

```yaml
- model_id: orca-openai-compat
  model_name: orca-mini-3b.ggmlv3
  api_base: "http://localhost:8080"
```
If the `api_base` is set, the existing configured `openai` API key will not be sent by default.

You can set `api_key_name` to the name of a key stored using the {ref}`api-keys` feature.

Other keys you can use here:

- `completion: true` for completion models that should use the `/completions` endpoint as opposed to `/chat/completions`
- `supports_tools: true` for models that support tool calling
- `can_stream: false` to disable streaming mode for models that cannot stream
- `supports_schema: true` for models that support JSON structured schema output
- `vision: true` for models that can accept images as input
- `audio: true` for models that accept audio attachments

Having configured the model like this, run `llm models --options -m MODEL_ID` to check that it installed correctly. You can then run prompts against it like so:

```bash
llm -m orca-openai-compat 'What is the capital of France?'
```
And confirm they were logged correctly with:
```bash
llm logs -n 1
```

### Extra HTTP headers

Some providers such as [openrouter.ai](https://openrouter.ai/docs) may require the setting of additional HTTP headers. You can set those using the `headers:` key like this:

```yaml
- model_id: claude
  model_name: anthropic/claude-2
  api_base: "https://openrouter.ai/api/v1"
  api_key_name: openrouter
  headers:
    HTTP-Referer: "https://llm.datasette.io/"
    X-Title: LLM
```


================================================
FILE: docs/plugins/advanced-model-plugins.md
================================================
(advanced-model-plugins)=
# Advanced model plugins

The {ref}`model plugin tutorial <tutorial-model-plugin>` covers the basics of developing a plugin that adds support for a new model. This document covers more advanced topics.

Features to consider for your model plugin include:

- {ref}`Accepting API keys <advanced-model-plugins-api-keys>` using the standard mechanism that incorporates `llm keys set`, environment variables and support for passing an explicit key to the model.
- Including support for {ref}`Async models <advanced-model-plugins-async>` that can be used with Python's `asyncio` library.
- Support for {ref}`structured output <advanced-model-plugins-schemas>` using JSON schemas.
- Support for {ref}`tools <advanced-model-plugins-tools>`.
- Handling {ref}`attachments <advanced-model-plugins-attachments>` (images, audio and more) for multi-modal models.
- Tracking {ref}`token usage <advanced-model-plugins-usage>` for models that charge by the token.

(advanced-model-plugins-lazy)=

## Tip: lazily load expensive dependencies

If your plugin depends on an expensive library such as [PyTorch](https://pytorch.org/) you should avoid importing that dependency (or a dependency that uses that dependency) at the top level of your module. Expensive imports in plugins mean that even simple commands like `llm --help` can take a long time to run.

Instead, move those imports to inside the methods that need them. Here's an example [change to llm-sentence-transformers](https://github.com/simonw/llm-sentence-transformers/commit/f87df71e8a652a8cb05ad3836a79b815bcbfa64b) that shaved 1.8 seconds off the time it took to run `llm --help`!

(advanced-model-plugins-api-keys)=

## Models that accept API keys

Models that call out to API providers such as OpenAI, Anthropic or Google Gemini usually require an API key.

LLM's API key management mechanism {ref}`is described here <api-keys>`.

If your plugin requires an API key you should subclass the `llm.KeyModel` class instead of the `llm.Model` class. Start your model definition like this:

```python
import llm

class HostedModel(llm.KeyModel):
    needs_key = "hosted" # Required
    key_env_var = "HOSTED_API_KEY" # Optional
```
This tells LLM that your model requires an API key, which may be saved in the key registry under the key name `hosted` or might also be provided as the `HOSTED_API_KEY` environment variable.

Then when you define your `execute()` method it should take an extra `key=` parameter like this:

```python
    def execute(self, prompt, stream, response, conversation, key=None):
        # key= here will be the API key to use
```
LLM will pass in the key from the environment variable or the key registry, or the key that has been passed to LLM using the `--key` command-line option or the `model.prompt(..., key=)` parameter.

(advanced-model-plugins-async)=

## Async models

Plugins can optionally provide an asynchronous version of their model, suitable for use with Python [asyncio](https://docs.python.org/3/library/asyncio.html). This is particularly useful for remote models accessible by an HTTP API.

The async version of a model subclasses `llm.AsyncModel` instead of `llm.Model`. It must implement an `async def execute()` async generator method instead of `def execute()`.

This example shows a subset of the OpenAI default plugin illustrating how this method might work:

```python
from typing import AsyncGenerator
import llm

class MyAsyncModel(llm.AsyncModel):
    # This can duplicate the model_id of the sync model:
    model_id = "my-model-id"

    async def execute(
        self, prompt, stream, response, conversation=None
    ) -> AsyncGenerator[str, None]:
        if stream:
            completion = await client.chat.completions.create(
                model=self.model_id,
                messages=messages,
                stream=True,
            )
            async for chunk in completion:
                yield chunk.choices[0].delta.content
        else:
            completion = await client.chat.completions.create(
                model=self.model_name or self.model_id,
                messages=messages,
                stream=False,
            )
            if completion.choices[0].message.content is not None:
                yield completion.choices[0].message.content
```
If your model takes an API key you should instead subclass `llm.AsyncKeyModel` and have a `key=` parameter on your `.execute()` method:

```python
class MyAsyncModel(llm.AsyncKeyModel):
    ...
    async def execute(
        self, prompt, stream, response, conversation=None, key=None
    ) -> AsyncGenerator[str, None]:
```

This async model instance should then be passed to the `register()` method in the `register_models()` plugin hook:

```python
@hookimpl
def register_models(register):
    register(
        MyModel(), MyAsyncModel(), aliases=("my-model-aliases",)
    )
```

(advanced-model-plugins-schemas)=

## Supporting schemas

If your model supports {ref}`structured output <schemas>` against a defined JSON schema you can implement support by first adding `supports_schema = True` to the class:

```python
class MyModel(llm.KeyModel):
    ...
    supports_schema = True
```
And then adding code to your `.execute()` method that checks for `prompt.schema` and, if it is present, uses that to prompt the model.

`prompt.schema` will always be a Python dictionary representing a JSON schema, even if the user passed in a Pydantic model class.

Check the [llm-gemini](https://github.com/simonw/llm-gemini) and [llm-anthropic](https://github.com/simonw/llm-anthropic) plugins for examples of this pattern in action.

(advanced-model-plugins-tools)=

## Supporting tools

Adding {ref}`tools support <tools>` involves several steps:

1. Add `supports_tools = True` to your model class.
2. If `prompt.tools` is populated, turn that list of `llm.Tool` objects into the correct format for your model.
3. Look out for requests to call tools in the responses from your model. Call `response.add_tool_call(llm.ToolCall(...))` for each of those. This should work for streaming and non-streaming and async and non-async cases.
4. If your prompt has a `prompt.tool_results` list, pass the information from those `llm.ToolResult` objects to your model.
5. Include `prompt.tools` and `prompt.tool_results` and tool calls from `response.tool_calls_or_raise()` in the conversation history constructed by your plugin.
6. Make sure your code is OK with prompts that do not have `prompt.prompt` set to a value, since they may be carrying exclusively the results of a tool call.

This [commit to llm-gemini](https://github.com/simonw/llm-gemini/commit/a7f1096cfbb733018eb41c29028a8cc6160be298) implementing tools helps demonstrate what this looks like for a real plugin.

Here are the relevant dataclasses:

```{eval-rst}
.. autoclass:: llm.Tool

.. autoclass:: llm.ToolCall

.. autoclass:: llm.ToolResult
```


(advanced-model-plugins-attachments)=

## Attachments for multi-modal models

Models such as GPT-4o, Claude 3.5 Sonnet and Google's Gemini 1.5 are multi-modal: they accept input in the form of images and maybe even audio, video and other formats.

LLM calls these **attachments**. Models can specify the types of attachments they accept and then implement special code in the `.execute()` method to handle them.

See {ref}`the Python attachments documentation <python-api-attachments>` for details on using attachments in the Python API.

### Specifying attachment types

A `Model` subclass can list the types of attachments it accepts by defining an `attachment_types` class attribute:

```python
class NewModel(llm.Model):
    model_id = "new-model"
    attachment_types = {
        "image/png",
        "image/jpeg",
        "image/webp",
        "image/gif",
    }
```
These content types are detected when an attachment is passed to LLM using `llm -a filename`, or can be specified by the user using the `--attachment-type filename image/png` option.

**Note:** MP3 files will have their attachment type detected as `audio/mpeg`, not `audio/mp3`.

LLM will use the `attachment_types` attribute to validate that provided attachments should be accepted before passing them to the model.

### Handling attachments

The `prompt` object passed to the `execute()` method will have an `attachments` attribute containing a list of `Attachment` objects provided by the user.

An `Attachment` instance has the following properties:

- `url (str)`: The URL of the attachment, if it was provided as a URL
- `path (str)`: The resolved file path of the attachment, if it was provided as a file
- `type (str)`: The content type of the attachment, if it was provided
- `content (bytes)`: The binary content of the attachment, if it was provided

Generally only one of `url`, `path` or `content` will be set.

You should usually access the type and the content through one of these methods:

- `attachment.resolve_type() -> str`: Returns the `type` if it is available, otherwise attempts to guess the type by looking at the first few bytes of content
- `attachment.content_bytes() -> bytes`: Returns the binary content, which it may need to read from a file or fetch from a URL
- `attachment.base64_content() -> str`: Returns that content as a base64-encoded string

An `id()` method returns a database ID for this content, which is either a SHA256 hash of the binary content or, in the case of attachments hosted at an external URL, a hash of `{"url": url}` instead. This is an implementation detail which you should not need to access directly.

Note that it's possible for a prompt with attachments to not include a text prompt at all, in which case `prompt.prompt` will be `None`.

Here's how the OpenAI plugin handles attachments, including the case where no `prompt.prompt` was provided:

```python
if not prompt.attachments:
    messages.append({"role": "user", "content": prompt.prompt})
else:
    attachment_message = []
    if prompt.prompt:
        attachment_message.append({"type": "text", "text": prompt.prompt})
    for attachment in prompt.attachments:
        attachment_message.append(_attachment(attachment))
    messages.append({"role": "user", "content": attachment_message})


# And the code for creating the attachment message
def _attachment(attachment):
    url = attachment.url
    base64_content = ""
    if not url or attachment.resolve_type().startswith("audio/"):
        base64_content = attachment.base64_content()
        url = f"data:{attachment.resolve_type()};base64,{base64_content}"
    if attachment.resolve_type().startswith("image/"):
        return {"type": "image_url", "image_url": {"url": url}}
    else:
        format_ = "wav" if attachment.resolve_type() == "audio/wav" else "mp3"
        return {
            "type": "input_audio",
            "input_audio": {
                "data": base64_content,
                "format": format_,
            },
        }
```
As you can see, it uses `attachment.url` if that is available and otherwise falls back to using the `base64_content()` method to embed the image directly in the JSON sent to the API. For the OpenAI API audio attachments are always included as base64-encoded strings.

### Attachments from previous conversations

Models that implement the ability to continue a conversation can reconstruct the previous message JSON using the `response.attachments` attribute.

Here's how the OpenAI plugin does that:

```python
for prev_response in conversation.responses:
    if prev_response.attachments:
        attachment_message = []
        if prev_response.prompt.prompt:
            attachment_message.append(
                {"type": "text", "text": prev_response.prompt.prompt}
            )
        for attachment in prev_response.attachments:
            attachment_message.append(_attachment(attachment))
        messages.append({"role": "user", "content": attachment_message})
    else:
        messages.append(
            {"role": "user", "content": prev_response.prompt.prompt}
        )
    messages.append({"role": "assistant", "content": prev_response.text_or_raise()})
```
The `response.text_or_raise()` method used there will return the text from the response or raise a `ValueError` exception if the response is an `AsyncResponse` instance that has not yet been fully resolved.

This is a slightly weird hack to work around the common need to share logic for building up the `messages` list across both sync and async models.

(advanced-model-plugins-usage)=

## Tracking token usage

Models that charge by the token should track the number of tokens used by each prompt. The ``response.set_usage()`` method can be used to record the number of tokens used by a response - these will then be made available through the Python API and logged to the SQLite database for command-line users.

`response` here is the response object that is passed to `.execute()` as an argument.

Call ``response.set_usage()`` at the end of your `.execute()` method. It accepts keyword arguments `input=`, `output=` and `details=` - all three are optional. `input` and `output` should be integers, and `details` should be a dictionary that provides additional information beyond the input and output token counts.

This example logs 15 input tokens, 340 output tokens and notes that 37 tokens were cached:

```python
response.set_usage(input=15, output=340, details={"cached": 37})
```
(advanced-model-plugins-resolved-model)=

## Tracking resolved model names

In some cases the model ID that the user requested may not be the exact model that is executed. Many providers have a `model-latest` alias which may execute different models over time.

If those APIs return the _real_ model ID that was used, your plugin can record that in the `responses.resolved_model` column in the logs by calling this method and passing the string representing the resolved, final model ID:

```python
response.set_resolved_model(resolved_model_id)
```
This string will be recorded in the database and shown in the output of `llm logs` and `llm logs --json`.

(tutorial-model-plugin-raise-errors)=

## LLM_RAISE_ERRORS

While working on a plugin it can be useful to request that errors are raised instead of being caught and logged, so you can access them from the Python debugger.

Set the `LLM_RAISE_ERRORS` environment variable to enable this behavior, then run `llm` like this:

```bash
LLM_RAISE_ERRORS=1 python -i -m llm ...
```
The `-i` option means Python will drop into an interactive shell if an error occurs. You can then open a debugger at the most recent error using:

```python
import pdb; pdb.pm()
```


================================================
FILE: docs/plugins/directory.md
================================================
(plugin-directory)=
# Plugin directory

The following plugins are available for LLM. Here's {ref}`how to install them <installing-plugins>`.

(plugin-directory-local-models)=
## Local models

These plugins all help you run LLMs directly on your own computer:

- **[llm-gguf](https://github.com/simonw/llm-gguf)** uses [llama.cpp](https://github.com/ggerganov/llama.cpp) to run models published in the GGUF format.
- **[llm-mlx](https://github.com/simonw/llm-mlx)** (Mac only) uses Apple's MLX framework to provide extremely high performance access to a large number of local models.
- **[llm-ollama](https://github.com/taketwo/llm-ollama)** adds support for local models run using [Ollama](https://ollama.ai/).
- **[llm-llamafile](https://github.com/simonw/llm-llamafile)** adds support for models that are running locally using [llamafile](https://github.com/Mozilla-Ocho/llamafile).
- **[llm-mlc](https://github.com/simonw/llm-mlc)** can run local models released by the [MLC project](https://mlc.ai/mlc-llm/), including models that can take advantage of the GPU on Apple Silicon M1/M2 devices.
- **[llm-gpt4all](https://github.com/simonw/llm-gpt4all)** adds support for various models released by the [GPT4All](https://gpt4all.io/) project that are optimized to run locally on your own machine. These models include versions of Vicuna, Orca, Falcon and MPT - here's [a full list of models](https://observablehq.com/@simonw/gpt4all-models).
- **[llm-mpt30b](https://github.com/simonw/llm-mpt30b)** adds support for the [MPT-30B](https://huggingface.co/mosaicml/mpt-30b) local model.

(plugin-directory-remote-apis)=
## Remote APIs

These plugins can be used to interact with remotely hosted models via their API:

- **[llm-mistral](https://github.com/simonw/llm-mistral)** adds support for [Mistral AI](https://mistral.ai/)'s language and embedding models.
- **[llm-gemini](https://github.com/simonw/llm-gemini)** adds support for Google's [Gemini](https://ai.google.dev/docs) models.
- **[llm-anthropic](https://github.com/simonw/llm-anthropic)** supports Anthropic's [Claude 3 family](https://www.anthropic.com/news/claude-3-family), [3.5 Sonnet](https://www.anthropic.com/news/claude-3-5-sonnet) and beyond.
- **[llm-command-r](https://github.com/simonw/llm-command-r)** supports Cohere's Command R and [Command R Plus](https://txt.cohere.com/command-r-plus-microsoft-azure/) API models.
- **[llm-reka](https://github.com/simonw/llm-reka)** supports the [Reka](https://www.reka.ai/) family of models via their API.
- **[llm-perplexity](https://github.com/hex/llm-perplexity)** by Alexandru Geana supports the [Perplexity Labs](https://docs.perplexity.ai/) API models, including `llama-3-sonar-large-32k-online` which can search for things online and `llama-3-70b-instruct`.
- **[llm-groq](https://github.com/angerman/llm-groq)** by Moritz Angermann provides access to fast models hosted by [Groq](https://console.groq.com/docs/models).
- **[llm-grok](https://github.com/Hiepler/llm-grok)** by Benedikt Hiepler provides access to [Grok](https://x.ai/api) models using the xAI API.
- **[llm-anyscale-endpoints](https://github.com/simonw/llm-anyscale-endpoints)** supports models hosted on the [Anyscale Endpoints](https://app.endpoints.anyscale.com/) platform, including Llama 2 70B.
- **[llm-replicate](https://github.com/simonw/llm-replicate)** adds support for remote models hosted on [Replicate](https://replicate.com/), including Llama 2 from Meta AI.
- **[llm-fireworks](https://github.com/simonw/llm-fireworks)** supports models hosted by [Fireworks AI](https://fireworks.ai/).
- **[llm-openrouter](https://github.com/simonw/llm-openrouter)** provides access to models hosted on [OpenRouter](https://openrouter.ai/).
- **[llm-cohere](https://github.com/Accudio/llm-cohere)** by Alistair Shepherd provides `cohere-generate` and `cohere-summarize` API models, powered by [Cohere](https://cohere.com/).
- **[llm-bedrock](https://github.com/simonw/llm-bedrock)** adds support for Nova by Amazon via Amazon Bedrock.
- **[llm-bedrock-anthropic](https://github.com/sblakey/llm-bedrock-anthropic)** by Sean Blakey adds support for Claude and Claude Instant by Anthropic via Amazon Bedrock.
- **[llm-bedrock-meta](https://github.com/flabat/llm-bedrock-meta)** by Fabian Labat adds support for Llama 2 and Llama 3 by Meta via Amazon Bedrock.
- **[llm-together](https://github.com/wearedevx/llm-together)** adds support for the [Together AI](https://www.together.ai/) extensive family of hosted openly licensed models.
- **[llm-deepseek](https://github.com/abrasumente233/llm-deepseek)** adds support for the [DeepSeek](https://deepseek.com)'s DeepSeek-Chat and DeepSeek-Coder models.
- **[llm-lambda-labs](https://github.com/simonw/llm-lambda-labs)** provides access to models hosted by [Lambda Labs](https://docs.lambdalabs.com/public-cloud/lambda-chat-api/), including the Nous Hermes 3 series.
- **[llm-venice](https://github.com/ar-jan/llm-venice)** provides access to uncensored models hosted by privacy-focused [Venice AI](https://docs.venice.ai/), including Llama 3.1 405B.

If an API model host provides an OpenAI-compatible API you can also [configure LLM to talk to it](https://llm.datasette.io/en/stable/other-models.html#openai-compatible-models) without needing an extra plugin.

(plugin-directory-tools)=
## Tools

The following plugins add new {ref}`tools <tools>` that can be used by models:

- **[llm-tools-simpleeval](https://github.com/simonw/llm-tools-simpleeval)** implements simple expression support for things like mathematics.
- **[llm-tools-quickjs](https://github.com/simonw/llm-tools-quickjs)** provides access to a sandboxed QuickJS JavaScript interpreter, allowing LLMs to run JavaScript code. The environment persists between calls so the model can set variables and build functions and reuse them later on.
- **[llm-tools-sqlite](https://github.com/simonw/llm-tools-sqlite)** can run read-only SQL queries against local SQLite databases.
- **[llm-tools-datasette](https://github.com/simonw/llm-tools-datasette)** can run SQL queries against a remote [Datasette](https://datasette.io/) instance.
- **[llm-tools-exa](https://github.com/daturkel/llm-tools-exa)** by Dan Turkel can perform web searches and question-answering using [exa.ai](https://exa.ai/).
- **[llm-tools-rag](https://github.com/daturkel/llm-tools-rag)** by Dan Turkel can perform searches over your LLM embedding collections for simple RAG.

(plugin-directory-loaders)=
## Fragments and template loaders

{ref}`LLM 0.24 <v0_24>` introduced support for plugins that define `-f prefix:value` or `-t prefix:value` custom loaders for fragments and templates.

- **[llm-video-frames](https://github.com/simonw/llm-video-frames)** uses `ffmpeg` to turn a video into a sequence of JPEG frames suitable for feeding into a vision model that doesn't support video inputs: `llm -f video-frames:video.mp4 'describe the key scenes in this video'`.
- **[llm-templates-github](https://github.com/simonw/llm-templates-github)** supports loading templates shared on GitHub, e.g. `llm -t gh:simonw/pelican-svg`.
- **[llm-templates-fabric](https://github.com/simonw/llm-templates-fabric)** provides access to the [Fabric](https://github.com/danielmiessler/fabric) collection of prompts: `cat setup.py | llm -t fabric:explain_code`.
- **[llm-fragments-github](https://github.com/simonw/llm-fragments-github)** can load entire GitHub repositories in a single operation: `llm -f github:simonw/files-to-prompt 'explain this code'`. It can also fetch issue threads as Markdown using `llm -f issue:https://github.com/simonw/llm-fragments-github/issues/3`.
- **[llm-hacker-news](https://github.com/simonw/llm-hacker-news)** imports conversations from Hacker News as fragments: `llm -f hn:43615912 'summary with illustrative direct quotes'`.
- **[llm-fragments-pypi](https://github.com/samueldg/llm-fragments-pypi)** loads [PyPI](https://pypi.org/) packages' description and metadata as fragments: `llm -f pypi:ruff "What flake8 plugins does ruff re-implement?"`.
- **[llm-fragments-pdf](https://github.com/daturkel/llm-fragments-pdf)** by Dan Turkel converts PDFs to markdown with [PyMuPDF4LLM](https://pymupdf.readthedocs.io/en/latest/pymupdf4llm/index.html) to use as fragments: `llm -f pdf:something.pdf "what's this about?"`.
- **[llm-fragments-site-text](https://github.com/daturkel/llm-fragments-site-text)** by Dan Turkel converts websites to markdown with [Trafilatura](https://trafilatura.readthedocs.io/en/latest/) to use as fragments: `llm -f site:https://example.com "summarize this"`.
- **[llm-fragments-reader](https://github.com/simonw/llm-fragments-reader)** runs a URL through the Jina Reader API: `llm -f 'reader:https://simonwillison.net/tags/jina/' summary`.

(plugin-directory-embeddings)=
## Embedding models

{ref}`Embedding models <embeddings>` are models that can be used to generate and store embedding vectors for text.

- **[llm-sentence-transformers](https://github.com/simonw/llm-sentence-transformers)** adds support for embeddings using the [sentence-transformers](https://www.sbert.net/) library, which provides access to [a wide range](https://www.sbert.net/docs/pretrained_models.html) of embedding models.
- **[llm-clip](https://github.com/simonw/llm-clip)** provides the [CLIP](https://openai.com/research/clip) model, which can be used to embed images and text in the same vector space, enabling text search against images. See [Build an image search engine with llm-clip](https://simonwillison.net/2023/Sep/12/llm-clip-and-chat/) for more on this plugin.
- **[llm-embed-jina](https://github.com/simonw/llm-embed-jina)** provides Jina AI's [8K text embedding models](https://jina.ai/news/jina-ai-launches-worlds-first-open-source-8k-text-embedding-rivaling-openai/).
- **[llm-embed-onnx](https://github.com/simonw/llm-embed-onnx)** provides seven embedding models that can be executed using the ONNX model framework.

(plugin-directory-commands)=
## Extra commands

- **[llm-cmd](https://github.com/simonw/llm-cmd)** accepts a prompt for a shell command, runs that prompt and populates the result in your shell so you can review it, edit it and then hit `<enter>` to execute or `ctrl+c` to cancel.
- **[llm-cmd-comp](https://github.com/CGamesPlay/llm-cmd-comp)** provides a key binding for your shell that will launch a chat to build the command. When ready, hit `<enter>` and it will go right back into your shell command line, so you can run it.
- **[llm-python](https://github.com/simonw/llm-python)** adds a `llm python` command for running a Python interpreter in the same virtual environment as LLM. This is useful for debugging, and also provides a convenient way to interact with the LLM {ref}`python-api` if you installed LLM using Homebrew or `pipx`.
- **[llm-cluster](https://github.com/simonw/llm-cluster)** adds a `llm cluster` command for calculating clusters for a collection of embeddings. Calculated clusters can then be passed to a Large Language Model to generate a summary description.
- **[llm-jq](https://github.com/simonw/llm-jq)** lets you pipe in JSON data and a prompt describing a `jq` program, then executes the generated program against the JSON.

(plugin-directory-fun)=
## Just for fun

- **[llm-markov](https://github.com/simonw/llm-markov)** adds a simple model that generates output using a [Markov chain](https://en.wikipedia.org/wiki/Markov_chain). This example is used in the tutorial [Writing a plugin to support a new model](https://llm.datasette.io/en/latest/plugins/tutorial-model-plugin.html).


================================================
FILE: docs/plugins/index.md
================================================
(plugins)=
# Plugins

LLM plugins can enhance LLM by making alternative Large Language Models available, either via API or by running the models locally on your machine.

Plugins can also add new commands to the `llm` CLI tool.

The {ref}`plugin directory <plugin-directory>` lists available plugins that you can install and use.

{ref}`tutorial-model-plugin` describes how to build a new plugin in detail.

```{toctree}
---
maxdepth: 3
---
installing-plugins
directory
plugin-hooks
tutorial-model-plugin
advanced-model-plugins
plugin-utilities
```


================================================
FILE: docs/plugins/installing-plugins.md
================================================
(installing-plugins)=
# Installing plugins

Plugins must be installed in the same virtual environment as LLM itself.

You can find names of plugins to install in the {ref}`plugin directory <plugin-directory>`.

Use the `llm install` command (a thin wrapper around `pip install`) to install plugins in the correct environment:
```bash
llm install llm-gpt4all
```
Plugins can be uninstalled with `llm uninstall`:
```bash
llm uninstall llm-gpt4all -y
```
The `-y` flag skips asking for confirmation.

You can see additional models that have been added by plugins by running:
```bash
llm models
```
Or add `--options` to include details of the options available for each model:
```bash
llm models --options
```
To run a prompt against a newly installed model, pass its name as the `-m/--model` option:
```bash
llm -m orca-mini-3b-gguf2-q4_0 'What is the capital of France?'
```

## Listing installed plugins

Run `llm plugins` to list installed plugins:

```bash
llm plugins
```
```json
[
  {
    "name": "llm-anthropic",
    "hooks": [
      "register_models"
    ],
    "version": "0.11"
  },
  {
    "name": "llm-gguf",
    "hooks": [
      "register_commands",
      "register_models"
    ],
    "version": "0.1a0"
  },
  {
    "name": "llm-clip",
    "hooks": [
      "register_commands",
      "register_embedding_models"
    ],
    "version": "0.1"
  },
  {
    "name": "llm-cmd",
    "hooks": [
      "register_commands"
    ],
    "version": "0.2a0"
  },
  {
    "name": "llm-gemini",
    "hooks": [
      "register_embedding_models",
      "register_models"
    ],
    "version": "0.3"
  }
]
```

(llm-load-plugins)=
## Running with a subset of plugins

By default, LLM will load all plugins that are installed in the same virtual environment as LLM itself.

You can control the set of plugins that is loaded using the `LLM_LOAD_PLUGINS` environment variable.

Set that to the empty string to disable all plugins:

```bash
LLM_LOAD_PLUGINS='' llm ...
```
Or to a comma-separated list of plugin names to load only those plugins:

```bash
LLM_LOAD_PLUGINS='llm-gpt4all,llm-cluster' llm ...
```
You can use the `llm plugins` command to check that it is working correctly:
```bash
LLM_LOAD_PLUGINS='' llm plugins
```


================================================
FILE: docs/plugins/llm-markov/llm_markov.py
================================================
import llm
import random
import time
from typing import Optional
from pydantic import field_validator, Field


@llm.hookimpl
def register_models(register):
    """Plugin hook implementation: register one Markov model instance."""
    markov_model = Markov()
    register(markov_model)


def build_markov_table(text):
    """Map each word in ``text`` to the list of words that directly follow it.

    The text is split on whitespace. Followers are recorded in order of
    appearance, once per occurrence, so a successor that follows a word
    several times appears several times in that word's list. The final word
    only gets an entry if it also occurs earlier in the text.
    """
    tokens = text.split()
    table = {}
    # Pair every word with its successor; zip stops before the last word
    for current, following in zip(tokens, tokens[1:]):
        if current not in table:
            table[current] = []
        table[current].append(following)
    return table


def generate(transitions, length, start_word=None):
    """Yield up to ``length`` words by randomly walking the transition table.

    Args:
        transitions: dict mapping a word to the list of words that may
            follow it, as produced by ``build_markov_table()``.
        length: maximum number of words to yield.
        start_word: optional first word. When omitted (or falsy) a random
            key of ``transitions`` is used instead.

    Yields:
        One word per step. A word with no recorded followers is succeeded by
        a random key of ``transitions``.

    Generation stops early, rather than raising ``IndexError`` from
    ``random.choice``, when there is nothing to choose from. This happens
    when ``transitions`` is empty, e.g. for a prompt of zero or one word.
    """
    all_words = list(transitions.keys())
    if start_word:
        next_word = start_word
    elif all_words:
        next_word = random.choice(all_words)
    else:
        # Empty table and no start word: nothing can be generated
        return
    for _ in range(length):
        yield next_word
        # Fall back to any known word when this one has no followers
        options = transitions.get(next_word) or all_words
        if not options:
            # Only reachable with an empty table and an explicit start_word
            return
        next_word = random.choice(options)


class Markov(llm.Model):
    """Toy model that replies with a random Markov-chain walk over the prompt."""

    model_id = "markov"
    can_stream = True

    class Options(llm.Options):
        # length: how many words to emit; execute() falls back to 20 when unset.
        # delay: pause in seconds after each emitted word; no pause when unset.
        length: Optional[int] = Field(
            default=None, description="Number of words to generate"
        )
        delay: Optional[float] = Field(
            default=None, description="Seconds to delay between each token"
        )

        @field_validator("length")
        def validate_length(cls, length):
            """Accept None (unset) or any length of at least 2."""
            if length is not None and length < 2:
                raise ValueError("length must be >= 2")
            return length

        @field_validator("delay")
        def validate_delay(cls, delay):
            """Accept None (unset) or a delay within the range 0 to 10."""
            if delay is None or 0 <= delay <= 10:
                return delay
            raise ValueError("delay must be between 0 and 10")

    def execute(self, prompt, stream, response, conversation):
        """Yield generated words one at a time, each followed by a space.

        The transition table is built from the prompt text alone; ``stream``,
        ``response`` and ``conversation`` are not used here.
        """
        table = build_markov_table(prompt.prompt)
        options = prompt.options
        word_count = options.length or 20
        for token in generate(table, word_count):
            yield token + " "
            # Optional throttle after each emitted token
            if options.delay:
                time.sleep(options.delay)


================================================
FILE: docs/plugins/llm-markov/pyproject.toml
================================================
[project]
name = "llm-markov"
version = "0.1"

[project.entry-points.llm]
markov = "llm_markov"

================================================
FILE: docs/plugins/plugin-hooks.md
================================================
(plugin-hooks)=
# Plugin hooks

Plugins use **plugin hooks** to customize LLM's behavior. These hooks are powered by the [Pluggy plugin system](https://pluggy.readthedocs.io/).

Each plugin can implement one or more hooks using the `@hookimpl` decorator against one of the hook function names described on this page.

LLM imitates the Datasette plugin system. The [Datasette plugin documentation](https://docs.datasette.io/en/stable/writing_plugins.html) describes how plugins work.

(plugin-hooks-register-commands)=
## register_commands(cli)

This hook adds new commands to the `llm` CLI tool - for example `llm extra-command`.

This example plugin adds a new `hello-world` command that prints "Hello world!":

```python
from llm import hookimpl
import click

@hookimpl
def register_commands(cli):
    @cli.command(name="hello-world")
    def hello_world():
        "Print hello world"
        click.echo("Hello world!")
```
This new command will be added to `llm --help` and can be run using `llm hello-world`.

(plugin-hooks-register-models)=
## register_models(register)

This hook can be used to register one or more additional models.

```python
import llm

@llm.hookimpl
def register_models(register):
    register(HelloWorld())

class HelloWorld(llm.Model):
    model_id = "helloworld"

    def execute(self, prompt, stream, response, conversation):
        return ["hello world"]
```
If your model includes an async version, you can register that too:

```python
class AsyncHelloWorld(llm.AsyncModel):
    model_id = "helloworld"

    async def execute(self, prompt, stream, response, conversation=None):
        yield "hello world"

@llm.hookimpl
def register_models(register):
    register(HelloWorld(), AsyncHelloWorld(), aliases=("hw",))
```
This demonstrates how to register a model with both sync and async versions, and how to specify an alias for that model.

The {ref}`model plugin tutorial <tutorial-model-plugin>` describes how to use this hook in detail. Asynchronous models {ref}`are described here <advanced-model-plugins-async>`.

(plugin-hooks-register-embedding-models)=
## register_embedding_models(register)

This hook can be used to register one or more additional embedding models, as described in {ref}`embeddings-writing-plugins`.

```python
import llm

@llm.hookimpl
def register_embedding_models(register):
    register(HelloWorld())

class HelloWorld(llm.EmbeddingModel):
    model_id = "helloworld"

    def embed_batch(self, items):
        return [[1, 2, 3], [4, 5, 6]]
```

(plugin-hooks-register-tools)=
## register_tools(register)

This hook can register one or more tool functions for use with LLM. See {ref}`the tools documentation <tools>` for more details.

This example registers two tools: `upper` and `count_character_in_word`.

```python
import llm

def upper(text: str) -> str:
    """Convert text to uppercase."""
    return text.upper()

def count_char(text: str, character: str) -> int:
    """Count the number of occurrences of a character in a word."""
    return text.count(character)

@llm.hookimpl
def register_tools(register):
    register(upper)
    # Here the name= argument is used to specify a different name for the tool:
    register(count_char, name="count_character_in_word")
```

Tools can also be implemented as classes, as described in {ref}`Toolbox classes <python-api-toolbox>` in the Python API documentation.

You can register classes like the `Memory` example {ref}`from here <python-api-toolbox>` by passing the class (_not_ an instance of the class) to `register()`:

```python
import llm

class Memory(llm.Toolbox):
    # Copy implementation from the Python API documentation

@llm.hookimpl
def register_tools(register):
    register(Memory)
```
Once installed, this tool can be used like so:

```bash
llm chat -T Memory
```
If a tool name starts with a capital letter it is assumed to be a toolbox class, not a regular tool function.

Here's an example session with the Memory tool:
```
Chatting with gpt-4.1-mini
Type 'exit' or 'quit' to exit
Type '!multi' to enter multiple lines, then '!end' to finish
Type '!edit' to open your default editor and modify the prompt
Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments
> Remember my name is Henry

Tool call: Memory_set({'key': 'user_name', 'value': 'Henry'})
  null

Got it, Henry! I'll remember your name. How can I assist you today?
> what keys are there?

Tool call: Memory_keys({})
  [
    "user_name"
  ]

Currently, there is one key stored: "user_name". Would you like to add or retrieve any information?
> read it

Tool call: Memory_get({'key': 'user_name'})
  Henry

The value stored under the key "user_name" is Henry. Is there anything else you'd like to do?
> add Barrett to it

Tool call: Memory_append({'key': 'user_name', 'value': 'Barrett'})
  null

I have added "Barrett" to the key "user_name". If you want, I can now show you the updated value.
> show value

Tool call: Memory_get({'key': 'user_name'})
  Henry
  Barrett

The value stored under the key "user_name" is now:
Henry
Barrett

Is there anything else you would like to do?
```

(plugin-hooks-register-template-loaders)=
## register_template_loaders(register)

Plugins can register new {ref}`template loaders <prompt-templates-loaders>` using the `register_template_loaders` hook.

Template loaders work with the `llm -t prefix:name` syntax. The prefix specifies the loader, then the registered loader function is called with the name as an argument. The loader function should return an `llm.Template()` object.

This example plugin registers `my-prefix` as a new template loader. Once installed it can be used like this:

```bash
llm -t my-prefix:my-template
```
Here's the Python code:

```python
import llm

@llm.hookimpl
def register_template_loaders(register):
    register("my-prefix", my_template_loader)

def my_template_loader(template_path: str) -> llm.Template:
    """
    Documentation for the template loader goes here. It will be displayed
    when users run the 'llm templates loaders' command.
    """
    try:
        # Your logic to fetch the template content
        # This is just an example:
        prompt = "This is a sample prompt for {}".format(template_path)
        system = "You are an assistant specialized in {}".format(template_path)

        # Return a Template object with the required fields
        return llm.Template(
            name=template_path,
            prompt=prompt,
            system=system,
        )
    except Exception as e:
        # Raise a ValueError with a clear message if the template cannot be found
        raise ValueError(f"Template '{template_path}' could not be loaded: {str(e)}")
```
The `llm.Template` class has the following constructor:

```{eval-rst}
.. autoclass:: llm.Template
```

The loader function should raise a `ValueError` if the template cannot be found or loaded correctly, providing a clear error message.

Note that `functions:` provided by templates using this plugin hook will not be made available, to avoid the risk of plugin hooks that load templates from remote sources introducing arbitrary code execution vulnerabilities.

(plugin-hooks-register-fragment-loaders)=
## register_fragment_loaders(register)

Plugins can register new fragment loaders using the `register_fragment_loaders` hook. These can then be used with the `llm -f prefix:argument` syntax.

Fragment loader plugins differ from template loader plugins in that you can stack more than one fragment loader call together in the same prompt.

A fragment loader can return one or more string fragments or attachments, or a mixture of the two. The fragments will be concatenated together into the prompt string, while any attachments will be added to the list of attachments to be sent to the model.

The `prefix` specifies the loader. The `argument` will be passed to that registered callback.

The callback works in a very similar way to template loaders, but returns either a single `llm.Fragment`, a list of `llm.Fragment` objects, a single `llm.Attachment`, or a list that can mix `llm.Attachment` and `llm.Fragment` objects.

The `llm.Fragment` constructor takes a required string argument (the content of the fragment) and an optional second `source` argument, which is a string that may be displayed as debug information. For files this is a path and for URLs it is a URL. Your plugin can use anything you like for the `source` value.

See {ref}`the Python API documentation for attachments <python-api-attachments>` for details of the `llm.Attachment` class.

Here is some example code:

```python
import llm

@llm.hookimpl
def register_fragment_loaders(register):
    register("my-fragments", my_fragment_loader)


def my_fragment_loader(argument: str) -> llm.Fragment:
    """
    Documentation for the fragment loader goes here. It will be displayed
    when users run the 'llm fragments loaders' command.
    """
    try:
        fragment = "Fragment content for {}".format(argument)
        source = "my-fragments:{}".format(argument)
        return llm.Fragment(fragment, source)
    except Exception as ex:
        # Raise a ValueError with a clear message if the fragment cannot be loaded
        raise ValueError(
            f"Fragment 'my-fragments:{argument}' could not be loaded: {str(ex)}"
        )

# Or for the case where you want to return multiple fragments and attachments:
def my_fragment_loader(argument: str) -> list[llm.Fragment]:
    "Docs go here."
    return [
        llm.Fragment("Fragment 1 content", f"my-fragments:{argument}"),
        llm.Fragment("Fragment 2 content", f"my-fragments:{argument}"),
        llm.Attachment(path="/path/to/image.png"),
    ]
```
A plugin like this one can be called like so:
```bash
llm -f my-fragments:argument
```
If multiple fragments are returned they will be used as if the user passed multiple `-f X` arguments to the command.

Multiple fragments are particularly useful for things like plugins that return every file in a directory. If these were concatenated together by the plugin, a change to a single file would invalidate the de-duplication cache for that whole fragment. Giving each file its own fragment means we can avoid storing multiple copies of that full collection if only a single file has changed.


================================================
FILE: docs/plugins/plugin-utilities.md
================================================
(plugin-utilities)=
# Utility functions for plugins

LLM provides some utility functions that may be useful to plugins.

(plugin-utilities-get-key)=
## llm.get_key()

This method can be used to look up secrets that users have stored using the {ref}`llm keys set <help-keys-set>` command. If your plugin needs to access an API key or other secret this can be a convenient way to provide that.

This returns either a string containing the key or `None` if the key could not be resolved.

Use the `alias="name"` option to retrieve the key set with that alias:

```python
github_key = llm.get_key(alias="github")
```
You can also add `env="ENV_VAR"` to fall back to looking in that environment variable if the key has not been configured:
```python
github_key = llm.get_key(alias="github", env="GITHUB_TOKEN")
```
In some cases you may allow users to provide a key as input, where they could input either the key itself or specify an alias to lookup in `keys.json`. Use the `input=` parameter for that:

```python
github_key = llm.get_key(input=input_from_user, alias="github", env="GITHUB_TOKEN")
```

A previous version of this function used positional arguments in a confusing order. These are still supported but the new keyword arguments are recommended as a better way to use `llm.get_key()` going forward.

(plugin-utilities-user-dir)=
## llm.user_dir()

LLM stores various pieces of logging and configuration data in a directory on the user's machine.

On macOS this directory is `~/Library/Application Support/io.datasette.llm`, but this will differ on other operating systems.

The `llm.user_dir()` function returns the path to this directory as a `pathlib.Path` object, after creating that directory if it does not yet exist.

Plugins can use this to store their own data in a subdirectory of this directory.

```python
import llm
user_dir = llm.user_dir()
plugin_dir = user_dir / "my-plugin"
plugin_dir.mkdir(exist_ok=True)
data_path = plugin_dir / "plugin-data.db"
```

(plugin-utilities-modelerror)=
## llm.ModelError

If your model encounters an error that should be reported to the user you can raise this exception. For example:

```python
import llm

raise llm.ModelError("MPT model not installed - try running 'llm mpt30b download'")
```
This will be caught by the CLI layer and displayed to the user as an error message.

(plugin-utilities-response-fake)=
## Response.fake()

When writing tests for a model it can be useful to generate fake response objects, for example in this test from [llm-mpt30b](https://github.com/simonw/llm-mpt30b):

```python
def test_build_prompt_conversation():
    model = llm.get_model("mpt")
    conversation = model.conversation()
    conversation.responses = [
        llm.Response.fake(model, "prompt 1", "system 1", "response 1"),
        llm.Response.fake(model, "prompt 2", None, "response 2"),
        llm.Response.fake(model, "prompt 3", None, "response 3"),
    ]
    lines = model.build_prompt(llm.Prompt("prompt 4", model), conversation)
    assert lines == [
        "<|im_start|>system\system 1<|im_end|>\n",
        "<|im_start|>user\nprompt 1<|im_end|>\n",
        "<|im_start|>assistant\nresponse 1<|im_end|>\n",
        "<|im_start|>user\nprompt 2<|im_end|>\n",
        "<|im_start|>assistant\nresponse 2<|im_end|>\n",
        "<|im_start|>user\nprompt 3<|im_end|>\n",
        "<|im_start|>assistant\nresponse 3<|im_end|>\n",
        "<|im_start|>user\nprompt 4<|im_end|>\n",
        "<|im_start|>assistant\n",
    ]
```
The signature of `llm.Response.fake()` is:

```python
def fake(cls, model: Model, prompt: str, system: str, response: str):
```


================================================
FILE: docs/plugins/tutorial-model-plugin.md
================================================
(tutorial-model-plugin)=

# Developing a model plugin

This tutorial will walk you through developing a new plugin for LLM that adds support for a new Large Language Model.

We will be developing a plugin that implements a simple [Markov chain](https://en.wikipedia.org/wiki/Markov_chain) to generate words based on an input string. Markov chains are not technically large language models, but they provide a useful exercise for demonstrating how the LLM tool can be extended through plugins.

(tutorial-model-plugin-initial)=

## The initial structure of the plugin

First create a new directory with the name of your plugin - it should be called something like `llm-markov`.
```bash
mkdir llm-markov
cd llm-markov
```
In that directory create a file called `llm_markov.py` containing this:

```python
import llm

@llm.hookimpl
def register_models(register):
    register(Markov())

class Markov(llm.Model):
    model_id = "markov"

    def execute(self, prompt, stream, response, conversation):
        return ["hello world"]
```

The `def register_models()` function here is called by the plugin system (thanks to the `@hookimpl` decorator). It uses the `register()` function passed to it to register an instance of the new model.

The `Markov` class implements the model. It sets a `model_id` - an identifier that can be passed to `llm -m` in order to identify the model to be executed.

The logic for executing the model goes in the `execute()` method. We'll extend this to do something more useful in a later step.

Next, create a `pyproject.toml` file. This is necessary to tell LLM how to load your plugin:

```toml
[project]
name = "llm-markov"
version = "0.1"

[project.entry-points.llm]
markov = "llm_markov"
```

This is the simplest possible configuration. It defines a plugin name and provides an [entry point](https://setuptools.pypa.io/en/latest/userguide/entry_point.html) for `llm` telling it how to load the plugin.

If you are comfortable with Python virtual environments you can create one now for your project, activate it and run `pip install llm` before the next step.

If you aren't familiar with virtual environments, don't worry: you can develop plugins without them. You'll need to have LLM installed using Homebrew or `pipx` or one of the [other installation options](https://llm.datasette.io/en/latest/setup.html#installation).

(tutorial-model-plugin-installing)=

## Installing your plugin to try it out

Having created a directory with a `pyproject.toml` file and an `llm_markov.py` file, you can install your plugin into LLM by running this from inside your `llm-markov` directory:

```bash
llm install -e .
```

The `-e` stands for "editable" - it means you'll be able to make further changes to the `llm_markov.py` file that will be reflected without you having to reinstall the plugin.

The `.` means the current directory. You can also install editable plugins by passing a path to their directory like this:
```bash
llm install -e path/to/llm-markov
```
To confirm that your plugin has installed correctly, run this command:
```bash
llm plugins
```
The output should look like this:
```json
[
  {
    "name": "llm-markov",
    "hooks": [
      "register_models"
    ],
    "version": "0.1"
  },
  {
    "name": "llm.default_plugins.openai_models",
    "hooks": [
      "register_commands",
      "register_models"
    ]
  }
]
```
This command lists default plugins that are included with LLM as well as new plugins that have been installed.

Now let's try the plugin by running a prompt through it:
```bash
llm -m markov "the cat sat on the mat"
```
It outputs:
```
hello world
```
Next, we'll make it execute and return the results of a Markov chain.

(tutorial-model-plugin-building)=

## Building the Markov chain

Markov chains can be thought of as the simplest possible example of a generative language model. They work by building an index of words that have been seen following other words.

Here's what that index looks like for the phrase "the cat sat on the mat":
```json
{
  "the": ["cat", "mat"],
  "cat": ["sat"],
  "sat": ["on"],
  "on": ["the"]
}
```
Here's a Python function that builds that data structure from a text input:
```python
def build_markov_table(text):
    words = text.split()
    transitions = {}
    # Loop through all but the last word
    for i in range(len(words) - 1):
        word = words[i]
        next_word = words[i + 1]
        transitions.setdefault(word, []).append(next_word)
    return transitions
```
We can try that out by pasting it into the interactive Python interpreter and running this:
```pycon
>>> transitions = build_markov_table("the cat sat on the mat")
>>> transitions
{'the': ['cat', 'mat'], 'cat': ['sat'], 'sat': ['on'], 'on': ['the']}
```

(tutorial-model-plugin-executing)=

## Executing the Markov chain

To execute the model, we start with a word. We look at the options for words that might come next and pick one of those at random. Then we repeat that process until we have produced the desired number of output words.

Some words might not have any following words from our training sentence. For our implementation we will fall back on picking a random word from our collection.

We will implement this as a [Python generator](https://realpython.com/introduction-to-python-generators/), using the yield keyword to produce each token:
```python
def generate(transitions, length, start_word=None):
    all_words = list(transitions.keys())
    next_word = start_word or random.choice(all_words)
    for i in range(length):
        yield next_word
        options = transitions.get(next_word) or all_words
        next_word = random.choice(options)
```
If you aren't familiar with generators, the above code could also be implemented like this - creating a Python list and returning it at the end of the function:
```python
def generate_list(transitions, length, start_word=None):
    all_words = list(transitions.keys())
    next_word = start_word or random.choice(all_words)
    output = []
    for i in range(length):
        output.append(next_word)
        options = transitions.get(next_word) or all_words
        next_word = random.choice(options)
    return output
```
You can try out the `generate()` function like this:
```python
transitions = build_markov_table("the cat sat on the mat")
for word in generate(transitions, 20):
    print(word)
```
Or you can generate a full string sentence with it like this:
```python
sentence = " ".join(generate(transitions, 20))
```

(tutorial-model-plugin-register)=

## Adding that to the plugin

Our `execute()` method from earlier currently returns the list `["hello world"]`.

Update that to use our new Markov chain generator instead. Here's the full text of the new `llm_markov.py` file:

```python
import llm
import random

@llm.hookimpl
def register_models(register):
    register(Markov())

def build_markov_table(text):
    words = text.split()
    transitions = {}
    # Loop through all but the last word
    for i in range(len(words) - 1):
        word = words[i]
        next_word = words[i + 1]
        transitions.setdefault(word, []).append(next_word)
    return transitions

def generate(transitions, length, start_word=None):
    all_words = list(transitions.keys())
    next_word = start_word or random.choice(all_words)
    for i in range(length):
        yield next_word
        options = transitions.get(next_word) or all_words
        next_word = random.choice(options)

class Markov(llm.Model):
    model_id = "markov"

    def execute(self, prompt, stream, response, conversation):
        text = prompt.prompt
        transitions = build_markov_table(text)
        for word in generate(transitions, 20):
            yield word + ' '
```
The `execute()` method can access the text prompt that the user provided using `prompt.prompt` - `prompt` is a `Prompt` object that might include other more advanced input details as well.

Now when you run this you should see the output of the Markov chain!
```bash
llm -m markov "the cat sat on the mat"
```
```
the mat the cat sat on the cat sat on the mat cat sat on the mat cat sat on
```

(tutorial-model-plugin-execute)=

## Understanding execute()

The full signature of the `execute()` method is:
```python
def execute(self, prompt, stream, response, conversation):
```
The `prompt` argument is a `Prompt` object that contains the text that the user provided, the system prompt and the provided options.

`stream` is a boolean that says if the model is being run in streaming mode.

`response` is the `Response` object that is being created by the model. This is provided so you can write additional information to `response.response_json`, which may be logged to the database.

`conversation` is the `Conversation` that the prompt is a part of - or `None` if no conversation was provided. Some models may use `conversation.responses` to access previous prompts and responses in the conversation and use them to construct a call to the LLM that includes previous context.

(tutorial-model-plugin-logging)=

## Prompts and responses are logged to the database

The prompt and the response will be logged to a SQLite database automatically by LLM. You can see the single most recent addition to the logs using:
```
llm logs -n 1
```
The output should look something like this:
```json
[
  {
    "id": "01h52s4yez2bd1qk2deq49wk8h",
    "model": "markov",
    "prompt": "the cat sat on the mat",
    "system": null,
    "prompt_json": null,
    "options_json": {},
    "response": "on the cat sat on the cat sat on the mat cat sat on the cat sat on the cat ",
    "response_json": null,
    "conversation_id": "01h52s4yey7zc5rjmczy3ft75g",
    "duration_ms": 0,
    "datetime_utc": "2023-07-11T15:29:34.685868",
    "conversation_name": "the cat sat on the mat",
    "conversation_model": "markov"
  }
]
```
Plugins can log additional information to the database by assigning a dictionary to the `response.response_json` property during the `execute()` method.

Here's how to include that full `transitions` table in the `response_json` in the log:
```python
    def execute(self, prompt, stream, response, conversation):
        text = prompt.prompt
        transitions = build_markov_table(text)
        for word in generate(transitions, 20):
            yield word + ' '
        response.response_json = {"transitions": transitions}
```

Now when you run the logs command you'll see that too:
```bash
llm logs -n 1
```
```json
[
  {
    "id": 623,
    "model": "markov",
    "prompt": "the cat sat on the mat",
    "system": null,
    "prompt_json": null,
    "options_json": {},
    "response": "on the mat the cat sat on the cat sat on the mat sat on the cat sat on the ",
    "response_json": {
      "transitions": {
        "the": [
          "cat",
          "mat"
        ],
        "cat": [
          "sat"
        ],
        "sat": [
          "on"
        ],
        "on": [
          "the"
        ]
      }
    },
    "reply_to_id": null,
    "chat_id": null,
    "duration_ms": 0,
    "datetime_utc": "2023-07-06T01:34:45.376637"
  }
]
```
In this particular case this isn't a great idea though: the `transitions` table is duplicate information, since it can be reproduced from the input data - and it can get really large for longer prompts.

(tutorial-model-plugin-options)=

## Adding options

LLM models can take options. For large language models these can be things like `temperature` or `top_k`.

Options are passed using the `-o/--option` command line parameters, for example:
```bash
llm -m gpt4 "ten pet pelican names" -o temperature 1.5
```
We're going to add two options to our Markov chain model:

- `length`: Number of words to generate
- `delay`: Floating point number of seconds to delay in between output tokens

The `delay` option will let us simulate a streaming language model, where tokens take time to generate and are returned by the `execute()` function as they become ready.

Options are defined using an inner class on the model, called `Options`. It should extend the `llm.Options` class.

First, add this import to the top of your `llm_markov.py` file:
```python
from typing import Optional
```
Then add this `Options` class to your model:
```python
class Markov(llm.Model):
    model_id = "markov"

    class Options(llm.Options):
        length: Optional[int] = None
        delay: Optional[float] = None
```
Let's add extra validation rules to our options. Length must be at least 2. Delay must be between 0 and 10.

The `Options` class uses [Pydantic 2](https://pydantic.dev/), which can support all sorts of advanced validation rules.

We can also add inline documentation, which can then be displayed by the `llm models --options` command.

Add these imports to the top of `llm_markov.py`:
```python
from pydantic import field_validator, Field
```

We can now add Pydantic field validators for our two new rules, plus inline documentation:

```python
    class Options(llm.Options):
        length: Optional[int] = Field(
            description="Number of words to generate",
            default=None
        )
        delay: Optional[float] = Field(
            description="Seconds to delay between each token",
            default=None
        )

        @field_validator("length")
        def validate_length(cls, length):
            if length is None:
                return None
            if length < 2:
                raise ValueError("length must be >= 2")
            return length

        @field_validator("delay")
        def validate_delay(cls, delay):
            if delay is None:
                return None
            if not 0 <= delay <= 10:
                raise ValueError("delay must be between 0 and 10")
            return delay
```
Let's test our options validation:
```bash
llm -m markov "the cat sat on the mat" -o length -1
```
```
Error: length
  Value error, length must be >= 2
```

Next, we will modify our `execute()` method to handle those options. Add this to the beginning of `llm_markov.py`:
```python
import time
```
Then replace the `execute()` method with this one:
```python
    def execute(self, prompt, stream, response, conversation):
        text = prompt.prompt
        transitions = build_markov_table(text)
        length = prompt.options.length or 20
        for word in generate(transitions, length):
            yield word + ' '
            if prompt.options.delay:
                time.sleep(prompt.options.delay)
```
Add `can_stream = True` to the top of the `Markov` model class, on the line below `model_id = "markov"`. This tells LLM that the model is able to stream content to the console.

The full `llm_markov.py` file should now look like this:

```{literalinclude} llm-markov/llm_markov.py
:language: python
```

Now we can request a 20 word completion with a 0.1s delay between tokens like this:
```bash
llm -m markov "the cat sat on the mat" \
  -o length 20 -o delay 0.1
```
LLM provides a `--no-stream` option users can use to turn off streaming. Using that option causes LLM to gather the response from the stream and then return it to the console in one block. You can try that like this:
```bash
llm -m markov "the cat sat on the mat" \
  -o length 20 -o delay 0.1 --no-stream
```
In this case it will still delay for 2s total while it gathers the tokens, then output them all at once.

That `--no-stream` option causes the `stream` argument passed to `execute()` to be false. Your `execute()` method can then behave differently depending on whether it is streaming or not.

Options are also logged to the database. You can see those here:
```bash
llm logs -n 1
```
```json
[
  {
    "id": 636,
    "model": "markov",
    "prompt": "the cat sat on the mat",
    "system": null,
    "prompt_json": null,
    "options_json": {
      "length": 20,
      "delay": 0.1
    },
    "response": "the mat on the mat on the cat sat on the mat sat on the mat cat sat on the ",
    "response_json": null,
    "reply_to_id": null,
    "chat_id": null,
    "duration_ms": 2063,
    "datetime_utc": "2023-07-07T03:02:28.232970"
  }
]
```

(tutorial-model-plugin-distributing)=

## Distributing your plugin

There are many different options for distributing your new plugin so other people can try it out.

You can create a downloadable wheel or `.zip` or `.tar.gz` files, or share the plugin through GitHub Gists or repositories.

You can also publish your plugin to PyPI, the Python Package Index.

(tutorial-model-plugin-wheels)=

### Wheels and sdist packages

The easiest way to produce a distributable package is to use the `build` command. First, install the `build` package by running this:
```bash
python -m pip install build
```
Then run `build` in your plugin directory to create the packages:
```bash
python -m build
```
This will create two files: `dist/llm_markov-0.1.tar.gz` and `dist/llm_markov-0.1-py3-none-any.whl`.

Either of these files can be used to install the plugin:

```bash
llm install dist/llm_markov-0.1-py3-none-any.whl
```
If you host this file somewhere online other people will be able to install it using `llm install` against the URL to your package:
```bash
llm install 'https://.../llm_markov-0.1-py3-none-any.whl'
```
You can run the following command at any time to uninstall your plugin, which is useful for testing out different installation methods:
```bash
llm uninstall llm-markov -y
```

(tutorial-model-plugin-gists)=

### GitHub Gists

A neat quick option for distributing a simple plugin is to host it in a GitHub Gist. These are available for free with a GitHub account, and can be public or private. Gists can contain multiple files but don't support directory structures - which is OK, because our plugin is just two files, `pyproject.toml` and `llm_markov.py`.

Here's an example Gist I created for this tutorial:

[https://gist.github.com/simonw/6e56d48dc2599bffba963cef0db27b6d](https://gist.github.com/simonw/6e56d48dc2599bffba963cef0db27b6d)

You can turn a Gist into an installable `.zip` URL by right-clicking on the "Download ZIP" button and selecting "Copy Link". Here's that link for my example Gist:

`https://gist.github.com/simonw/6e56d48dc2599bffba963cef0db27b6d/archive/cc50c854414cb4deab3e3ab17e7e1e07d45cba0c.zip`

The plugin can be installed using the `llm install` command like this:
```bash
llm install 'https://gist.github.com/simonw/6e56d48dc2599bffba963cef0db27b6d/archive/cc50c854414cb4deab3e3ab17e7e1e07d45cba0c.zip'
```

(tutorial-model-plugin-github)=

### GitHub repositories

The same trick works for regular GitHub repositories as well: the "Download ZIP" button can be found by clicking the green "Code" button at the top of the repository. The URL which that provides can then be used to install the plugin that lives in that repository.

(tutorial-model-plugin-pypi)=

### Publishing plugins to PyPI

The [Python Package Index (PyPI)](https://pypi.org/) is the official repository for Python packages. You can upload your plugin to PyPI and reserve a name for it - once you have done that, anyone will be able to install your plugin using `llm install <name>`.

Follow [these instructions](https://packaging.python.org/en/latest/tutorials/packaging-projects/#uploading-the-distribution-archives) to publish a package to PyPI. The short version:
```bash
python -m pip install twine
python -m twine upload dist/*
```
You will need an account on PyPI, then you can enter your username and password - or create a token in the PyPI settings and use `__token__` as the username and the token as the password.

(tutorial-model-plugin-metadata)=

## Adding metadata

Before uploading a package to PyPI it's a good idea to add documentation and expand `pyproject.toml` with additional metadata.

Create a `README.md` file in the root of your plugin directory with instructions about how to install, configure and use your plugin.

You can then replace `pyproject.toml` with something like this:

```toml
[project]
name = "llm-markov"
version = "0.1"
description = "Plugin for LLM adding a Markov chain generating model"
readme = "README.md"
authors = [{name = "Simon Willison"}]
license = {text = "Apache-2.0"}
classifiers = [
    "License :: OSI Approved :: Apache Software License"
]
dependencies = [
    "llm"
]
requires-python = ">3.7"

[project.urls]
Homepage = "https://github.com/simonw/llm-markov"
Changelog = "https://github.com/simonw/llm-markov/releases"
Issues = "https://github.com/simonw/llm-markov/issues"

[project.entry-points.llm]
markov = "llm_markov"
```
This will pull in your README to be displayed as part of your project's listing page on PyPI.

It adds `llm` as a dependency, ensuring it will be installed if someone tries to install your plugin package without it.

It adds some links to useful pages (you can drop the `project.urls` section if those links are not useful for your project).

You should drop a `LICENSE` file into the GitHub repository for your package as well. I like to use the Apache 2 license [like this](https://github.com/simonw/llm/blob/main/LICENSE).

(tutorial-model-plugin-breaks)=

## What to do if it breaks

Sometimes you may make a change to your plugin that causes it to break, preventing `llm` from starting. For example you may see an error like this one:

```
$ llm 'hi'
Traceback (most recent call last):
  ...
  File "llm-markov/llm_markov.py", line 10
    register(Markov()):
                      ^
SyntaxError: invalid syntax
```
You may find that you are unable to uninstall the plugin using `llm uninstall llm-markov` because the command itself fails with the same error.

Should this happen, you can uninstall the plugin after first disabling it using the {ref}`LLM_LOAD_PLUGINS <llm-load-plugins>` environment variable like this:
```bash
LLM_LOAD_PLUGINS='' llm uninstall llm-markov
```


================================================
FILE: docs/python-api.md
================================================
(python-api)=
# Python API

LLM provides a Python API for executing prompts, in addition to the command-line interface.

Understanding this API is also important for writing {ref}`plugins`.

## Basic prompt execution

To run a prompt against the `gpt-4o-mini` model, run this:

```python
import llm

model = llm.get_model("gpt-4o-mini")
# key= is optional, you can configure the key in other ways
response = model.prompt(
    "Five surprising names for a pet pelican",
    key="sk-..."
)
print(response.text())
```
Note that the prompt will not be evaluated until you call that `response.text()` method - a form of lazy loading.

If you inspect the response before it has been evaluated it will look like this:

    <Response prompt='Your prompt' text='... not yet done ...'>

The `llm.get_model()` function accepts model IDs or aliases. You can also omit it to use the currently configured default model, which is `gpt-4o-mini` if you have not changed the default.

In this example the key is set by Python code. You can also provide the key using the `OPENAI_API_KEY` environment variable, or use the `llm keys set openai` command to store it in a `keys.json` file, see {ref}`api-keys`.

The `__str__()` method of `response` also returns the text of the response, so you can do this instead:

```python
print(llm.get_model().prompt("Five surprising names for a pet pelican"))
```

You can run this command to see a list of available models and their aliases:

```bash
llm models
```
If you have set an `OPENAI_API_KEY` environment variable you can omit the `key=` argument.

Calling `llm.get_model()` with an invalid model ID will raise a `llm.UnknownModelError` exception.

(python-api-system-prompts)=

### System prompts

For models that accept a system prompt, pass it as `system="..."`:

```python
response = model.prompt(
    "Five surprising names for a pet pelican",
    system="Answer like GlaDOS"
)
```

(python-api-attachments)=

### Attachments

Models that accept multi-modal input (images, audio, video etc) can be passed attachments using the `attachments=` keyword argument. This accepts a list of `llm.Attachment()` instances.

This example shows two attachments - one from a file path and one from a URL:
```python
import llm

model = llm.get_model("gpt-4o-mini")
response = model.prompt(
    "Describe these images",
    attachments=[
        llm.Attachment(path="pelican.jpg"),
        llm.Attachment(url="https://static.simonwillison.net/static/2024/pelicans.jpg"),
    ]
)
```
Use `llm.Attachment(content=b"binary image content here")` to pass binary content directly.

You can check which attachment types (if any) a model supports using the `model.attachment_types` set:

```python
model = llm.get_model("gpt-4o-mini")
print(model.attachment_types)
# {'image/gif', 'image/png', 'image/jpeg', 'image/webp'}

if "image/jpeg" in model.attachment_types:
    # Use a JPEG attachment here
    ...
```

(python-api-tools)=

### Tools

{ref}`Tools <tools>` are functions that can be executed by the model as part of a chain of responses.

You can define tools in Python code - with a docstring to describe what they do - and then pass them to the `model.prompt()` method using the `tools=` keyword argument. If the model decides to request a tool call, the `response.tool_calls()` method will show what the model wants to execute:

```python
import llm

def upper(text: str) -> str:
    """Convert text to uppercase."""
    return text.upper()

model = llm.get_model("gpt-4.1-mini")
response = model.prompt("Convert panda to upper", tools=[upper])
tool_calls = response.tool_calls()
# [ToolCall(name='upper', arguments={'text': 'panda'}, tool_call_id='...')]
```
You can call `response.execute_tool_calls()` to execute those calls and get back the results:
```python
tool_results = response.execute_tool_calls()
# [ToolResult(name='upper', output='PANDA', tool_call_id='...')]
```
You can use the `model.chain()` method to pass the results of tool calls back to the model automatically as subsequent prompts:
```python
chain_response = model.chain(
    "Convert panda to upper",
    tools=[upper],
)
print(chain_response.text())
# The word "panda" converted to uppercase is "PANDA".
```
You can also loop through the `model.chain()` response to get a stream of tokens, like this:
```python
for chunk in model.chain(
    "Convert panda to upper",
    tools=[upper],
):
    print(chunk, end="", flush=True)
```
This will stream each of the chain of responses in turn as they are generated.

You can access the individual responses that make up the chain using `chain.responses()`. This can be iterated over as the chain executes like this:

```python
chain = model.chain(
    "Convert panda to upper",
    tools=[upper],
)
for response in chain.responses():
    print(response.prompt)
    for chunk in response:
        print(chunk, end="", flush=True)
```

(python-api-tools-debug-hooks)=

#### Tool debugging hooks

Pass a function to the `before_call=` parameter of `model.chain()` to have that called before every tool call is executed. You can raise `llm.CancelToolCall()` to cancel that tool call.

The method signature is `def before_call(tool: Optional[llm.Tool], tool_call: llm.ToolCall)` - that first `tool` argument can be `None` if the model requests a tool be executed that has not been provided in the `tools=` list.

Here's an example:
```python
import llm
from typing import Optional

def upper(text: str) -> str:
    "Convert text to uppercase."
    return text.upper()

def before_call(tool: Optional[llm.Tool], tool_call: llm.ToolCall):
    print(f"About to call tool {tool.name} with arguments {tool_call.arguments}")
    if tool.name == "upper" and "bad" in repr(tool_call.arguments):
        raise llm.CancelToolCall("Not allowed to call upper on text containing 'bad'")

model = llm.get_model("gpt-4.1-mini")
response = model.chain(
    "Convert panda to upper and badger to upper",
    tools=[upper],
    before_call=before_call,
)
print(response.text())
```
If you raise `llm.CancelToolCall` in the `before_call` function the model will be informed that the tool call was cancelled.

The `after_call=` parameter can be used to run a logging function after each tool call has been executed. The method signature is `def after_call(tool: llm.Tool, tool_call: llm.ToolCall, tool_result: llm.ToolResult)`. This continues the previous example:
```python
def after_call(tool: llm.Tool, tool_call: llm.ToolCall, tool_result: llm.ToolResult):
    print(f"Tool {tool.name} called with arguments {tool_call.arguments} returned {tool_result.output}")

response = model.chain(
    "Convert panda to upper and badger to upper",
    tools=[upper],
    after_call=after_call,
)
print(response.text())
```

(python-api-tools-attachments)=

#### Tools can return attachments

Tools can return {ref}`attachments <python-api-attachments>` in addition to returning text. Attachments that are returned from a tool call will be passed to the model as attachments for the next prompt in the chain.

To return one or more attachments, return a `llm.ToolOutput` instance from your tool function. This can have an `output=` string and an `attachments=` list of `llm.Attachment` instances.

Here's an example:
```python
import llm

def generate_image(prompt: str) -> llm.ToolOutput:
    """Generate an image based on the prompt."""
    image_content = generate_image_from_prompt(prompt)
    return llm.ToolOutput(
        output="Image generated successfully",
        attachments=[llm.Attachment(
            content=image_content,
            mimetype="image/png"
        )],
    )
```

(python-api-toolbox)=

#### Toolbox classes

Functions are useful for simple tools, but some tools may have more advanced needs. You can also define tools as a class (known as a "toolbox"), which provides the following advantages:

- Toolbox tools can bundle multiple tools together
- Toolbox tools can be configured, e.g. to give filesystem tools access to a specific directory
- Toolbox instances can persist shared state in between tool invocations

Toolboxes are classes that extend `llm.Toolbox`. Any methods that do not begin with an underscore will be exposed as tool functions.

This example sets up key/value memory storage that can be used by the model:
```python
import llm

class Memory(llm.Toolbox):
    _memory = None

    def _get_memory(self):
        if self._memory is None:
            self._memory = {}
        return self._memory

    def set(self, key: str, value: str):
        "Set something as a key"
        self._get_memory()[key] = value

    def get(self, key: str):
        "Get something from a key"
        return self._get_memory().get(key) or ""

    def append(self, key: str, value: str):
        "Append something as a key"
        memory = self._get_memory()
        memory[key] = (memory.get(key) or "") + "\n" + value

    def keys(self):
        "Return a list of keys"
        return list(self._get_memory().keys())
```
You can then use that from Python like this:
```python
model = llm.get_model("gpt-4.1-mini")
memory = Memory()

conversation = model.conversation(tools=[memory])
print(conversation.chain("Set name to Simon", after_call=print).text())

print(memory._memory)
# Should show {'name': 'Simon'}

print(conversation.chain("Set name to Penguin", after_call=print).text())
# Now it should be {'name': 'Penguin'}

print(conversation.chain("Print current name", after_call=print).text())
```

See the {ref}`register_tools() plugin hook documentation <plugin-hooks-register-tools>` for an example of this tool in action as a CLI plugin.

(python-api-tools-dynamic)=
#### Dynamic toolboxes

Sometimes you may need to register additional tools against a toolbox after it has been created - for example if you are implementing an MCP plugin where the toolbox needs to consult the MCP server to discover what tools are available.

You can use the `toolbox.add_tool(function_or_tool)` method to add a new tool to an existing toolbox.

This can be passed a `llm.Tool` instance or a function that will be converted into a tool automatically.

If you want your function to be able to access the toolbox instance itself as a `self` parameter, pass that function to `add_tool()` with the `pass_self=True` parameter:

```python
def my_function(self, arg1: str, arg2: int) -> str:
    return f"Received {arg1} and {arg2} in {self}"

toolbox.add_tool(my_function, pass_self=True)
```
Without `pass_self=True` the function will be called with only its declared arguments, with no `self` parameter.

If your toolbox needs to run an additional command to figure out what it should register using `.add_tool()` you can implement a `prepare()` method on your toolbox class. This will be called once automatically when the toolbox is first used.

In asynchronous contexts the alternative `await toolbox.prepare_async()` method will be called before the toolbox is used. You can implement this method on your subclass and use it to run asynchronous operations that discover tools to be registered using `self.add_tool()`.

If you want to prepare the class in this way such that it can be used in both synchronous and asynchronous contexts, implement both `prepare()` and `prepare_async()` methods.

(python-api-schemas)=

### Schemas

As with {ref}`the CLI tool <usage-schemas>` some models support passing a JSON schema that should be used for the resulting response.

You can pass this to the `prompt(schema=)` parameter as either a Python dictionary or a [Pydantic](https://docs.pydantic.dev/) `BaseModel` subclass:

```python
import llm, json
from pydantic import BaseModel

class Dog(BaseModel):
    name: str
    age: int

model = llm.get_model("gpt-4o-mini")
response = model.prompt("Describe a nice dog", schema=Dog)
dog = json.loads(response.text())
print(dog)
# {"name":"Buddy","age":3}
```
You can also pass a schema directly, like this:
```python
response = model.prompt("Describe a nice dog", schema={
    "properties": {
        "name": {"title": "Name", "type": "string"},
        "age": {"title": "Age", "type": "integer"},
    },
    "required": ["name", "age"],
    "title": "Dog",
    "type": "object",
})
```

You can also use LLM's {ref}`alternative schema syntax <schemas-dsl>` via the `llm.schema_dsl(schema_dsl)` function. This provides a quick way to construct a JSON schema for simple cases:

```python
print(model.prompt(
    "Describe a nice dog with a surprising name",
    schema=llm.schema_dsl("name, age int, bio")
))
```
Pass `multi=True` to generate a schema that returns multiple items matching that specification:

```python
print(model.prompt(
    "Describe 3 nice dogs with surprising names",
    schema=llm.schema_dsl("name, age int, bio", multi=True)
))
```

(python-api-fragments)=

### Fragments

The {ref}`fragment system <usage-fragments>` from the CLI tool can also be accessed from the Python API, by passing `fragments=` and/or `system_fragments=` lists of strings to the `prompt()` method:

```python
response = model.prompt(
    "What do these documents say about dogs?",
    fragments=[
        open("dogs1.txt").read(),
        open("dogs2.txt").read(),
    ],
    system_fragments=[
        "You answer questions like Snoopy",
    ]
)
```
This mechanism has limited utility in Python, as you can also assemble the contents of these strings together into the `prompt=` and `system=` strings directly.

Fragments become more interesting if you are working with LLM's mechanisms for storing prompts to a SQLite database, which are not yet part of the stable, documented Python API.

Some model plugins may include features that take advantage of fragments, for example [llm-anthropic](https://github.com/simonw/llm-anthropic) aims to use them as part of a mechanism that taps into Claude's prompt caching system.


(python-api-model-options)=

### Model options

For models that support options (view those with `llm models --options`) you can pass options as keyword arguments to the `.prompt()` method:

```python
model = llm.get_model()
print(model.prompt("Names for otters", temperature=0.2))
```

(python-api-models-api-keys)=

### Passing an API key

Models that accept API keys should take an additional `key=` parameter to their `model.prompt()` method:

```python
model = llm.get_model("gpt-4o-mini")
print(model.prompt("Names for beavers", key="sk-..."))
```

If you don't provide this argument LLM will attempt to find it from an environment variable (`OPENAI_API_KEY` for OpenAI, others for different plugins) or from keys that have been saved using the {ref}`llm keys set <api-keys>` command.

Some model plugins may not yet have been upgraded to handle the `key=` parameter, in which case you will need to use one of the other mechanisms.

(python-api-models-from-plugins)=

### Models from plugins

Any models you have installed as plugins will also be available through this mechanism, for example to use Anthropic's Claude 3.5 Sonnet model with [llm-anthropic](https://github.com/simonw/llm-anthropic):

```bash
pip install llm-anthropic
```
Then in your Python code:
```python
import llm

model = llm.get_model("claude-3.5-sonnet")
# Use this if you have not set the key using 'llm keys set claude':
model.key = 'YOUR_API_KEY_HERE'
response = model.prompt("Five surprising names for a pet pelican")
print(response.text())
```
Some models do not use API keys at all.

(python-api-underlying-json)=

### Accessing the underlying JSON

Most model plugins also make a JSON version of the prompt response available. The structure of this will differ between model plugins, so building against this is likely to result in code that only works with that specific model provider.

You can access this JSON data as a Python dictionary using the `response.json()` method:

```python
import llm
from pprint import pprint

model = llm.get_model("gpt-4o-mini")
response = model.prompt("3 names for an otter")
json_data = response.json()
pprint(json_data)
```
Here's that example output from GPT-4o mini:
```python
{'content': 'Sure! Here are three fun names for an otter:\n'
            '\n'
            '1. **Splash**\n'
            '2. **Bubbles**\n'
            '3. **Otto** \n'
            '\n'
            'Feel free to mix and match or use these as inspiration!',
 'created': 1739291215,
 'finish_reason': 'stop',
 'id': 'chatcmpl-AznO31yxgBjZ4zrzBOwJvHEWgdTaf',
 'model': 'gpt-4o-mini-2024-07-18',
 'object': 'chat.completion.chunk',
 'usage': {'completion_tokens': 43,
           'completion_tokens_details': {'accepted_prediction_tokens': 0,
                                         'audio_tokens': 0,
                                         'reasoning_tokens': 0,
                                         'rejected_prediction_tokens': 0},
           'prompt_tokens': 13,
           'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0},
           'total_tokens': 56}}
```

(python-api-token-usage)=

### Token usage

Many models can return a count of the number of tokens used while executing the prompt.

The `response.usage()` method provides an abstraction over this:

```python
pprint(response.usage())
```
Example output:
```python
Usage(input=5,
      output=2,
      details={'candidatesTokensDetails': [{'modality': 'TEXT',
                                            'tokenCount': 2}],
               'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 5}]})
```
The `.input` and `.output` properties are integers representing the number of input and output tokens. The `.details` property may be a dictionary with additional custom values that vary by model.

(python-api-streaming-responses)=

### Streaming responses

For models that support it you can stream responses as they are generated, like this:

```python
response = model.prompt("Five diabolical names for a pet goat")
for chunk in response:
    print(chunk, end="")
```
The `response.text()` method described earlier does this for you - it runs through the iterator and gathers the results into a string.

If a response has been evaluated, `response.text()` will continue to return the same string.

(python-api-async)=

## Async models

Some plugins provide async versions of their supported models, suitable for use with Python [asyncio](https://docs.python.org/3/library/asyncio.html).

To use an async model, use the `llm.get_async_model()` function instead of `llm.get_model()`:

```python
import llm
model = llm.get_async_model("gpt-4o")
```
You can then run a prompt using `await model.prompt(...)`:

```python
print(await model.prompt(
    "Five surprising names for a pet pelican"
).text())
```
Or use `async for chunk in ...` to stream the response as it is generated:
```python
async for chunk in model.prompt(
    "Five surprising names for a pet pelican"
):
    print(chunk, end="", flush=True)
```
This `await model.prompt()` method takes the same arguments as the synchronous `model.prompt()` method, for options and attachments and `key=` and suchlike.

(python-api-async-tools)=

### Tool functions can be sync or async

{ref}`Tool functions <python-api-tools>` can be either synchronous or asynchronous. The latter are defined using `async def tool_name(...)`. Either kind of function can be passed to the `tools=[...]` parameter.

If an `async def` function is used in a synchronous context LLM will automatically execute it in a thread pool using `asyncio.run()`. This means the following will work even in non-asynchronous Python scripts:

```python
async def hello(name: str) -> str:
    "Say hello to name"
    return "Hello there " + name

model = llm.get_model("gpt-4.1-mini")
chain_response = model.chain(
    "Say hello to Percival", tools=[hello]
)
print(chain_response.text())
```
This also works for `async def` methods of `llm.Toolbox` subclasses.

### Tool use for async models

Tool use is also supported for async models, using either synchronous or asynchronous tool functions. Synchronous functions will block the event loop so only use those in an asynchronous context if you are certain they are extremely fast.

The `response.execute_tool_calls()` and `chain_response.text()` and `chain_response.responses()` methods must all be awaited when run against asynchronous models:

```python
import llm
model = llm.get_async_model("gpt-4.1")

def upper(string):
    "Converts string to uppercase"
    return string.upper()

chain = model.chain(
    "Convert panda to uppercase then pelican to uppercase",
    tools=[upper],
    after_call=print
)
print(await chain.text())
```

To iterate over the chained response output as it arrives use `async for`:
```python
async for chunk in model.chain(
    "Convert panda to uppercase then pelican to uppercase",
    tools=[upper]
):
    print(chunk, end="", flush=True)
```
The `before_call` and `after_call` hooks can be async functions when used with async models.

(python-api-conversations)=

## Conversations

LLM supports *conversations*, where you ask follow-up questions of a model as part of an ongoing conversation.

To start a new conversation, use the `model.conversation()` method:

```python
model = llm.get_model()
conversation = model.conversation()
```
You can then use the `conversation.prompt()` method to execute prompts against this conversation:

```python
response = conversation.prompt("Five fun facts about pelicans")
print(response.text())
```
This works exactly the same as the `model.prompt()` method, except that the conversation will be maintained across multiple prompts. So if you run this next:
```python
response2 = conversation.prompt("Now do skunks")
print(response2.text())
```
You will get back five fun facts about skunks.

The `conversation.prompt()` method supports attachments as well:
```python
response = conversation.prompt(
    "Describe these birds",
    attachments=[
        llm.Attachment(url="https://static.simonwillison.net/static/2024/pelicans.jpg")
    ]
)
```

Access `conversation.responses` for a list of all of the responses that have so far been returned during the conversation.

### Conversations using tools

You can pass a list of tool functions to the `tools=[]` argument when you start a new conversation:
```python
import llm

def upper(text: str) -> str:
    "convert text to upper case"
    return text.upper()

def reverse(text: str) -> str:
    "reverse text"
    return text[::-1]

model = llm.get_model("gpt-4.1-mini")
conversation = model.conversation(tools=[upper, reverse])
```
You can then call the `conversation.chain()` method multiple times to have a conversation that uses those tools:
```python
print(conversation.chain(
    "Convert panda to uppercase and reverse it"
).text())
print(conversation.chain(
    "Same with pangolin"
).text())
```
The `before_call=` and `after_call=` parameters {ref}`described above <python-api-tools-debug-hooks>` can be passed directly to the `model.conversation()` method to set those options for all chained prompts in that conversation.


(python-api-listing-models)=

## Listing models

The `llm.get_models()` function returns a list of all available models, including those from plugins.

```python
import llm

for model in llm.get_models():
    print(model.model_id)
```

Use `llm.get_async_models()` to list async models:

```python
for model in llm.get_async_models():
    print(model.model_id)
```

(python-api-response-on-done)=

## Running code when a response has completed

For some applications, such as tracking the tokens used by an application, it may be useful to execute code as soon as a response has finished being executed.

You can do this using the `response.on_done(callback)` method, which causes your callback function to be called as soon as the response has finished (all tokens have been returned).

The signature of the method you provide is `def callback(response)` - it can optionally be an `async def` method when working with asynchronous models.

Example usage:

```python
import llm

model = llm.get_model("gpt-4o-mini")
response = model.prompt("a poem about a hippo")
response.on_done(lambda response: print(response.usage()))
print(response.text())
```
Which outputs:
```
Usage(input=20, output=494, details={})
In a sunlit glade by a bubbling brook,
Lived a hefty hippo, with a curious look.
...
```
Or using an `asyncio` model, where you need to `await response.on_done(done)` to queue up the callback:
```python
import asyncio, llm

async def run():
    model = llm.get_async_model("gpt-4o-mini")
    response = model.prompt("a short poem about a brick")
    async def done(response):
        print(await response.usage())
        print(await response.text())
    await response.on_done(done)
    print(await response.text())

asyncio.run(run())
```

## Other functions

The `llm` top level package includes some useful utility functions.

### set_alias(alias, model_id)

The `llm.set_alias()` function can be used to define a new alias:

```python
import llm

llm.set_alias("mini", "gpt-4o-mini")
```
The second argument can be a model identifier or another alias, in which case that alias will be resolved.

If the `aliases.json` file does not exist or contains invalid JSON it will be created or overwritten.

### remove_alias(alias)

Removes the alias with the given name from the `aliases.json` file.

Raises `KeyError` if the alias does not exist.

```python
import llm

llm.remove_alias("turbo")
```

### set_default_model(alias)

This sets the default model to the given model ID or alias. Any changes to defaults will be persisted in the LLM configuration folder, and will affect all programs using LLM on the system, including the `llm` CLI tool.

```python
import llm

llm.set_default_model("claude-3.5-sonnet")
```

### get_default_model()

This returns the currently configured default model, or `gpt-4o-mini` if no default has been set.

```python
import llm

model_id = llm.get_default_model()
```

To detect if no default has been set you can use this pattern:

```python
if llm.get_default_model(default=None) is None:
    print("No default has been set")
```
Here the `default=` parameter specifies the value that should be returned if there is no configured default.

### set_default_embedding_model(alias) and get_default_embedding_model()

These two methods work the same as `set_default_model()` and `get_default_model()` but for the default {ref}`embedding model <embeddings>` instead.


================================================
FILE: docs/related-tools.md
================================================
(related-tools)=
# Related tools

The following tools are designed to be used with LLM:

(related-tools-strip-tags)=
## strip-tags

[strip-tags](https://github.com/simonw/strip-tags) is a command for stripping tags from HTML. This is useful when working with LLMs because HTML tags can use up a lot of your token budget.

Here's how to summarize the front page of the New York Times, by both stripping tags and filtering to just the elements with `class="story-wrapper"`:

```bash
curl -s https://www.nytimes.com/ \
  | strip-tags .story-wrapper \
  | llm -s 'summarize the news'
```

[llm, ttok and strip-tags—CLI tools for working with ChatGPT and other LLMs](https://simonwillison.net/2023/May/18/cli-tools-for-llms/) describes ways to use `strip-tags` in more detail.

(related-tools-ttok)=
## ttok

[ttok](https://github.com/simonw/ttok) is a command-line tool for counting OpenAI tokens. You can use it to check if input is likely to fit in the token limit for GPT 3.5 or GPT4:

```bash
cat my-file.txt | ttok
```
```
125
```
It can also truncate input down to a desired number of tokens:
```bash
ttok This is too many tokens -t 3
```
```
This is too
```
This is useful for truncating a large document down to a size where it can be processed by an LLM.

(related-tools-symbex)=
## Symbex

[Symbex](https://github.com/simonw/symbex) is a tool for searching for symbols in Python codebases. It's useful for extracting just the code for a specific problem and then piping that into LLM for explanation, refactoring or other tasks.

Here's how to use it to find all functions that match `test*csv*` and use those to guess what the software under test does:

```bash
symbex 'test*csv*' | \
  llm --system 'based on these tests guess what this tool does'
```
It can also be used to export symbols in a format that can be piped to {ref}`llm embed-multi <embeddings-cli-embed-multi>` in order to create embeddings:
```bash
symbex '*' '*:*' --nl | \
  llm embed-multi symbols - \
  --format nl --database embeddings.db --store
```
For more examples see [Symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/).


================================================
FILE: docs/requirements.txt
================================================
sphinx==7.2.6
furo==2023.9.10
sphinx-autobuild
sphinx-copybutton
sphinx-markdown-builder==0.6.8
myst-parser
cogapp


================================================
FILE: docs/schemas.md
================================================
(schemas)=

# Schemas

Large Language Models are very good at producing structured output as JSON or other formats. LLM's **schemas** feature allows you to define the exact structure of JSON data you want to receive from a model.

This feature is supported by models from OpenAI, Anthropic, Google Gemini and can be implemented for others {ref}`via plugins <advanced-model-plugins-schemas>`.

This page describes schemas used via the `llm` command-line tool. Schemas can also be used from the {ref}`Python API <python-api-schemas>`.

(schemas-tutorial)=

## Schemas tutorial

In this tutorial we're going to use schemas to analyze some news stories.

But first, let's invent some dogs!

### Getting started with dogs

LLMs are great at creating test data. Let's define a simple schema for a dog, using LLM's {ref}`concise schema syntax <schemas-dsl>`. We'll pass that to LLM with `llm --schema` and prompt it to "invent a cool dog":
```bash
llm --schema 'name, age int, one_sentence_bio' 'invent a cool dog'
```
I got back Ziggy:
```json
{
  "name": "Ziggy",
  "age": 4,
  "one_sentence_bio": "Ziggy is a hyper-intelligent, bioluminescent dog who loves to perform tricks in the dark and guides his owner home using his glowing fur."
}
```
The response matched my schema, with `name` and `one_sentence_bio` string columns and an integer for `age`.

We're using the default LLM model here - `gpt-4o-mini`. Add `-m model` to use another model - for example use `-m o3-mini` to have O3 mini invent some dogs.

For a list of available models that support schemas, run this command:
```bash
llm models --schemas
```

Want several more dogs? You can pass in that same schema using `--schema-multi` and ask for several at once:
```bash
llm --schema-multi 'name, age int, one_sentence_bio' 'invent 3 really cool dogs'
```
Here's what I got:
```json
{
  "items": [
    {
      "name": "Echo",
      "age": 3,
      "one_sentence_bio": "Echo is a sleek, silvery-blue Siberian Husky with mesmerizing blue eyes and a talent for mimicking sounds, making him a natural entertainer."
    },
    {
      "name": "Nova",
      "age": 2,
      "one_sentence_bio": "Nova is a vibrant, spotted Dalmatian with an adventurous spirit and a knack for agility courses, always ready to leap into action."
    },
    {
      "name": "Pixel",
      "age": 4,
      "one_sentence_bio": "Pixel is a playful, tech-savvy Poodle with a rainbow-colored coat, known for her ability to interact with smart devices and her love for puzzle toys."
    }
  ]
}
```
So that's the basic idea: we can feed in a schema and LLM will pass it to the underlying model and (usually) get back JSON that conforms to that schema.

This stuff gets a _lot_ more useful when you start applying it to larger amounts of text, extracting structured details from unstructured content.

### Extracting people from news articles

We are going to extract details of the people who are mentioned in different news stories, and then use those to compile a database.

Let's start by compiling a schema. For each person mentioned we want to extract the following details:

- Their name
- The organization they work for
- Their role
- What we learned about them from the story

We will also record the article headline and the publication date, to make things easier for us later on.

Using LLM's custom, concise schema language, this time with newlines separating the individual fields (for the dogs example we used commas):
```
name: the person's name
organization: who they represent
role: their job title or role
learned: what we learned about them from this story
article_headline: the headline of the story
article_date: the publication date in YYYY-MM-DD
```
As you can see, this schema definition is pretty simple - each line has the name of a property we want to capture, then an optional `:` followed by a description, which doubles as instructions for the model.

The full syntax is {ref}`described below <schemas-dsl>` - you can also include type information for things like numbers.

Let's run this against a news article.

Visit [AP News](https://apnews.com/) and grab the URL to an article. I'm using this one:

    https://apnews.com/article/trump-federal-employees-firings-a85d1aaf1088e050d39dcf7e3664bb9f

There's quite a lot of HTML on that page, possibly even enough to exceed GPT-4o mini's 128,000 token input limit. We'll use another tool called [strip-tags](https://github.com/simonw/strip-tags) to reduce that. If you have [uv](https://docs.astral.sh/uv/) installed you can call it using `uvx strip-tags`, otherwise you'll need to install it first:

```
uv tool install strip-tags
# Or "pip install" or "pipx install"
```
Now we can run this command to extract the people from that article:

```bash
curl 'https://apnews.com/article/trump-federal-employees-firings-a85d1aaf1088e050d39dcf7e3664bb9f' | \
  uvx strip-tags | \
  llm --schema-multi "
name: the person's name
organization: who they represent
role: their job title or role
learned: what we learned about them from this story
article_headline: the headline of the story
article_date: the publication date in YYYY-MM-DD
" --system 'extract people mentioned in this article'
```
The output I got started like this:
```json
{
  "items": [
    {
      "name": "William Alsup",
      "organization": "U.S. District Court",
      "role": "Judge",
      "learned": "He ruled that the mass firings of probationary employees were likely unlawful and criticized the authority exercised by the Office of Personnel Management.",
      "article_headline": "Judge finds mass firings of federal probationary workers were likely unlawful",
      "article_date": "2025-02-26"
    },
    {
      "name": "Everett Kelley",
      "organization": "American Federation of Government Employees",
      "role": "National President",
      "learned": "He hailed the court's decision as a victory for employees who were illegally fired.",
      "article_headline": "Judge finds mass firings of federal probationary workers were likely unlawful",
      "article_date": "2025-02-26"
    }
```
This data has been logged to LLM's {ref}`SQLite database <logging>`. We can retrieve the data back out again using the {ref}`llm logs <logging-view>` command like this:
```bash
llm logs -c --data
```
The `-c` flag means "use most recent conversation", and the `--data` flag outputs just the JSON data that was captured in the response.

We're going to want to use the same schema for other things. Schemas that we use are automatically logged to the database - we can view them using `llm schemas`:

```bash
llm schemas
```
Here's the output:
```
- id: 3b7702e71da3dd791d9e17b76c88730e
  summary: |
    {items: [{name, organization, role, learned, article_headline, article_date}]}
  usage: |
    1 time, most recently 2025-02-28T04:50:02.032081+00:00
```
To view the full schema, run that command with `--full`:

```bash
llm schemas --full
```
Which outputs:
```
- id: 3b7702e71da3dd791d9e17b76c88730e
  schema: |
    {
      "type": "object",
      "properties": {
        "items": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "name": {
                "type": "string",
                "description": "the person's name"
              },
    ...
```
That `3b7702e71da3dd791d9e17b76c88730e` ID can be used to run the same schema again. Let's try that now on a different URL:

```bash
curl 'https://apnews.com/article/bezos-katy-perry-blue-origin-launch-4a074e534baa664abfa6538159c12987' | \
  uvx strip-tags | \
  llm --schema 3b7702e71da3dd791d9e17b76c88730e \
    --system 'extract people mentioned in this article'
```
Here we are using `--schema` because our schema ID already corresponds to an array of items.

The result starts like this:
```json
{
  "items": [
    {
      "name": "Katy Perry",
      "organization": "Blue Origin",
      "role": "Singer",
      "learned": "Katy Perry will join the all-female celebrity crew for a spaceflight organized by Blue Origin.",
      "article_headline": "Katy Perry and Gayle King will join Jeff Bezos’ fiancee Lauren Sanchez on Blue Origin spaceflight",
      "article_date": "2023-10-15"
    },
```
One more trick: let's turn our schema and system prompt combination into a {ref}`template <prompt-templates>`.

```bash
llm --schema 3b7702e71da3dd791d9e17b76c88730e \
  --system 'extract people mentioned in this article' \
  --save people
```
This creates a new template called "people". We can confirm the template was created correctly using:
```bash
llm templates show people
```
Which will output the YAML version of the template looking like this:
```yaml
name: people
schema_object:
    properties:
        items:
            items:
                properties:
                    article_date:
                        description: the publication date in YYYY-MM-DD
                        type: string
                    article_headline:
                        description: the headline of the story
                        type: string
                    learned:
                        description: what we learned about them from this story
                        type: string
                    name:
                        description: the person's name
                        type: string
                    organization:
                        description: who they represent
                        type: string
                    role:
                        description: their job title or role
                        type: string
                required:
                - name
                - organization
                - role
                - learned
                - article_headline
                - article_date
                type: object
            type: array
    required:
    - items
    type: object
system: extract people mentioned in this article
```
We can now run our people extractor against another fresh URL. Let's use one from The Guardian:
```bash
curl https://www.theguardian.com/commentisfree/2025/feb/27/billy-mcfarland-new-fyre-festival-fantasist | \
  strip-tags | llm -t people
```
Storing the schema in a template means we can just use `llm -t people` to run the prompt. Here's what I got back:
```json
{
  "items": [
    {
      "name": "Billy McFarland",
      "organization": "Fyre Festival",
      "role": "Organiser",
      "learned": "Billy McFarland is known for organizing the infamous Fyre Festival and was sentenced to six years in prison for wire fraud related to it. He is attempting to revive the festival with Fyre 2.",
      "article_headline": "Welcome back Billy McFarland and a new Fyre festival. Shows you can’t keep a good fantasist down",
      "article_date": "2025-02-27"
    }
  ]
}
```
Depending on the model, schema extraction may work against images and PDF files as well.

I took a screenshot of part of [this story in the Onion](https://theonion.com/mark-zuckerberg-insists-anyone-with-same-skewed-values-1826829272/) and saved it to the following URL:

    https://static.simonwillison.net/static/2025/onion-zuck.jpg

We can pass that as an {ref}`attachment <usage-attachments>` using the `-a` option. This time let's use GPT-4o:

```bash
llm -t people -a https://static.simonwillison.net/static/2025/onion-zuck.jpg -m gpt-4o
```
Which gave me back this:
```json
{
  "items": [
    {
      "name": "Mark Zuckerberg",
      "organization": "Facebook",
      "role": "CEO",
      "learned": "He addressed criticism by suggesting anyone with similar values and thirst for power could make the same mistakes.",
      "article_headline": "Mark Zuckerberg Insists Anyone With Same Skewed Values And Unrelenting Thirst For Power Could Have Made Same Mistakes",
      "article_date": "2018-06-14"
    }
  ]
}
```
Now that we've extracted people from a number of different sources, let's load them into a database.

The {ref}`llm logs <logging-view>` command has several features for working with logged JSON objects. Since we've been recording multiple objects from each page in an `"items"` array using our `people` template we can access those using the following command:

```bash
llm logs --schema t:people --data-key items
```
In place of `t:people` we could use the `3b7702e71da3dd791d9e17b76c88730e` schema ID or even the original schema string instead, see {ref}`specifying a schema <schemas-specify>`.

This command outputs newline-delimited JSON for every item that has been captured using the specified schema:
```json
{"name": "Katy Perry", "organization": "Blue Origin", "role": "Singer", "learned": "She is one of the passengers on the upcoming spaceflight with Blue Origin."}
{"name": "Gayle King", "organization": "Blue Origin", "role": "TV Journalist", "learned": "She is participating in the upcoming Blue Origin spaceflight."}
{"name": "Lauren Sanchez", "organization": "Blue Origin", "role": "Helicopter Pilot and former TV Journalist", "learned": "She selected the crew for the Blue Origin spaceflight."}
{"name": "Aisha Bowe", "organization": "Engineering firm", "role": "Former NASA Rocket Scientist", "learned": "She is part of the crew for the spaceflight."}
{"name": "Amanda Nguyen", "organization": "Research Scientist", "role": "Activist and Scientist", "learned": "She is included in the crew for the upcoming Blue Origin flight."}
{"name": "Kerianne Flynn", "organization": "Movie Producer", "role": "Producer", "learned": "She will also be a passenger on the upcoming spaceflight."}
{"name": "Billy McFarland", "organization": "Fyre Festival", "role": "Organiser", "learned": "He was sentenced to six years in prison for wire fraud in 2018 and has launched a new festival called Fyre 2.", "article_headline": "Welcome back Billy McFarland and a new Fyre festival. Shows you can\u2019t keep a good fantasist down", "article_date": "2025-02-27"}
{"name": "Mark Zuckerberg", "organization": "Facebook", "role": "CEO", "learned": "He attempted to dismiss criticism by suggesting that anyone with similar values and thirst for power could have made the same mistakes.", "article_headline": "Mark Zuckerberg Insists Anyone With Same Skewed Values And Unrelenting Thirst For Power Could Have Made Same Mistakes", "article_date": "2018-06-14"}
```
If we add `--data-array` we'll get back a valid JSON array of objects instead:
```bash
llm logs --schema t:people --data-key items --data-array
```
Output starts:
```json
[{"name": "Katy Perry", "organization": "Blue Origin", "role": "Singer", "learned": "She is one of the passengers on the upcoming spaceflight with Blue Origin."},
 {"name": "Gayle King", "organization": "Blue Origin", "role": "TV Journalist", "learned": "She is participating in the upcoming Blue Origin spaceflight."},
```

We can load this into a SQLite database using [sqlite-utils](https://sqlite-utils.datasette.io/), in particular the [sqlite-utils insert](https://sqlite-utils.datasette.io/en/stable/cli.html#inserting-json-data) command.

```bash
uv tool install sqlite-utils
# or pip install or pipx install
```
Now we can pipe the JSON into that tool to create a database with a `people` table:
```bash
llm logs --schema t:people --data-key items --data-array | \
  sqlite-utils insert data.db people -
```
To see a table of the name, organization and role columns use [sqlite-utils rows](https://sqlite-utils.datasette.io/en/stable/cli.html#returning-all-rows-in-a-table):
```bash
sqlite-utils rows data.db people -t -c name -c organization -c role
```
Which produces:
```
name             organization        role
---------------  ------------------  -----------------------------------------
Katy Perry       Blue Origin         Singer
Gayle King       Blue Origin         TV Journalist
Lauren Sanchez   Blue Origin         Helicopter Pilot and former TV Journalist
Aisha Bowe       Engineering firm    Former NASA Rocket Scientist
Amanda Nguyen    Research Scientist  Activist and Scientist
Kerianne Flynn   Movie Producer      Producer
Billy McFarland  Fyre Festival       Organiser
Mark Zuckerberg  Facebook            CEO
```
We can also explore the database in a web interface using [Datasette](https://datasette.io/):

```bash
uvx datasette data.db
# Or install datasette first:
uv tool install datasette # or pip install or pipx install
datasette data.db
```
Visit `http://127.0.0.1:8001/data/people` to start navigating the data.

(schemas-json-schemas)=

## Using JSON schemas

The above examples have both used {ref}`concise schema syntax <schemas-dsl>`. LLM converts this format to [JSON schema](https://json-schema.org/), and you can use JSON schema directly yourself if you wish.

JSON schema covers the following:

- The data types of fields (string, number, array, object, etc.)
- Required vs. optional fields
- Nested data structures
- Constraints on values (minimum/maximum, patterns, etc.)
- Descriptions of those fields - these can be used to guide the language model

Different models may support different subsets of the overall JSON schema language. You should experiment to figure out what works for the model you are using.

LLM recommends that the top level of the schema is an object, not an array, for increased compatibility across multiple models. I suggest using `{"items": [array of objects]}` if you want to return an array.

The dogs schema above, `name, age int, one_sentence_bio`, would look like this as a full JSON schema:

```json
{
  "type": "object",
  "properties": {
    "name": {
      "type": "string"
    },
    "age": {
      "type": "integer"
    },
    "one_sentence_bio": {
      "type": "string"
    }
  },
  "required": [
    "name",
    "age",
    "one_sentence_bio"
  ]
}
```
This JSON can be passed directly to the `--schema` option, or saved in a file and passed as the filename.
```bash
llm --schema '{
  "type": "object",
  "properties": {
    "name": {
      "type": "string"
    },
    "age": {
      "type": "integer"
    },
    "one_sentence_bio": {
      "type": "string"
    }
  },
  "required": [
    "name",
    "age",
    "one_sentence_bio"
  ]
}' 'a surprising dog'
```
Example output:
```json
{
  "name": "Baxter",
  "age": 3,
  "one_sentence_bio": "Baxter is a rescue dog who learned to skateboard and now performs tricks at local parks, astonishing everyone with his skill!"
}
```

(schemas-specify)=

## Ways to specify a schema

LLM accepts schema definitions for both running prompts and exploring logged responses, using the `--schema` option.

This option can take multiple forms:

- A string providing a JSON schema: `--schema '{"type": "object", ...}'`
- A {ref}`condensed schema definition <schemas-dsl>`: `--schema 'name,age int'`
- The name or path of a file on disk containing a JSON schema: `--schema dogs.schema.json`
- The hexadecimal ID of a previously logged schema: `--schema 520f7aabb121afd14d0c6c237b39ba2d` - these IDs can be found using the `llm schemas` command.
- A schema that has been {ref}`saved in a template <prompt-templates-save>`: `--schema t:name-of-template`, see {ref}`schemas-reusable`.

(schemas-dsl)=

## Concise LLM schema syntax

JSON schemas can be time-consuming to construct by hand. LLM also supports a concise alternative syntax for specifying a schema.

A simple schema for an object with two string properties called `name` and `bio` looks like this:

    name, bio

You can include type information by adding a type indicator after the property name, separated by a space.

    name, bio, age int

Supported types are `int` for integers, `float` for floating point numbers, `str` for strings (the default) and `bool` for true/false booleans.

To include a description of the field to act as a hint to the model, add one after a colon:

    name: the person's name, age int: their age, bio: a short bio

If your schema is getting long you can switch from comma-separated to newline-separated, which also allows you to use commas in those descriptions:

    name: the person's name
    age int: their age
    bio: a short bio, no more than three sentences

You can experiment with the syntax using the `llm schemas dsl` command, which converts the input into a JSON schema:
```bash
llm schemas dsl 'name, age int'
```
Output:
```json
{
  "type": "object",
  "properties": {
    "name": {
      "type": "string"
    },
    "age": {
      "type": "integer"
    }
  },
  "required": [
    "name",
    "age"
  ]
}
```

The Python utility function `llm.schema_dsl(schema)` can be used to convert this syntax into the equivalent JSON schema dictionary when working with schemas {ref}`in the Python API <python-api-schemas>`.

(schemas-reusable)=

## Saving reusable schemas in templates

If you want to store a schema with a name so you can reuse it easily in the future, the easiest way to do so is to save it {ref}`in a template <prompt-templates-schemas>`.

The quickest way to do that is with the `llm --save` option:

```bash
llm --schema 'name, age int, one_sentence_bio' --save dog
```
Now you can use it like this:
```bash
llm --schema t:dog 'invent a dog'
```
Or:
```bash
llm --schema-multi t:dog 'invent three dogs'
```
(schemas-logs)=

## Browsing logged JSON objects created using schemas

By default, all JSON produced using schemas is logged to {ref}`a SQLite database <logging>`. You can use special options to the `llm logs` command to extract just those JSON objects in a useful format.

The `llm logs --schema X` filter option can be used to filter just for responses that were created using the specified schema. You can pass the full schema JSON, a path to the schema on disk or the schema ID.

The `--data` option causes just the JSON data collected by that schema to be outputted, as newline-delimited JSON.

If you instead want a JSON array of objects (with starting and ending square brackets) you can use `--data-array`.

Let's invent some dogs:

```bash
llm --schema-multi 'name, ten_word_bio' 'invent 3 cool dogs'
llm --schema-multi 'name, ten_word_bio' 'invent 2 cool dogs'
```
Having logged these cool dogs, you can see just the data that was returned by those prompts like this:
```bash
llm logs --schema-multi 'name, ten_word_bio' --data
```
We need to use `--schema-multi` here because we used that when we first created these records. The `--schema` option is also supported, and can be passed a filename or JSON schema or schema ID as well.

Output:
```
{"items": [{"name": "Robo", "ten_word_bio": "A cybernetic dog with laser eyes and super intelligence."}, {"name": "Flamepaw", "ten_word_bio": "Fire-resistant dog with a talent for agility and tricks."}]}
{"items": [{"name": "Bolt", "ten_word_bio": "Lightning-fast border collie, loves frisbee and outdoor adventures."}, {"name": "Luna", "ten_word_bio": "Mystical husky with mesmerizing blue eyes, enjoys snow and play."}, {"name": "Ziggy", "ten_word_bio": "Quirky pug who loves belly rubs and quirky outfits."}]}
```
Note that the dogs are nested in that `"items"` key. To access the list of items from that key use `--data-key items`:
```bash
llm logs --schema-multi 'name, ten_word_bio' --data-key items
```
Output:
```
{"name": "Bolt", "ten_word_bio": "Lightning-fast border collie, loves frisbee and outdoor adventures."}
{"name": "Luna", "ten_word_bio": "Mystical husky with mesmerizing blue eyes, enjoys snow and play."}
{"name": "Ziggy", "ten_word_bio": "Quirky pug who loves belly rubs and quirky outfits."}
{"name": "Robo", "ten_word_bio": "A cybernetic dog with laser eyes and super intelligence."}
{"name": "Flamepaw", "ten_word_bio": "Fire-resistant dog with a talent for agility and tricks."}
```
Finally, to output a JSON array instead of newline-delimited JSON use `--data-array`:
```bash
llm logs --schema-multi 'name, ten_word_bio' --data-key items --data-array
```
Output:
```json
[{"name": "Bolt", "ten_word_bio": "Lightning-fast border collie, loves frisbee and outdoor adventures."},
 {"name": "Luna", "ten_word_bio": "Mystical husky with mesmerizing blue eyes, enjoys snow and play."},
 {"name": "Ziggy", "ten_word_bio": "Quirky pug who loves belly rubs and quirky outfits."},
 {"name": "Robo", "ten_word_bio": "A cybernetic dog with laser eyes and super intelligence."},
 {"name": "Flamepaw", "ten_word_bio": "Fire-resistant dog with a talent for agility and tricks."}]
```
Add `--data-ids` to include `"response_id"` and `"conversation_id"` fields in each of the returned objects reflecting the database IDs of the response and conversation they were a part of. This can be useful for tracking the source of each individual row.

```bash
llm logs --schema-multi 'name, ten_word_bio' --data-key items --data-ids
```
Output:
```json
{"name": "Nebula", "ten_word_bio": "A cosmic puppy with starry fur, loves adventures in space.", "response_id": "01jn4dawj8sq0c6t3emf4k5ryx", "conversation_id": "01jn4dawj8sq0c6t3emf4k5ryx"}
{"name": "Echo", "ten_word_bio": "A clever hound with extraordinary hearing, master of hide-and-seek.", "response_id": "01jn4dawj8sq0c6t3emf4k5ryx", "conversation_id": "01jn4dawj8sq0c6t3emf4k5ryx"}
{"name": "Biscuit", "ten_word_bio": "An adorable chef dog, bakes treats that everyone loves.", "response_id": "01jn4dawj8sq0c6t3emf4k5ryx", "conversation_id": "01jn4dawj8sq0c6t3emf4k5ryx"}
{"name": "Cosmo", "ten_word_bio": "Galactic explorer, loves adventures and chasing shooting stars.", "response_id": "01jn4daycb3svj0x7kvp7zrp4q", "conversation_id": "01jn4daycb3svj0x7kvp7zrp4q"}
{"name": "Pixel", "ten_word_bio": "Tech-savvy pup, builds gadgets and loves virtual playtime.", "response_id": "01jn4daycb3svj0x7kvp7zrp4q", "conversation_id": "01jn4daycb3svj0x7kvp7zrp4q"}
```
If a row already has a property called `"conversation_id"` or `"response_id"` additional underscores will be appended to the ID key until it no longer overlaps with the existing keys.

The `--id-gt $ID` and `--id-gte $ID` options can be useful for ignoring logged schema data prior to a certain point, see {ref}`logging-filter-id` for details.

================================================
FILE: docs/setup.md
================================================
# Setup

## Installation

Install this tool using `pip`:
```bash
pip install llm
```
Or using [pipx](https://pypa.github.io/pipx/):
```bash
pipx install llm
```
Or using [uv](https://docs.astral.sh/uv/guides/tools/) ({ref}`more tips below <setup-uvx>`):
```bash
uv tool install llm
```
Or using [Homebrew](https://brew.sh/) (see {ref}`warning note <homebrew-warning>`):
```bash
brew install llm
```

## Upgrading to the latest version

If you installed using `pip`:
```bash
pip install -U llm
```
For `pipx`:
```bash
pipx upgrade llm
```
For `uv`:
```bash
uv tool upgrade llm
```
For Homebrew:
```bash
brew upgrade llm
```
If the latest version is not yet available on Homebrew you can upgrade like this instead:
```bash
llm install -U llm
```

(setup-uvx)=
## Using uvx

If you have [uv](https://docs.astral.sh/uv/) installed you can also use the `uvx` command to try LLM without first installing it like this:

```bash
export OPENAI_API_KEY='sx-...'
uvx llm 'fun facts about skunks'
```
This will install and run LLM using a temporary virtual environment.

You can use the `--with` option to add extra plugins. To use Anthropic's models, for example:
```bash
export ANTHROPIC_API_KEY='...'
uvx --with llm-anthropic llm -m claude-3.5-haiku 'fun facts about skunks'
```
All of the usual LLM commands will work with `uvx llm`. Here's how to set your OpenAI key without needing an environment variable for example:
```bash
uvx llm keys set openai
# Paste key here
```

(homebrew-warning)=
## A note about Homebrew and PyTorch

The version of LLM packaged for Homebrew currently uses Python 3.12. The PyTorch project do not yet have a stable release of PyTorch for that version of Python.

This means that LLM plugins that depend on PyTorch such as [llm-sentence-transformers](https://github.com/simonw/llm-sentence-transformers) may not install cleanly with the Homebrew version of LLM.

You can work around this by manually installing PyTorch before installing `llm-sentence-transformers`:

```bash
llm install llm-python
llm python -m pip install \
  --pre torch torchvision \
  --index-url https://download.pytorch.org/whl/nightly/cpu
llm install llm-sentence-transformers
```
This should produce a working installation of that plugin.

## Installing plugins

{ref}`plugins` can be used to add support for other language models, including models that can run on your own device.

For example, the [llm-gpt4all](https://github.com/simonw/llm-gpt4all) plugin adds support for 17 new models that can be installed on your own machine. You can install that like so:
```bash
llm install llm-gpt4all
```

(api-keys)=
## API key management

Many LLM models require an API key. These API keys can be provided to this tool using several different mechanisms.

You can obtain an API key for OpenAI's language models from [the API keys page](https://platform.openai.com/api-keys) on their site.

### Saving and using stored keys

The easiest way to store an API key is to use the `llm keys set` command:

```bash
llm keys set openai
```
You will be prompted to enter the key like this:
```
% llm keys set openai
Enter key:
```
Once stored, this key will be automatically used for subsequent calls to the API:

```bash
llm "Five ludicrous names for a pet lobster"
```

You can list the names of keys that have been set using this command:

```bash
llm keys
```

Keys that are stored in this way live in a file called `keys.json`. This file is located at the path shown when you run the following command:

```bash
llm keys path
```

On macOS this will be `~/Library/Application Support/io.datasette.llm/keys.json`. On Linux it may be something like `~/.config/io.datasette.llm/keys.json`.

### Passing keys using the --key option

Keys can be passed directly using the `--key` option, like this:

```bash
llm "Five names for pet weasels" --key sk-my-key-goes-here
```
You can also pass the alias of a key stored in the `keys.json` file. For example, if you want to maintain a personal API key you could add that like this:
```bash
llm keys set personal
```
And then use it for prompts like so:

```bash
llm "Five friendly names for a pet skunk" --key personal
```

### Keys in environment variables

Keys can also be set using an environment variable. These are different for different models.

For OpenAI models the key will be read from the `OPENAI_API_KEY` environment variable.

The environment variable will be used if no `--key` option is passed to the command and there is not a key configured in `keys.json`.

To use an environment variable in place of the `keys.json` key run the prompt like this:
```bash
llm 'my prompt' --key $OPENAI_API_KEY
```

## Configuration

You can configure LLM in a number of different ways.

(setup-default-model)=
### Setting a custom default model

The model used when calling `llm` without the `-m/--model` option defaults to `gpt-4o-mini` - the fastest and least expensive OpenAI model.

You can use the `llm models default` command to set a different default model. For GPT-4o (slower and more expensive, but more capable) run this:

```bash
llm models default gpt-4o
```
You can view the current model by running this:
```
llm models default
```
Any of the supported aliases for a model can be passed to this command.

### Setting a custom directory location

This tool stores various files - prompt templates, stored keys, preferences, a database of logs - in a directory on your computer.

On macOS this is `~/Library/Application Support/io.datasette.llm/`.

On Linux it may be something like `~/.config/io.datasette.llm/`.

You can set a custom location for this directory by setting the `LLM_USER_PATH` environment variable:

```bash
export LLM_USER_PATH=/path/to/my/custom/directory
```
### Turning SQLite logging on and off

By default, LLM will log every prompt and response you make to a SQLite database - see {ref}`logging` for more details.

You can turn this behavior off by default by running:
```bash
llm logs off
```
Or turn it back on again with:
```
llm logs on
```
Run `llm logs status` to see the current state of the setting.

================================================
FILE: docs/templates.md
================================================
(prompt-templates)=
# Templates

A **template** can combine a prompt, system prompt, model, default model options, schema, and fragments into a single reusable unit.

Only one template can be used at a time. To compose multiple shorter pieces of prompts together consider using {ref}`fragments <fragments>` instead.

(prompt-templates-save)=

## Getting started with <code>--save</code>

The easiest way to create a template is using the `--save template_name` option.

Here's how to create a template for summarizing text:

```bash
llm '$input - summarize this' --save summarize
```
Put `$input` where you would like the user's input to be inserted. If you omit this their input will be added to the end of your regular prompt:
```bash
llm 'Summarize the following: ' --save summarize
```
You can also create templates using system prompts:
```bash
llm --system 'Summarize this' --save summarize
```
You can set the default model for a template using `--model`:

```bash
llm --system 'Summarize this' --model gpt-4o --save summarize
```
You can also save default options:
```bash
llm --system 'Speak in French' -o temperature 1.8 --save wild-french
```
If you want to include a literal `$` sign in your prompt, use `$$` instead:
```bash
llm --system 'Estimate the cost in $$ of this: $input' --save estimate
```
Use `--tool/-T` one or more times to add tools to the template:
```bash
llm -T llm_time --system 'Always include the current time in the answer' --save time
```
You can also use `--functions` to add Python function code directly to the template:
```bash
llm --functions 'def reverse_string(s): return s[::-1]' --system 'reverse any input' --save reverse
llm -t reverse 'Hello, world!'
```

Add `--schema` to bake a {ref}`schema <usage-schemas>` into your template:

```bash
llm --schema dog.schema.json 'invent a dog' --save dog
```

If you add `--extract` the setting to {ref}`extract the first fenced code block <usage-extract-fenced-code>` will be persisted in the template.
```bash
llm --system 'write a Python function' --extract --save python-function
llm -t python-function 'calculate haversine distance between two points'
```
In each of these cases the template will be saved in YAML format in a dedicated directory on disk.

(prompt-templates-using)=

## Using a template

You can execute a named template using the `-t/--template` option:

```bash
curl -s https://example.com/ | llm -t summarize
```

This can be combined with the `-m` option to specify a different model:
```bash
curl -s https://llm.datasette.io/en/latest/ | \
  llm -t summarize -m gpt-3.5-turbo-16k
```
Templates can also be specified as a direct path to a YAML file on disk:
```bash
llm -t path/to/template.yaml 'extra prompt here'
```
Or as a URL to a YAML file hosted online:
```bash
llm -t https://raw.githubusercontent.com/simonw/llm-templates/refs/heads/main/python-app.yaml \
  'Python app to pick a random line from a file'
```
Note that templates loaded via URLs will have any `functions:` keys ignored, to avoid accidentally executing arbitrary code. This restriction also applies to templates loaded via the {ref}`template loaders plugin mechanism <plugin-hooks-register-template-loaders>`.

(prompt-templates-list)=

## Listing available templates

This command lists all available templates:
```bash
llm templates
```
The output looks something like this:
```
cmd        : system: reply with macos terminal commands only, no extra information
glados     : system: You are GlaDOS prompt: Summarize this:
```

(prompt-templates-yaml)=

## Templates as YAML files

Templates are stored as YAML files on disk.

You can edit (or create) a YAML file for a template using the `llm templates edit` command:
```
llm templates edit summarize
```
This will open the system default editor.

:::{tip}
You can control which editor will be used here using the `EDITOR` environment variable - for example, to use VS Code:
```bash
export EDITOR="code -w"
```
Add that to your `~/.zshrc` or `~/.bashrc` file depending on which shell you use (`zsh` has been the default on macOS since macOS Catalina in 2019).
:::

You can create or edit template files directly in the templates directory. The location of this directory is shown by the `llm templates path` command:
```bash
llm templates path
```
Example output:
```
/Users/simon/Library/Application Support/io.datasette.llm/templates
```

A basic YAML template looks like this:

```yaml
prompt: 'Summarize this: $input'
```
Or use YAML multi-line strings for longer inputs. I created this using `llm templates edit steampunk`:
```yaml
prompt: >
    Summarize the following text.

    Insert frequent satirical steampunk-themed illustrative anecdotes.
    Really go wild with that.

    Text to summarize: $input
```
The `prompt: >` causes the following indented text to be treated as a single string, with newlines collapsed to spaces. Use `prompt: |` to preserve newlines.

Running that with `llm -t steampunk` against GPT-4o (via [strip-tags](https://github.com/simonw/strip-tags) to remove HTML tags from the input and minify whitespace):
```bash
curl -s 'https://til.simonwillison.net/macos/imovie-slides-and-audio' | \
  strip-tags -m | llm -t steampunk -m gpt-4o
```
Output:
> In a fantastical steampunk world, Simon Willison decided to merge an old MP3 recording with slides from the talk using iMovie. After exporting the slides as images and importing them into iMovie, he had to disable the default Ken Burns effect using the "Crop" tool. Then, Simon manually synchronized the audio by adjusting the duration of each image. Finally, he published the masterpiece to YouTube, with the whimsical magic of steampunk-infused illustrations leaving his viewers in awe.

(prompt-templates-system)=

### System prompts

When working with models that support system prompts you can set a system prompt using a `system:` key like so:

```yaml
system: Summarize this
```
If you specify only a system prompt you don't need to use the `$input` variable - `llm` will use the user's input as the whole of the regular prompt, which will then be processed using the instructions set in that system prompt.

You can combine system and regular prompts like so:

```yaml
system: You speak like an excitable Victorian adventurer
prompt: 'Summarize this: $input'
```

(prompt-templates-fragments)=

### Fragments

Templates can reference {ref}`Fragments <fragments>` using the `fragments:` and `system_fragments:` keys. These should be a list of fragment URLs, filepaths or hashes:

```yaml
fragments:
- https://example.com/robots.txt
- /path/to/file.txt
- 993fd38d898d2b59fd2d16c811da5bdac658faa34f0f4d411edde7c17ebb0680
system_fragments:
- https://example.com/system-prompt.txt
```

(prompt-templates-options)=

### Options

Default options can be set using the `options:` key:

```yaml
name: wild-french
system: Speak in French
options:
  temperature: 1.8
```

(prompt-templates-tools)=

### Tools

The `tools:` key can provide a list of tool names from other plugins - either function names or toolbox specifiers:
```yaml
name: time-plus
tools:
- llm_time
- Datasette("https://example.com/timezone-lookup")
```
The `functions:` key can provide a multi-line string of Python code defining additional functions:
```yaml
name: my-functions
functions: |
  def reverse_string(s: str):
      return s[::-1]

  def greet(name: str):
      return f"Hello, {name}!"
```
(prompt-templates-schemas)=

### Schemas

Use the `schema_object:` key to embed a JSON schema (as YAML) in your template. The easiest way to create these is with the `llm --schema ... --save name-of-template` command - the result should look something like this:

```yaml
name: dogs
schema_object:
    properties:
        dogs:
            items:
                properties:
                    bio:
                        type: string
                    name:
                        type: string
                type: object
            type: array
    type: object
```

(prompt-templates-variables)=

### Additional template variables

Templates that work against the user's normal prompt input (content that is either piped to the tool via standard input or passed as a command-line argument) can use the `$input` variable.

You can use additional named variables. These will then need to be provided using the `-p/--param` option when executing the template.

Here's an example YAML template called `recipe`, which you can create using `llm templates edit recipe`:

```yaml
prompt: |
    Suggest a recipe using ingredients: $ingredients

    It should be based on cuisine from this country: $country
```
This can be executed like so:

```bash
llm -t recipe -p ingredients 'sausages, milk' -p country Germany
```
My output started like this:
> Recipe: German Sausage and Potato Soup
>
> Ingredients:
> - 4 German sausages
> - 2 cups whole milk

This example combines input piped to the tool with additional parameters. Call this `summarize`:

```yaml
system: Summarize this text in the voice of $voice
```
Then to run it:
```bash
curl -s 'https://til.simonwillison.net/macos/imovie-slides-and-audio' | \
  strip-tags -m | llm -t summarize -p voice GlaDOS
```
I got this:

> My previous test subject seemed to have learned something new about iMovie. They exported keynote slides as individual images [...] Quite impressive for a human.

(prompt-default-parameters)=

### Specifying default parameters

When creating a template using the `--save` option you can pass `-p name value` to store the default values for parameters:
```bash
llm --system 'Summarize this text in the voice of $voice' \
  --model gpt-4o -p voice GlaDOS --save summarize
```

You can specify default values for parameters in the YAML using the `defaults:` key.

```yaml
system: Summarize this text in the voice of $voice
defaults:
  voice: GlaDOS
```

When running without `-p` it will choose the default:

```bash
curl -s 'https://til.simonwillison.net/macos/imovie-slides-and-audio' | \
  strip-tags -m | llm -t summarize
```

But you can override the defaults with `-p`:

```bash
curl -s 'https://til.simonwillison.net/macos/imovie-slides-and-audio' | \
  strip-tags -m | llm -t summarize -p voice Yoda
```

I got this:

> Text, summarize in Yoda's voice, I will: "Hmm, young padawan. Summary of this text, you seek. Hmmm. ...

(prompt-templates-extract)=

### Configuring code extraction

To configure the {ref}`extract first fenced code block <usage-extract-fenced-code>` setting for the template, add this:

```yaml
extract: true
```

(prompt-templates-default-model)=

### Setting a default model for a template

Templates executed using `llm -t template-name` will execute using the default model that the user has configured for the tool - or `gpt-4o-mini` if they have not configured their own default.

You can specify a new default model for a template using the `model:` key in the associated YAML. Here's a template called `roast`:

```yaml
model: gpt-4o
system: roast the user at every possible opportunity, be succinct
```
Example:
```bash
llm -t roast 'How are you today?'
```
> I'm doing great but with your boring questions, I must admit, I've seen more life in a cemetery.

(prompt-templates-loaders)=

## Template loaders from plugins

LLM plugins can {ref}`register prefixes <plugin-hooks-register-template-loaders>` that can be used to load templates from external sources.

[llm-templates-github](https://github.com/simonw/llm-templates-github) is an example which adds a `gh:` prefix which can be used to load templates from GitHub.

You can install that plugin like this:
```bash
llm install llm-templates-github
```

Use the `llm templates loaders` command to see details of the registered loaders.

```bash
llm templates loaders
```
Output:
```
gh:
  Load a template from GitHub or local cache if available

  Format: username/repo/template_name (without the .yaml extension)
    or username/template_name which means username/llm-templates/template_name
```

You can then use it like this:
```bash
curl -sL 'https://llm.datasette.io/' | llm -t gh:simonw/summarize
```
The `-sL` flags to `curl` are used to suppress progress meters (`-s`) and follow redirects (`-L`).

This command will fetch the content of the LLM index page and feed it to the template defined by [summarize.yaml](https://github.com/simonw/llm-templates/blob/main/summarize.yaml) in the [simonw/llm-templates](https://github.com/simonw/llm-templates) GitHub repository.

If two template loader plugins attempt to register the same prefix one of them will have `_1` added to the end of their prefix. Use `llm templates loaders` to check if this has occurred.

================================================
FILE: docs/tools.md
================================================
(tools)=

# Tools

Many Large Language Models have been trained to execute tools as part of responding to a prompt. LLM supports tool usage with both the command-line interface and the Python API.

Exposing tools to LLMs **carries risks**! Be sure to read the {ref}`warning below <tools-warning>`.

(tools-how-they-work)=

## How tools work

A tool is effectively a function that the model can request to be executed. Here's how that works:

1. The initial prompt to the model includes a list of available tools, containing their names, descriptions and parameters.
2. The model can choose to call one (or sometimes more than one) of those tools, returning a request for the tool to execute.
3. The code that calls the model - in this case LLM itself - then executes the specified tool with the provided arguments.
4. LLM prompts the model a second time, this time including the output of the tool execution.
5. The model can then use that output to generate its next response.

This sequence can run several times in a loop, allowing the LLM to access data, act on that data and then pass that data off to other tools for further processing.

:::{admonition} Tools can be dangerous
:class: danger

(tools-warning)=

## Warning: Tools can be dangerous

Applications built on top of LLMs suffer from a class of attacks called [prompt injection](https://simonwillison.net/tags/prompt-injection/) attacks. These occur when a malicious third party injects content into the LLM which causes it to take tool-based actions that act against the interests of the user of that application.

Be very careful about which tools you enable when you potentially might be exposed to untrusted sources of content - web pages, GitHub issues posted by other people, email and messages that have been sent to you that could come from an attacker.

Watch out for [the lethal trifecta](https://simonwillison.net/2025/Jun/16/the-lethal-trifecta/) of prompt injection exfiltration attacks. If your tool-enabled LLM has the following:

- access to private data
- exposure to malicious instructions
- the ability to exfiltrate information

Anyone who can feed malicious instructions into your LLM - by leaving them on a web page it visits, or sending an email to an inbox that it monitors - could be able to trick your LLM into using other tools to access your private information and then exfiltrate (pass out) that data to somewhere the attacker can see it.
:::

(tools-trying-out)=

## Trying out tools

LLM comes with a default tool installed, called `llm_version`. You can try that out like this:

```bash
llm --tool llm_version "What version of LLM is this?" --td
```
You can also use `-T llm_version` as a shortcut for `--tool llm_version`.

The output should look like this:
```
Tool call: llm_version({})
  0.26a0

The installed version of the LLM is 0.26a0.
```
Further tools can be installed using plugins, or you can use the `llm --functions` option to pass tools implemented as Python functions directly, as {ref}`described here <usage-tools>`.

(tools-implementation)=

## LLM's implementation of tools

In LLM every tool is defined as a Python function. The function can take any number of arguments and can return a string or an object that can be converted to a string.

Tool functions should include a docstring that describes what the function does. This docstring will become the description that is passed to the model.

Tools can also be defined as {ref}`toolbox classes <python-api-toolbox>`, a subclass of `llm.Toolbox` that allows multiple related tools to be bundled together. Toolbox classes can be configured when they are instantiated, and can also maintain state in between multiple tool calls.

The Python API can accept functions directly. The command-line interface has two ways for tools to be defined: via plugins that implement the {ref}`register_tools() plugin hook <plugin-hooks-register-tools>`, or directly on the command-line using the `--functions` argument to specify a block of Python code defining one or more functions - or a path to a Python file containing the same.

You can use tools {ref}`with the LLM command-line tool <usage-tools>` or {ref}`with the Python API <python-api-tools>`.

(tools-default)=

## Default tools

LLM includes some default tools for you to try out:

- `llm_version()` returns the current version of LLM
- `llm_time()` returns the current local and UTC time

Try them like this:

```bash
llm -T llm_version -T llm_time 'Give me the current time and LLM version' --td
```

(tools-tips)=

## Tips for implementing tools

Consult the {ref}`register_tools() plugin hook <plugin-hooks-register-tools>` documentation for examples of how to implement tools in plugins.

If your plugin needs access to API secrets I recommend storing those using `llm keys set api-name` and then reading them using the {ref}`plugin-utilities-get-key` utility function. This avoids secrets being logged to the database as part of tool calls.

<!-- Uncomment when this is true: The [llm-tools-datasette](https://github.com/simonw/llm-tools-datasette) plugin is a good example of this pattern in action. -->


================================================
FILE: docs/usage.md
================================================
(usage)=
# Usage

The command to run a prompt is `llm prompt 'your prompt'`. This is the default command, so you can use `llm 'your prompt'` as a shortcut.

(usage-executing-prompts)=
## Executing a prompt

These examples use the default OpenAI `gpt-4o-mini` model, which requires you to first {ref}`set an OpenAI API key <api-keys>`.

You can {ref}`install LLM plugins <installing-plugins>` to use models from other providers, including openly licensed models you can run directly on your own computer.

To run a prompt, streaming tokens as they come in:
```bash
llm 'Ten names for cheesecakes'
```
To disable streaming and only return the response once it has completed:
```bash
llm 'Ten names for cheesecakes' --no-stream
```
To switch from GPT-4o mini (the default) to GPT-4o:
```bash
llm 'Ten names for cheesecakes' -m gpt-4o
```
You can use `-m 4o` as an even shorter shortcut.

Pass `--model <model name>` to use a different model. Run `llm models` to see a list of available models.

Or if you know the name is too long to type, use `-q` once or more to provide search terms - the model with the shortest model ID that matches all of those terms (as a lowercase substring) will be used:
```bash
llm 'Ten names for cheesecakes' -q 4o -q mini
```
To change the default model for the current session, set the `LLM_MODEL` environment variable:
```bash
export LLM_MODEL=gpt-4.1-mini
llm 'Ten names for cheesecakes' # Uses gpt-4.1-mini
```

You can send a prompt directly to standard input like this:
```bash
echo 'Ten names for cheesecakes' | llm
```
If you send text to standard input and provide arguments, the resulting prompt will consist of the piped content followed by the arguments:
```bash
cat myscript.py | llm 'explain this code'
```
Will run a prompt of:
```
<contents of myscript.py> explain this code
```
For models that support them, {ref}`system prompts <usage-system-prompts>` are a better tool for this kind of prompting.

(usage-model-options)=
### Model options

Some models support options. You can pass these using `-o/--option name value` - for example, to set the temperature to 1.5 run this:

```bash
llm 'Ten names for cheesecakes' -o temperature 1.5
```

Use the `llm models --options` command to see which options are supported by each model.

You can also {ref}`configure default options <usage-executing-default-options>` for a model using the `llm models options` commands.

(usage-attachments)=
### Attachments

Some models are multi-modal, which means they can accept input in more than just text. GPT-4o and GPT-4o mini can accept images, and models such as Google Gemini 1.5 can accept audio and video as well.

LLM calls these **attachments**. You can pass attachments using the `-a` option like this:

```bash
llm "describe this image" -a https://static.simonwillison.net/static/2024/pelicans.jpg
```
Attachments can be passed using URLs or file paths, and you can attach more than one attachment to a single prompt:
```bash
llm "extract text" -a image1.jpg -a image2.jpg
```
You can also pipe an attachment to LLM by using `-` as the filename:
```bash
cat image.jpg | llm "describe this image" -a -
```
LLM will attempt to automatically detect the content type of the image. If this doesn't work you can instead use the `--attachment-type` option (`--at` for short) which takes the URL/path plus an explicit content type:
```bash
cat myfile | llm "describe this image" --at - image/jpeg
```

(usage-system-prompts)=
### System prompts

You can use `-s/--system '...'` to set a system prompt.
```bash
llm 'SQL to calculate total sales by month' \
  --system 'You are an exaggerated sentient cheesecake that knows SQL and talks about cheesecake a lot'
```
This is useful for piping content to standard input, for example:
```bash
curl -s 'https://simonwillison.net/2023/May/15/per-interpreter-gils/' | \
  llm -s 'Suggest topics for this post as a JSON array'
```
Or to generate a description of changes made to a Git repository since the last commit:
```bash
git diff | llm -s 'Describe these changes'
```
Different models support system prompts in different ways.

The OpenAI models are particularly good at using system prompts as instructions for how they should process additional input sent as part of the regular prompt.

Other models might use system prompts to change the default voice and attitude of the model.

System prompts can be saved as {ref}`templates <prompt-templates>` to create reusable tools. For example, you can create a template called `pytest` like this:

```bash
llm -s 'write pytest tests for this code' --save pytest
```
And then use the new template like this:
```bash
cat llm/utils.py | llm -t pytest
```
See {ref}`prompt templates <prompt-templates>` for more.

(usage-tools)=
### Tools

Many models support the ability to call {ref}`external tools <tools>`. Tools can be provided {ref}`by plugins <plugin-hooks-register-tools>` or you can pass a `--functions CODE` option to LLM to define one or more Python functions that the model can then call.

```bash
llm --functions '
def multiply(x: int, y: int) -> int:
    """Multiply two numbers."""
    return x * y
' 'what is 34234 * 213345'
```
Add `--td/--tools-debug` to see full details of the tools that are being executed. You can also set the `LLM_TOOLS_DEBUG` environment variable to `1` to enable this for all prompts.
```bash
llm --functions '
def multiply(x: int, y: int) -> int:
    """Multiply two numbers."""
    return x * y
' 'what is 34234 * 213345' --td
```
Output:
```
Tool call: multiply({'x': 34234, 'y': 213345})
  7303652730
34234 multiplied by 213345 is 7,303,652,730.
```
Or add `--ta/--tools-approve` to approve each tool call interactively before it is executed:

```bash
llm --functions '
def multiply(x: int, y: int) -> int:
    """Multiply two numbers."""
    return x * y
' 'what is 34234 * 213345' --ta
```
Output:
```
Tool call: multiply({'x': 34234, 'y': 213345})
Approve tool call? [y/N]:
```
The `--functions` option can be passed more than once, and can also point to the filename of a `.py` file containing one or more functions.

If you have any tools that have been made available via plugins you can add them to the prompt using the `--tool/-T` option. For example, using [llm-tools-simpleeval](https://github.com/simonw/llm-tools-simpleeval) like this:

```bash
llm install llm-tools-simpleeval
llm --tool simple_eval "4444 * 233423" --td
```
Run this command to see a list of available tools from plugins:
```bash
llm tools
```
If you run a prompt that uses tools from plugins (as opposed to tools provided using the `--functions` option) continuing that conversation using `llm -c` will reuse the tools from the first prompt. Running `llm chat -c` will start a chat that continues using those same tools. For example:

```
llm -T simple_eval "12345 * 12345" --td
Tool call: simple_eval({'expression': '12345 * 12345'})
  152399025
12345 multiplied by 12345 equals 152,399,025.
llm -c "that * 6" --td
Tool call: simple_eval({'expression': '152399025 * 6'})
  914394150
152,399,025 multiplied by 6 equals 914,394,150.
llm chat -c --td
Chatting with gpt-4.1-mini
Type 'exit' or 'quit' to exit
Type '!multi' to enter multiple lines, then '!end' to finish
Type '!edit' to open your default editor and modify the prompt
> / 123
Tool call: simple_eval({'expression': '914394150 / 123'})
  7434098.780487805
914,394,150 divided by 123 is approximately 7,434,098.78.
```
Some tools are bundled in a configurable collection of tools called a **toolbox**. This means a single `--tool` option can load multiple related tools.

[llm-tools-datasette](https://github.com/simonw/llm-tools-datasette) is one example. Using a toolbox looks like this:

```bash
llm install llm-tools-datasette
llm -T 'Datasette("https://datasette.io/content")' "Show tables" --td
```
Toolboxes always start with a capital letter. They can be configured by passing a tool specification, which should fit the following patterns:

- Empty: `ToolboxName` or `ToolboxName()` - has no configuration arguments
- JSON object: `ToolboxName({"key": "value", "other": 42})`
- Single JSON value: `ToolboxName("hello")` or `ToolboxName([1,2,3])`
- Key-value pairs: `ToolboxName(name="test", count=5, items=[1,2])` - treated the same as `{"name": "test", "count": 5, "items": [1, 2]}`, all values must be valid JSON

Toolboxes are not currently supported with the `llm -c` option, but they work well with `llm chat`. Try chatting with the Datasette content database like this:

```bash
llm chat -T 'Datasette("https://datasette.io/content")' --td
```
```
Chatting with gpt-4.1-mini
Type 'exit' or 'quit' to exit
...
> show tables
```

(usage-extract-fenced-code)=
### Extracting fenced code blocks

If you are using an LLM to generate code it can be useful to retrieve just the code it produces without any of the surrounding explanatory text.

The `-x/--extract` option will scan the response for the first instance of a Markdown fenced code block - something that looks like this:

````
```python
def my_function():
    # ...
```
````
It will extract and return just the content of that block, excluding the fenced code delimiters. If there are no fenced code blocks it will return the full response.

Use `--xl/--extract-last` to return the last fenced code block instead of the first.

The entire response including explanatory text is still logged to the database, and can be viewed using `llm logs -c`.

(usage-schemas)=
### Schemas

Some models include the ability to return JSON that matches a provided [JSON schema](https://json-schema.org/). Models from OpenAI, Anthropic and Google Gemini all include this capability.

Take a look at the {ref}`schemas documentation <schemas>` for a detailed guide to using this feature.

You can pass JSON schemas directly to the `--schema` option:

```bash
llm --schema '{
  "type": "object",
  "properties": {
    "dogs": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "name": {
            "type": "string"
          },
          "bio": {
            "type": "string"
          }
        }
      }
    }
  }
}' -m gpt-4o-mini 'invent two dogs'
```

Or use LLM's custom {ref}`concise schema syntax <schemas-dsl>` like this:
```bash
llm --schema 'name,bio' 'invent a dog'
```
To use the same concise schema for multiple items use `--schema-multi`:
```bash
llm --schema-multi 'name,bio' 'invent two dogs'
```
You can also save the JSON schema to a file and reference the filename using `--schema`:

```bash
llm --schema dogs.schema.json 'invent two dogs'
```

Or save your schema {ref}`to a template <prompt-templates>` like this:

```bash
llm --schema dogs.schema.json --save dogs
# Then to use it:
llm -t dogs 'invent two dogs'
```

Be warned that different models may support different dialects of the JSON schema specification.

See {ref}`schemas-logs` for tips on using the `llm logs --schema X` command to access JSON objects you have previously logged using this option.

(usage-fragments)=
### Fragments

You can use the `-f/--fragment` option to reference fragments of context that you would like to load into your prompt. Fragments can be specified as URLs, file paths or as aliases to previously saved fragments.

Fragments are designed for running longer prompts. LLM {ref}`stores prompts in a database <logging>`, and the same prompt repeated many times can end up stored as multiple copies, wasting disk space. A fragment will be stored just once and referenced by all of the prompts that use it.

The `-f` option can accept a path to a file on disk, a URL or the hash or alias of a previous fragment.

For example, to ask a question about the `robots.txt` file on `llm.datasette.io`:
```bash
llm -f https://llm.datasette.io/robots.txt 'explain this'
```
For a poem inspired by some Python code on disk:
```bash
llm -f cli.py 'a short snappy poem inspired by this code'
```
You can use as many `-f` options as you like - the fragments will be concatenated together in the order you provided, with any additional prompt added at the end.

Fragments can also be used for the system prompt using the `--sf/--system-fragment` option. If you have a file called `explain_code.txt` containing this:

```
Explain this code in detail. Include copies of the code quoted in the explanation.
```
You can run it as the system prompt like this:
```bash
llm -f cli.py --sf explain_code.txt
```

You can use the `llm fragments set` command to load a fragment and give it an alias for use in future queries:
```bash
llm fragments set cli cli.py
# Then
llm -f cli 'explain this code'
```
Use `llm fragments` to list all fragments that have been stored:
```bash
llm fragments
```
You can search by passing one or more `-q X` search strings. This will return results matching all of those strings, across the source, hash, aliases and content:
```bash
llm fragments -q pytest -q asyncio
```

The `llm fragments remove` command removes an alias. It does not delete the fragment record itself as those are linked to previous prompts and responses and cannot be deleted independently of them.
```bash
llm fragments remove cli
```

(usage-conversation)=
### Continuing a conversation

By default, the tool will start a new conversation each time you run it.

You can opt to continue the previous conversation by passing the `-c/--continue` option:
```bash
llm 'More names' -c
```
This will re-send the prompts and responses for the previous conversation as part of the call to the language model. Note that this can add up quickly in terms of tokens, especially if you are using expensive models.

`--continue` will automatically use the same model as the conversation that you are continuing, even if you omit the `-m/--model` option.

To continue a conversation that is not the most recent one, use the `--cid/--conversation <id>` option:
```bash
llm 'More names' --cid 01h53zma5txeby33t1kbe3xk8q
```
You can find these conversation IDs using the `llm logs` command.

### Tips for using LLM with Bash or Zsh

To learn more about your computer's operating system based on the output of `uname -a`, run this:
```bash
llm "Tell me about my operating system: $(uname -a)"
```
This pattern of using `$(command)` inside a double quoted string is a useful way to quickly assemble prompts.

(usage-completion-prompts)=
### Completion prompts

Some models are completion models - rather than being tuned to respond to chat style prompts, they are designed to complete a sentence or paragraph.

An example of this is the `gpt-3.5-turbo-instruct` OpenAI model.

You can prompt that model the same way as the chat models, but be aware that the prompt format that works best is likely to differ.

```bash
llm -m gpt-3.5-turbo-instruct 'Reasons to tame a wild beaver:'
```

(usage-chat)=

## Starting an interactive chat

The `llm chat` command starts an ongoing interactive chat with a model.

This is particularly useful for models that run on your own machine, since it saves them from having to be loaded into memory each time a new prompt is added to a conversation.

Run `llm chat`, optionally with a `-m model_id`, to start a chat conversation:

```bash
llm chat -m chatgpt
```
Each chat starts a new conversation. A record of each conversation can be accessed through {ref}`the logs <logging-conversation>`.

You can pass `-c` to start a conversation as a continuation of your most recent prompt. This will automatically use the most recently used model:

```bash
llm chat -c
```

For models that support them, you can pass options using `-o/--option`:
```bash
llm chat -m gpt-4 -o temperature 0.5
```

You can pass a system prompt to be used for your chat conversation:

```bash
llm chat -m gpt-4 -s 'You are a sentient cheesecake'
```
You can also pass {ref}`a template <prompt-templates>` - useful for creating chat personas that you wish to return to.

Here's how to create a template for your GPT-4 powered cheesecake:
```bash
llm --system 'You are a sentient cheesecake' -m gpt-4 --save cheesecake
```
Now you can start a new chat with your cheesecake any time you like using this:
```bash
llm chat -t cheesecake
```
```
Chatting with gpt-4
Type 'exit' or 'quit' to exit
Type '!multi' to enter multiple lines, then '!end' to finish
Type '!edit' to open your default editor and modify the prompt
Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments
> who are you?
I am a sentient cheesecake, meaning I am an artificial
intelligence embodied in a dessert form, specifically a
cheesecake. However, I don't consume or prepare foods
like humans do, I communicate, learn and help answer
your queries.
```

Type `quit` or `exit` followed by `<enter>` to end a chat session.

Sometimes you may want to paste multiple lines of text into a chat at once - for example when debugging an error message.

To do that, type `!multi` to start a multi-line input. Type or paste your text, then type `!end` and hit `<enter>` to finish.

If your pasted text might itself contain a `!end` line, you can set a custom delimiter using `!multi abc` followed by `!end abc` at the end:

```
Chatting with gpt-4
Type 'exit' or 'quit' to exit
Type '!multi' to enter multiple lines, then '!end' to finish
Type '!edit' to open your default editor and modify the prompt.
Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments
> !multi custom-end
 Explain this error:

   File "/opt/homebrew/Caskroom/miniconda/base/lib/python3.10/urllib/request.py", line 1391, in https_open
    return self.do_open(http.client.HTTPSConnection, req,
  File "/opt/homebrew/Caskroom/miniconda/base/lib/python3.10/urllib/request.py", line 1351, in do_open
    raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 8] nodename nor servname provided, or not known>

 !end custom-end
```

You can also use `!edit` to open your default editor and modify the prompt before sending it to the model.

```
Chatting with gpt-4
Type 'exit' or 'quit' to exit
Type '!multi' to enter multiple lines, then '!end' to finish
Type '!edit' to open your default editor and modify the prompt.
Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments
> !edit
```

`llm chat` takes the same `--tool/-T` and `--functions` options as `llm prompt`. You can use this to start a chat with the specified {ref}`tools <usage-tools>` enabled.

## Listing available models

The `llm models` command lists every model that can be used with LLM, along with their aliases. This includes models that have been installed using {ref}`plugins <plugins>`.

```bash
llm models
```
Example output:
```
OpenAI Chat: gpt-4o (aliases: 4o)
OpenAI Chat: gpt-4o-mini (aliases: 4o-mini)
OpenAI Chat: o1-preview
OpenAI Chat: o1-mini
GeminiPro: gemini-1.5-pro-002
GeminiPro: gemini-1.5-flash-002
...
```

Add one or more `-q term` options to search for models matching all of those search terms:
```bash
llm models -q gpt-4o
llm models -q 4o -q mini
```
Use one or more `-m` options to indicate specific models, either by their model ID or one of their aliases:
```bash
llm models -m gpt-4o -m gemini-1.5-pro-002
```
Add `--options` to also see documentation for the options supported by each model:
```bash
llm models --options
```
Output:
<!-- [[[cog
from click.testing import CliRunner
from llm.cli import cli
result = CliRunner().invoke(cli, ["models", "list", "--options"])
cog.out("```\n{}\n```".format(result.output))
]]] -->
```
OpenAI Chat: gpt-4o (aliases: 4o)
  Options:
    temperature: float
      What sampling temperature to use, between 0 and 2. Higher values like
      0.8 will make the output more random, while lower values like 0.2 will
      make it more focused and deterministic.
    max_tokens: int
      Maximum number of tokens to generate.
    top_p: float
      An alternative to sampling with temperature, called nucleus sampling,
      where the model considers the results of the tokens with top_p
      probability mass. So 0.1 means only the tokens comprising the top 10%
      probability mass are considered. Recommended to use top_p or
      temperature but not both.
    frequency_penalty: float
      Number between -2.0 and 2.0. Positive values penalize new tokens based
      on their existing frequency in the text so far, decreasing the model's
      likelihood to repeat the same line verbatim.
    presence_penalty: float
      Number between -2.0 and 2.0. Positive values penalize new tokens based
      on whether they appear in the text so far, increasing the model's
      likelihood to talk about new topics.
    stop: str
      A string where the API will stop generating further tokens.
    logit_bias: dict, str
      Modify the likelihood of specified tokens appearing in the completion.
      Pass a JSON string like '{"1712":-100, "892":-100, "1489":-100}'
    seed: int
      Integer seed to attempt to sample deterministically
    json_object: boolean
      Output a valid JSON object {...}. Prompt must mention JSON.
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: chatgpt-4o-latest (aliases: chatgpt-4o)
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4o-mini (aliases: 4o-mini)
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4o-audio-preview
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Attachment types:
    audio/mpeg, audio/wav
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4o-audio-preview-2024-12-17
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Attachment types:
    audio/mpeg, audio/wav
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4o-audio-preview-2024-10-01
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Attachment types:
    audio/mpeg, audio/wav
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4o-mini-audio-preview
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Attachment types:
    audio/mpeg, audio/wav
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4o-mini-audio-preview-2024-12-17
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Attachment types:
    audio/mpeg, audio/wav
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4.1 (aliases: 4.1)
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4.1-mini (aliases: 4.1-mini)
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4.1-nano (aliases: 4.1-nano)
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt)
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-3.5-turbo-16k (aliases: chatgpt-16k, 3.5-16k)
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4 (aliases: 4, gpt4)
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4-32k (aliases: 4-32k)
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4-1106-preview
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4-0125-preview
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4-turbo-2024-04-09
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4-turbo (aliases: gpt-4-turbo-preview, 4-turbo, 4t)
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4.5-preview-2025-02-27
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-4.5-preview (aliases: gpt-4.5)
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: o1
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: o1-2024-12-17
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: o1-preview
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: o1-mini
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
  Features:
  - streaming
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: o3-mini
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: o3
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: o4-mini
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5-mini
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5-nano
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5-2025-08-07
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5-mini-2025-08-07
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5-nano-2025-08-07
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5.1
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5.1-chat-latest
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5.2
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5.2-chat-latest
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5.4
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5.4-2026-03-05
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5.4-mini
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5.4-mini-2026-03-17
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5.4-nano
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Chat: gpt-5.4-nano-2026-03-17
  Options:
    temperature: float
    max_tokens: int
    top_p: float
    frequency_penalty: float
    presence_penalty: float
    stop: str
    logit_bias: dict, str
    seed: int
    json_object: boolean
    reasoning_effort: str
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Features:
  - streaming
  - schemas
  - tools
  - async
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
OpenAI Completion: gpt-3.5-turbo-instruct (aliases: 3.5-instruct, chatgpt-instruct)
  Options:
    temperature: float
      What sampling temperature to use, between 0 and 2. Higher values like
      0.8 will make the output more random, while lower values like 0.2 will
      make it more focused and deterministic.
    max_tokens: int
      Maximum number of tokens to generate.
    top_p: float
      An alternative to sampling with temperature, called nucleus sampling,
      where the model considers the results of the tokens with top_p
      probability mass. So 0.1 means only the tokens comprising the top 10%
      probability mass are considered. Recommended to use top_p or
      temperature but not both.
    frequency_penalty: float
      Number between -2.0 and 2.0. Positive values penalize new tokens based
      on their existing frequency in the text so far, decreasing the model's
      likelihood to repeat the same line verbatim.
    presence_penalty: float
      Number between -2.0 and 2.0. Positive values penalize new tokens based
      on whether they appear in the text so far, increasing the model's
      likelihood to talk about new topics.
    stop: str
      A string where the API will stop generating further tokens.
    logit_bias: dict, str
      Modify the likelihood of specified tokens appearing in the completion.
      Pass a JSON string like '{"1712":-100, "892":-100, "1489":-100}'
    seed: int
      Integer seed to attempt to sample deterministically
    logprobs: int
      Include the log probabilities of most likely N per token
  Features:
  - streaming
  Keys:
    key: openai
    env_var: OPENAI_API_KEY

```
<!-- [[[end]]] -->

When running a prompt you can pass the full model name or any of the aliases to the `-m/--model` option:
```bash
llm -m 4o \
  'As many names for cheesecakes as you can think of, with detailed descriptions'
```

(usage-executing-default-options)=

## Setting default options for models

To configure a default option for a specific model, use the `llm models options set` command:
```bash
llm models options set gpt-4o temperature 0.5
```
This option will then be applied automatically any time you run a prompt through the `gpt-4o` model.

Default options are stored in the `model_options.json` file in the LLM configuration directory.

You can list all default options across all models using the `llm models options list` command:
```bash
llm models options list
```
Or show them for an individual model with `llm models options show <model_id>`:
```bash
llm models options show gpt-4o
```
To clear a default option, use the `llm models options clear` command:
```bash
llm models options clear gpt-4o temperature
```
Or clear all default options for a model like this:
```bash
llm models options clear gpt-4o
```
Default model options are respected by both the `llm prompt` and the `llm chat` commands. They will not be applied when you use LLM as a {ref}`Python library <python-api>`.


================================================
FILE: llm/__init__.py
================================================
from .hookspecs import hookimpl
from .errors import (
    ModelError,
    NeedsKeyException,
)
from .models import (
    AsyncConversation,
    AsyncKeyModel,
    AsyncModel,
    AsyncResponse,
    Attachment,
    CancelToolCall,
    Conversation,
    EmbeddingModel,
    EmbeddingModelWithAliases,
    KeyModel,
    Model,
    ModelWithAliases,
    Options,
    Prompt,
    Response,
    Tool,
    Toolbox,
    ToolCall,
    ToolOutput,
    ToolResult,
)
from .utils import schema_dsl, Fragment
from .embeddings import Collection
from .templates import Template
from .plugins import pm, load_plugins
import click
from typing import Any, Dict, List, Optional, Callable, Type, Union
import inspect
import json
import os
import pathlib
import struct

# Names exported by ``from llm import *``.
__all__ = [
    "AsyncConversation",
    "AsyncKeyModel",
    "AsyncModel",
    "AsyncResponse",
    "Attachment",
    "CancelToolCall",
    "Collection",
    "Conversation",
    "Fragment",
    "get_async_model",
    "get_key",
    "get_model",
    "hookimpl",
    "KeyModel",
    "Model",
    "ModelError",
    "NeedsKeyException",
    "Options",
    "Prompt",
    "Response",
    "Template",
    "Tool",
    "Toolbox",
    "ToolCall",
    "ToolOutput",
    "ToolResult",
    "user_dir",
    "schema_dsl",
]
# Fallback model ID returned by get_default_model() when no
# default_model.txt file exists in the user directory.
DEFAULT_MODEL = "gpt-4o-mini"


def get_plugins(all=False):
    """
    Describe the plugins registered with the plugin manager.

    Each plugin is reported as a dict with ``name`` and ``hooks`` keys. When
    distribution info is available for the plugin a ``version`` key is added
    and ``name`` is replaced by the distribution name.

    :param all: When false, plugins whose module name starts with
        ``llm.default_plugins.`` are left out of the result.
    :return: List of plugin description dicts.
    """
    distinfo_by_plugin = dict(pm.list_plugin_distinfo())
    described = []
    for plugin in pm.get_plugins():
        module_name = plugin.__name__
        if module_name.startswith("llm.default_plugins.") and not all:
            continue
        entry = {
            "name": module_name,
            "hooks": [caller.name for caller in pm.get_hookcallers(plugin)],
        }
        distinfo = distinfo_by_plugin.get(plugin)
        if distinfo:
            entry["version"] = distinfo.version
            # Prefer the "name" attribute, fall back to project_name
            dist_name = getattr(distinfo, "name", None)
            entry["name"] = dist_name or distinfo.project_name
        described.append(entry)
    return described


def get_models_with_aliases() -> List["ModelWithAliases"]:
    """
    Collect every model registered by plugins, paired with its aliases.

    Aliases come from two places: those passed by the plugin when it
    registers the model, followed by any configured for that model ID in
    the user's ``aliases.json`` file.
    """
    # aliases.json maps alias -> model_id; invert it to model_id -> [aliases]
    user_aliases: Dict[str, list] = {}
    aliases_file = user_dir() / "aliases.json"
    if aliases_file.exists():
        for alias, model_id in json.loads(aliases_file.read_text()).items():
            user_aliases.setdefault(model_id, []).append(alias)

    collected = []

    def register(model, async_model=None, aliases=None):
        combined = list(aliases or [])
        combined += user_aliases.get(model.model_id, [])
        collected.append(ModelWithAliases(model, async_model, combined))

    load_plugins()
    pm.hook.register_models(register=register)
    return collected


def _get_loaders(hook_method) -> Dict[str, Callable]:
    """
    Run ``hook_method`` and return the loaders it registered, keyed by prefix.

    If a prefix is registered more than once, later registrations are stored
    under ``prefix_1``, ``prefix_2`` and so on rather than replacing the
    earlier loader.
    """
    load_plugins()
    registry: Dict[str, Callable] = {}

    def register(prefix, loader):
        key = prefix
        attempt = 0
        # Keep bumping the numeric suffix until the key is free
        while key in registry:
            attempt += 1
            key = "{}_{}".format(prefix, attempt)
        registry[key] = loader

    hook_method(register=register)
    return registry


def get_template_loaders() -> Dict[str, Callable[[str], Template]]:
    """Return the template loaders that plugins have registered, keyed by prefix."""
    hook = pm.hook.register_template_loaders
    return _get_loaders(hook)


def get_fragment_loaders() -> Dict[
    str,
    Callable[[str], Union[Fragment, Attachment, List[Union[Fragment, Attachment]]]],
]:
    """Return the fragment loaders that plugins have registered, keyed by prefix."""
    hook = pm.hook.register_fragment_loaders
    return _get_loaders(hook)


def get_tools() -> Dict[str, Union[Tool, Type[Toolbox]]]:
    """
    Return all tools (llm.Tool and llm.Toolbox) registered by plugins.

    Plugins are loaded, then each plugin's ``register_tools`` hook
    implementations are invoked one plugin at a time so that every
    registered tool can be tagged with the name of the plugin that
    provided it.

    The result maps a unique name to either a ``Tool`` instance or a
    ``Toolbox`` subclass (the class itself, not an instance). When two
    registrations resolve to the same name, later ones are stored under
    ``name_1``, ``name_2`` and so on.

    Raises TypeError (from the ``register`` callback) if a plugin registers
    a class that does not inherit from ``llm.Toolbox``.
    """
    load_plugins()
    tools: Dict[str, Union[Tool, Type[Toolbox]]] = {}

    # Name of the plugin whose hooks are currently being called. It is
    # reassigned by the loop at the bottom of this function and read by
    # the register() closure below.
    current_plugin_name = None

    def register(
        tool_or_function: Union[Tool, Type[Toolbox], Callable[..., Any]],
        name: Optional[str] = None,
    ) -> None:
        # Callback handed to plugins: accepts a Toolbox subclass, a Tool
        # instance or a plain callable, plus an optional explicit name.
        tool: Union[Tool, Type[Toolbox], None] = None

        # If it's a Toolbox class, set the plugin field on it
        # (note this mutates the class object itself)
        if inspect.isclass(tool_or_function):
            if issubclass(tool_or_function, Toolbox):
                tool = tool_or_function
                if current_plugin_name:
                    tool.plugin = current_plugin_name
                tool.name = name or tool.__name__
            else:
                raise TypeError(
                    "Toolbox classes must inherit from llm.Toolbox, {} does not.".format(
                        tool_or_function.__name__
                    )
                )

        # If it's already a Tool instance, use it directly
        elif isinstance(tool_or_function, Tool):
            tool = tool_or_function
            # An explicit name overrides the name the Tool already carries
            if name:
                tool.name = name
            if current_plugin_name:
                tool.plugin = current_plugin_name

        # If it's a bare function, wrap it in a Tool
        else:
            tool = Tool.function(tool_or_function, name=name)
            if current_plugin_name:
                tool.plugin = current_plugin_name

        # Get the name for the tool/toolbox
        if tool:
            # For Toolbox classes, use their name attribute or class name
            if inspect.isclass(tool) and issubclass(tool, Toolbox):
                prefix = name or getattr(tool, "name", tool.__name__) or ""
            else:
                prefix = name or tool.name or ""

            suffix = 0
            candidate = prefix

            # Avoid name collisions by appending _1, _2, ... until unused
            while candidate in tools:
                suffix += 1
                candidate = f"{prefix}_{suffix}"

            tools[candidate] = tool

    # Call each plugin's register_tools hook individually to track current_plugin_name
    for plugin in pm.get_plugins():
        current_plugin_name = pm.get_name(plugin)
        hook_caller = pm.hook.register_tools
        # Only the hook implementations that belong to this plugin
        plugin_impls = [
            impl for impl in hook_caller.get_hookimpls() if impl.plugin is plugin
        ]
        for impl in plugin_impls:
            # Invoke the implementation function directly with our callback
            impl.function(register=register)

    return tools


def get_embedding_models_with_aliases() -> List["EmbeddingModelWithAliases"]:
    """
    Collect every embedding model registered by plugins, paired with its aliases.

    Aliases passed at registration come first, followed by any configured
    for that model ID in the user's ``aliases.json`` file.
    """
    # aliases.json maps alias -> model_id; invert it to model_id -> [aliases]
    user_aliases: Dict[str, list] = {}
    aliases_file = user_dir() / "aliases.json"
    if aliases_file.exists():
        for alias, model_id in json.loads(aliases_file.read_text()).items():
            user_aliases.setdefault(model_id, []).append(alias)

    collected = []

    def register(model, aliases=None):
        combined = list(aliases or [])
        combined += user_aliases.get(model.model_id, [])
        collected.append(EmbeddingModelWithAliases(model, combined))

    load_plugins()
    pm.hook.register_embedding_models(register=register)
    return collected


def get_embedding_models():
    """Return every embedding model registered by plugins, without alias information."""
    collected = []

    def register(model, aliases=None):
        # aliases is accepted so plugins can pass it, but is not used here
        collected.append(model)

    load_plugins()
    pm.hook.register_embedding_models(register=register)
    return collected


def get_embedding_model(name):
    """
    Look up an embedding model by model ID or alias.

    Raises UnknownModelError if nothing is registered under ``name``.
    """
    lookup = get_embedding_model_aliases()
    try:
        model = lookup[name]
    except KeyError:
        raise UnknownModelError("Unknown model: " + str(name))
    return model


def get_embedding_model_aliases() -> Dict[str, EmbeddingModel]:
    """
    Build a lookup from every alias and model ID to its embedding model.

    For each model the aliases are written first and the model ID last, so
    a model ID entry replaces an identical key written earlier.
    """
    lookup = {}
    for entry in get_embedding_models_with_aliases():
        model = entry.model
        lookup.update(dict.fromkeys(entry.aliases, model))
        lookup[model.model_id] = model
    return lookup


def get_async_model_aliases() -> Dict[str, AsyncModel]:
    """
    Build a lookup from every alias and model ID to its async model.

    Registrations that have no async model are skipped. The model ID key is
    taken from the registration's sync ``model``.
    """
    lookup = {}
    for entry in get_models_with_aliases():
        async_model = entry.async_model
        if not async_model:
            continue
        for alias in entry.aliases:
            lookup[alias] = async_model
        lookup[entry.model.model_id] = async_model
    return lookup


def get_model_aliases() -> Dict[str, Model]:
    """
    Build a lookup from every alias and model ID to its sync model.

    Registrations that have no sync model are skipped.
    """
    lookup = {}
    for entry in get_models_with_aliases():
        model = entry.model
        if not model:
            continue
        lookup.update(dict.fromkeys(entry.aliases, model))
        lookup[model.model_id] = model
    return lookup


class UnknownModelError(KeyError):
    """Raised when a model ID or alias does not match any registered model."""


def get_models() -> List[Model]:
    """Return the sync model of every registration that has one."""
    found = []
    for entry in get_models_with_aliases():
        if entry.model:
            found.append(entry.model)
    return found


def get_async_models() -> List[AsyncModel]:
    """Return the async model of every registration that has one."""
    found = []
    for entry in get_models_with_aliases():
        if entry.async_model:
            found.append(entry.async_model)
    return found


def get_async_model(name: Optional[str] = None) -> AsyncModel:
    """
    Look up an async model by model ID or alias.

    :param name: Model ID or alias. Falls back to the configured default
        model when omitted or empty.
    :raises UnknownModelError: If no async model matches. The message says
        so when a sync model with that name does exist.
    """
    registry = get_async_model_aliases()
    name = name or get_default_model()
    try:
        return registry[name]
    except KeyError:
        # Check for a sync-only model so the error can be more specific
        try:
            sync_exists = bool(get_model(name, _skip_async=True))
        except UnknownModelError:
            sync_exists = False
        if not sync_exists:
            raise UnknownModelError("Unknown model: " + name)
        raise UnknownModelError("Unknown async model (sync model exists): " + name)


def get_model(name: Optional[str] = None, _skip_async: bool = False) -> Model:
    """
    Look up a sync model by model ID or alias.

    :param name: Model ID or alias. Falls back to the configured default
        model when omitted or empty.
    :param _skip_async: Internal flag. When true, a miss raises immediately
        instead of checking for an async model of the same name.
    :raises UnknownModelError: If no sync model matches. The message says
        so when an async model with that name does exist.
    """
    registry = get_model_aliases()
    name = name or get_default_model()
    try:
        return registry[name]
    except KeyError:
        if _skip_async:
            raise UnknownModelError("Unknown model: " + name)
        # Check for an async-only model so the error can be more specific
        try:
            async_exists = bool(get_async_model(name))
        except UnknownModelError:
            async_exists = False
        if not async_exists:
            raise UnknownModelError("Unknown model: " + name)
        raise UnknownModelError("Unknown model (async model exists): " + name)


def get_key(
    explicit_key: Optional[str] = None,
    key_alias: Optional[str] = None,
    env_var: Optional[str] = None,
    *,
    alias: Optional[str] = None,
    env: Optional[str] = None,
    input: Optional[str] = None,
) -> Optional[str]:
    """
    Resolve an API key from several possible sources, in priority order.

    Prefer the keyword arguments; the positional arguments exist only for
    backwards-compatibility with older code. A truthy keyword argument
    replaces its positional counterpart.

    Resolution order:

    1. ``input`` names an entry in keys.json: that stored key is returned.
    2. ``input`` is otherwise non-empty: it is returned as the key itself.
    3. ``alias`` names an entry in keys.json: that stored key is returned.
    4. The environment variable ``env`` is set and non-empty: its value.
    5. Otherwise ``None``.

    :param input: Input provided by the user. This may be the key, or an alias of a key in keys.json.
    :param alias: The alias used to retrieve the key from the keys.json file.
    :param env: Name of the environment variable to check for the key as a final fallback.
    """
    key_alias = alias or key_alias
    env_var = env or env_var
    explicit_key = input or explicit_key

    stored = load_keys()
    # User input that matches a stored alias resolves to the stored key
    if explicit_key in stored:
        return stored[explicit_key]
    if not explicit_key:
        # No user input: stored key for the alias wins over the environment
        if key_alias in stored:
            return stored[key_alias]
        env_value = os.environ.get(env_var) if env_var else None
        # An empty environment variable counts as not set
        return env_value or None
    # User input that is not an alias is taken to be the key itself
    return explicit_key


def load_keys():
    """Return the parsed contents of keys.json in the user directory, or {} if it does not exist."""
    keys_file = user_dir() / "keys.json"
    if not keys_file.exists():
        return {}
    return json.loads(keys_file.read_text())


def user_dir():
    """
    Return the LLM configuration directory as a pathlib.Path, creating it if needed.

    The LLM_USER_PATH environment variable takes priority when set to a
    non-empty value; otherwise Click's application directory for
    "io.datasette.llm" is used.
    """
    override = os.environ.get("LLM_USER_PATH")
    location = pathlib.Path(override or click.get_app_dir("io.datasette.llm"))
    location.mkdir(parents=True, exist_ok=True)
    return location


def set_alias(alias, model_id_or_alias):
    """
    Point ``alias`` at a model and persist it in aliases.json.

    ``model_id_or_alias`` is resolved to a model ID by trying it as a model
    first and then as an embedding model. If neither lookup succeeds the
    string is stored exactly as given.
    """
    aliases_file = user_dir() / "aliases.json"
    aliases_file.parent.mkdir(parents=True, exist_ok=True)
    if not aliases_file.exists():
        aliases_file.write_text("{}\n")
    try:
        mapping = json.loads(aliases_file.read_text())
    except json.decoder.JSONDecodeError:
        # Invalid content is discarded; a valid file is written below
        mapping = {}

    # Default to the literal string unless one of the lookups resolves it
    model_id = model_id_or_alias
    for resolver in (get_model, get_embedding_model):
        try:
            model_id = resolver(model_id_or_alias).model_id
            break
        except UnknownModelError:
            continue

    mapping[alias] = model_id
    aliases_file.write_text(json.dumps(mapping, indent=4) + "\n")


def remove_alias(alias):
    """
    Delete ``alias`` from aliases.json.

    Raises KeyError if the file is missing, is not valid JSON, or does not
    contain the alias.
    """
    aliases_file = user_dir() / "aliases.json"
    if not aliases_file.exists():
        raise KeyError("No aliases.json file exists")
    try:
        mapping = json.loads(aliases_file.read_text())
    except json.decoder.JSONDecodeError:
        raise KeyError("aliases.json file is not valid JSON")
    if alias not in mapping:
        raise KeyError(f"No such alias: {alias}")
    del mapping[alias]
    serialized = json.dumps(mapping, indent=4)
    aliases_file.write_text(serialized + "\n")


def encode(values):
    """Pack a sequence of numbers into bytes as little-endian 32-bit floats."""
    layout = "<{}f".format(len(values))
    return struct.pack(layout, *values)


def decode(binary):
    """Unpack bytes holding little-endian 32-bit floats into a tuple of floats."""
    count = len(binary) // 4
    layout = "<{}f".format(count)
    return struct.unpack(layout, binary)


def cosine_similarity(a, b):
    """
    Return the cosine similarity of two numeric vectors.

    Computed as the dot product divided by the product of the two Euclidean
    magnitudes. The dot product pairs elements with zip(), so it only covers
    the overlapping prefix if the vectors differ in length.

    If either vector has zero magnitude (all zeros, or empty) the similarity
    is undefined; 0.0 is returned instead of raising ZeroDivisionError.
    """
    dot_product = sum(x * y for x, y in zip(a, b))
    magnitude_a = sum(x * x for x in a) ** 0.5
    magnitude_b = sum(x * x for x in b) ** 0.5
    denominator = magnitude_a * magnitude_b
    if not denominator:
        # Undefined for a zero-length vector: report "no similarity"
        return 0.0
    return dot_product / denominator


def get_default_model(filename="default_model.txt", default=DEFAULT_MODEL):
    """
    Return the default model ID stored in ``filename`` within the user directory.

    The file content is returned with surrounding whitespace stripped. If
    the file does not exist, ``default`` is returned instead.
    """
    stored = user_dir() / filename
    if not stored.exists():
        return default
    return stored.read_text().strip()


def set_default_model(model, filename="default_model.txt"):
    """
    Persist ``model`` as the default model ID in ``filename`` within the user directory.

    Passing ``None`` clears the default by deleting the file. Clearing when
    no file exists is a no-op; previously that case fell through to
    ``path.write_text(None)`` and raised TypeError.
    """
    path = user_dir() / filename
    if model is None:
        if path.exists():
            path.unlink()
    else:
        path.write_text(model)


def get_default_embedding_model():
    """Return the default embedding model ID, or None if none has been stored."""
    return get_default_model(filename="default_embedding_model.txt", default=None)


def set_default_embedding_model(model):
    """Store ``model`` as the default embedding model ID via set_default_model()."""
    set_default_model(model, filename="default_embedding_model.txt")


================================================
FILE: llm/__main__.py
================================================
# Package entry point for ``python -m llm``: hands control to the ``cli``
# callable imported from the sibling cli module.
from .cli import cli

if __name__ == "__main__":
    cli()


================================================
FILE: llm/cli.py
================================================
import asyncio
import click
from click_default_group import DefaultGroup
from dataclasses import asdict
from importlib.metadata import version
import io
import json
import os
from llm import (
    Attachment,
    AsyncConversation,
    AsyncKeyModel,
    AsyncResponse,
    CancelToolCall,
    Collection,
    Conversation,
    Fragment,
    Response,
    Template,
    Tool,
    Toolbox,
    UnknownModelError,
    KeyModel,
    encode,
    get_async_model,
    get_default_model,
    get_default_embedding_model,
    get_embedding_models_with_aliases,
    get_embedding_model_aliases,
    get_embedding_model,
    get_plugins,
    get_tools,
    get_fragment_loaders,
    get_template_loaders,
    get_model,
    get_model_aliases,
    get_models_with_aliases,
    user_dir,
    set_alias,
    set_default_model,
    set_default_embedding_model,
    remove_alias,
)
from llm.models import _BaseConversation, ChainResponse

from .migrations import migrate
from .plugins import pm, load_plugins
from .utils import (
    ensure_fragment,
    extract_fenced_code_block,
    find_unused_key,
    has_plugin_prefix,
    instantiate_from_spec,
    make_schema_id,
    maybe_fenced_code,
    mimetype_from_path,
    mimetype_from_string,
    multi_schema,
    output_rows_as_json,
    resolve_schema_input,
    schema_dsl,
    schema_summary,
    token_usage_string,
    truncate_string,
)
import base64
import httpx
import inspect
import pathlib
import pydantic
import re
import readline
from runpy import run_module
import shutil
import sqlite_utils
from sqlite_utils.utils import rows_from_file, Format
import sys
import textwrap
from typing import cast, Dict, Optional, Iterable, List, Union, Tuple, Type, Any
import warnings
import yaml

# Suppress ResourceWarning messages for the whole process.
warnings.simplefilter("ignore", ResourceWarning)

# NOTE(review): presumably the starter content for a newly created template - confirm against its usage.
DEFAULT_TEMPLATE = "prompt: "


class FragmentNotFound(Exception):
    """Raised when a fragment reference cannot be resolved."""


def validate_fragment_alias(ctx, param, value):
    """
    Click-style callback that validates a fragment alias.

    An alias may contain only ASCII letters, digits, underscores and hyphens.

    Returns the value unchanged when it is valid.
    Raises click.BadParameter otherwise.
    """
    # fullmatch() instead of match() with a "$" anchor: "$" also matches just
    # before a trailing newline, which would let "alias\n" through.
    if not re.fullmatch(r"[a-zA-Z0-9_-]+", value):
        raise click.BadParameter("Fragment alias must be alphanumeric")
    return value


def resolve_fragments(
    db: sqlite_utils.Database, fragments: Iterable[str], allow_attachments: bool = False
) -> List[Union[Fragment, Attachment]]:
    """
    Resolve fragment strings into a mix of llm.Fragment() and llm.Attachment() objects.

    Each string is tried, in this order, as:

    - an http:// or https:// URL, fetched with httpx
    - "-", meaning read all of standard input
    - a plugin reference ("prefix:rest", as detected by has_plugin_prefix),
      handed to the fragment loader registered for that prefix
    - an alias or hash stored in the fragments tables of db
    - a path on disk

    Attachments can only be produced by fragment loaders, and are rejected
    unless allow_attachments is True.

    Returns the resolved objects in input order (a loader may contribute
    several items for one string).

    Raises FragmentNotFound if a string cannot be resolved. Errors from
    fetching a URL are not wrapped and propagate as raised by httpx.
    """

    def _load_by_alias(fragment: str) -> Tuple[Optional[str], Optional[str]]:
        # Look the string up as either an alias or a content hash;
        # returns (content, source), or (None, None) when nothing matches.
        rows = list(
            db.query(
                """
                select content, source from fragments
                left join fragment_aliases on fragments.id = fragment_aliases.fragment_id
                where alias = :alias or hash = :alias limit 1
                """,
                {"alias": fragment},
            )
        )
        if rows:
            row = rows[0]
            return row["content"], row["source"]
        return None, None

    # The fragment strings could be URLs or paths or plugin references
    resolved: List[Union[Fragment, Attachment]] = []
    for fragment in fragments:
        if fragment.startswith("http://") or fragment.startswith("https://"):
            llm_version = version("llm")
            headers = {"User-Agent": f"llm/{llm_version} (https://llm.datasette.io/)"}
            # Context-manage the client so its connection pool is closed
            # once the fetch is done, even if the request raises.
            with httpx.Client(
                follow_redirects=True, max_redirects=3, headers=headers
            ) as client:
                response = client.get(fragment)
                response.raise_for_status()
                resolved.append(Fragment(response.text, fragment))
        elif fragment == "-":
            resolved.append(Fragment(sys.stdin.read(), "-"))
        elif has_plugin_prefix(fragment):
            prefix, rest = fragment.split(":", 1)
            loaders = get_fragment_loaders()
            if prefix not in loaders:
                raise FragmentNotFound("Unknown fragment prefix: {}".format(prefix))
            loader = loaders[prefix]
            try:
                result = loader(rest)
                # Loaders may return a single item or a list of them
                if not isinstance(result, list):
                    result = [result]
                if not allow_attachments and any(
                    isinstance(r, Attachment) for r in result
                ):
                    raise FragmentNotFound(
                        "Fragment loader {} returned a disallowed attachment".format(
                            prefix
                        )
                    )
                resolved.extend(result)
            except Exception as ex:
                # Any loader failure (including the disallowed-attachment
                # error above) is reported as FragmentNotFound; keep the
                # original exception as the explicit cause.
                raise FragmentNotFound(
                    "Could not load fragment {}: {}".format(fragment, ex)
                ) from ex
        else:
            # Try from the DB
            content, source = _load_by_alias(fragment)
            if content is not None:
                resolved.append(Fragment(content, source))
            else:
                # Now try path
                path = pathlib.Path(fragment)
                if path.exists():
                    resolved.append(Fragment(path.read_text(), str(path.resolve())))
                else:
                    raise FragmentNotFound(f"Fragment '{fragment}' not found")
    return resolved


def process_fragments_in_chat(
    db: sqlite_utils.Database, prompt: str
) -> tuple[str, list[Fragment], list[Attachment]]:
    """
    Strip "!fragment ..." lines out of a chat prompt and resolve what they name.

    Every line starting with "!fragment " is removed from the prompt; the
    whitespace-separated references on it are resolved (attachments allowed).

    Returns (prompt without those lines, fragments, attachments).
    Raises click.ClickException if any reference cannot be resolved.
    """
    kept_lines = []
    fragments = []
    attachments = []
    for line in prompt.splitlines():
        if not line.startswith("!fragment "):
            kept_lines.append(line)
            continue
        references = line.strip().removeprefix("!fragment ").split()
        try:
            loaded = resolve_fragments(
                db, fragments=references, allow_attachments=True
            )
        except FragmentNotFound as ex:
            raise click.ClickException(str(ex))
        # Sort the results into the two output lists, preserving order
        for item in loaded:
            if isinstance(item, Fragment):
                fragments.append(item)
            if isinstance(item, Attachment):
                attachments.append(item)
    return "\n".join(kept_lines), fragments, attachments


class AttachmentError(Exception):
    """Signals that an attachment value could not be resolved."""


def resolve_attachment(value):
    """
    Turn a string into an Attachment. The string is interpreted as:

    - "-": binary content read from stdin, mimetype sniffed from the bytes
    - anything containing "://": a URL, checked with an HTTP HEAD request,
      using the response's content-type header as the type
    - otherwise: a file path, which must exist, mimetype guessed from the path

    Returns an Attachment object.
    Raises AttachmentError if the attachment cannot be resolved.
    """
    if value == "-":
        stdin_bytes = sys.stdin.buffer.read()
        detected = mimetype_from_string(stdin_bytes)
        if detected is None:
            raise AttachmentError("Could not determine mimetype of stdin")
        return Attachment(type=detected, path=None, url=None, content=stdin_bytes)

    if "://" in value:
        # HEAD request confirms the URL is reachable and reports its type
        try:
            head_response = httpx.head(value)
            head_response.raise_for_status()
            content_type = head_response.headers.get("content-type")
        except httpx.HTTPError as ex:
            raise AttachmentError(str(ex))
        return Attachment(type=content_type, path=None, url=value, content=None)

    candidate = pathlib.Path(value)
    if not candidate.exists():
        raise AttachmentError(f"File {value} does not exist")
    absolute = str(candidate.resolve())

    guessed = mimetype_from_path(absolute)
    if guessed is None:
        raise AttachmentError(f"Could not determine mimetype of {value}")

    return Attachment(type=guessed, path=absolute, url=None, content=None)


class AttachmentType(click.ParamType):
    """Click parameter type that converts a path, URL or "-" into an Attachment."""

    name = "attachment"

    def convert(self, value, param, ctx):
        # Report resolution problems through click's own failure mechanism
        try:
            attachment = resolve_attachment(value)
        except AttachmentError as error:
            self.fail(str(error), param, ctx)
        else:
            return attachment


def resolve_attachment_with_type(value: str, mimetype: str) -> Attachment:
    """
    Build an Attachment for value using the caller-supplied mimetype.

    value is treated as a URL if it contains "://", as stdin if it is "-",
    and otherwise as a file path that must exist.

    Raises click.BadParameter if the file path does not exist.
    """
    if "://" in value:
        return Attachment(mimetype, None, value, None)

    if value == "-":
        stdin_bytes = sys.stdin.buffer.read()
        return Attachment(mimetype, None, None, stdin_bytes)

    # Anything else must be a file on disk
    file_path = pathlib.Path(value)
    if not file_path.exists():
        raise click.BadParameter(f"File {value} does not exist")
    return Attachment(mimetype, str(file_path.resolve()), None, None)


def attachment_types_callback(ctx, param, values) -> List[Attachment]:
    """Click callback: convert each (value, mimetype) pair into an Attachment."""
    return [
        resolve_attachment_with_type(raw_value, declared_type)
        for raw_value, declared_type in values
    ]


def json_validator(object_name):
    """
    Build a click callback that parses an option value as a JSON object.

    The callback passes None through untouched, returns the parsed dict for
    valid input, and raises click.BadParameter (naming object_name) when the
    value is not valid JSON or is JSON of some other type.
    """

    def validator(ctx, param, value):
        if value is None:
            return None
        try:
            parsed = json.loads(value)
        except json.JSONDecodeError:
            raise click.BadParameter(f"{object_name} must be valid JSON")
        if isinstance(parsed, dict):
            return parsed
        raise click.BadParameter(f"{object_name} must be a JSON object")

    return validator


def schema_option(fn):
    """Decorator that adds the --schema option (passed as schema_input) to fn."""
    add_schema_option = click.option(
        "schema_input",
        "--schema",
        help="JSON schema, filepath or ID",
    )
    add_schema_option(fn)
    return fn


# Root command group for the CLI. DefaultGroup is configured with "prompt" as
# the default subcommand, also used when no arguments are given. Both -h and
# --help show help. The docstring below is the group's help text, so any edit
# to it changes what users see.
@click.group(
    cls=DefaultGroup,
    default="prompt",
    default_if_no_args=True,
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.version_option()
def cli():
    """
    Access Large Language Models from the command-line

    Documentation: https://llm.datasette.io/

    LLM can run models from many different providers. Consult the
    plugin directory for a list of available models:

    https://llm.datasette.io/en/stable/plugins/directory.html

    To get started with OpenAI, obtain an API key from them and:

    \b
        $ llm keys set openai
        Enter key: ...

    Then execute a prompt like this:

        llm 'Five outrageous names for a pet pelican'

    For a full list of prompting options run:

        llm prompt --help
    """


# "prompt" subcommand: runs a single prompt against a model.
#
# In order, the function:
#   1. opens (and migrates) the log database
#   2. picks a model from -m, -q queries, the template, the continued
#      conversation or the configured default
#   3. with --save, writes the given settings to a template YAML file and
#      returns without running anything
#   4. with --template, merges the template's settings with the CLI options
#   5. validates model options, resolves fragments, attachments and tools
#   6. executes the prompt (sync or async, streaming or not) and prints it
#   7. optionally prints token usage and logs the response to the database
#
# The docstring is the command's help text, so it is user-facing output.
@cli.command(name="prompt")
@click.argument("prompt", required=False)
@click.option("-s", "--system", help="System prompt to use")
@click.option("model_id", "-m", "--model", help="Model to use", envvar="LLM_MODEL")
@click.option(
    "-d",
    "--database",
    type=click.Path(readable=True, dir_okay=False),
    help="Path to log database",
)
@click.option(
    "queries",
    "-q",
    "--query",
    multiple=True,
    help="Use first model matching these strings",
)
@click.option(
    "attachments",
    "-a",
    "--attachment",
    type=AttachmentType(),
    multiple=True,
    help="Attachment path or URL or -",
)
@click.option(
    "attachment_types",
    "--at",
    "--attachment-type",
    type=(str, str),
    multiple=True,
    callback=attachment_types_callback,
    help="\b\nAttachment with explicit mimetype,\n--at image.jpg image/jpeg",
)
@click.option(
    "tools",
    "-T",
    "--tool",
    multiple=True,
    help="Name of a tool to make available to the model",
)
@click.option(
    "python_tools",
    "--functions",
    help="Python code block or file path defining functions to register as tools",
    multiple=True,
)
@click.option(
    "tools_debug",
    "--td",
    "--tools-debug",
    is_flag=True,
    help="Show full details of tool executions",
    envvar="LLM_TOOLS_DEBUG",
)
@click.option(
    "tools_approve",
    "--ta",
    "--tools-approve",
    is_flag=True,
    help="Manually approve every tool execution",
)
@click.option(
    "chain_limit",
    "--cl",
    "--chain-limit",
    type=int,
    default=5,
    help="How many chained tool responses to allow, default 5, set 0 for unlimited",
)
@click.option(
    "options",
    "-o",
    "--option",
    type=(str, str),
    multiple=True,
    help="key/value options for the model",
)
@schema_option
@click.option(
    "--schema-multi",
    help="JSON schema to use for multiple results",
)
@click.option(
    "fragments",
    "-f",
    "--fragment",
    multiple=True,
    help="Fragment (alias, URL, hash or file path) to add to the prompt",
)
@click.option(
    "system_fragments",
    "--sf",
    "--system-fragment",
    multiple=True,
    help="Fragment to add to system prompt",
)
@click.option("-t", "--template", help="Template to use")
@click.option(
    "-p",
    "--param",
    multiple=True,
    type=(str, str),
    help="Parameters for template",
)
@click.option("--no-stream", is_flag=True, help="Do not stream output")
@click.option("-n", "--no-log", is_flag=True, help="Don't log to database")
@click.option("--log", is_flag=True, help="Log prompt and response to the database")
@click.option(
    "_continue",
    "-c",
    "--continue",
    is_flag=True,
    flag_value=-1,
    help="Continue the most recent conversation.",
)
@click.option(
    "conversation_id",
    "--cid",
    "--conversation",
    help="Continue the conversation with the given ID.",
)
@click.option("--key", help="API key to use")
@click.option("--save", help="Save prompt with this template name")
@click.option("async_", "--async", is_flag=True, help="Run prompt asynchronously")
@click.option("-u", "--usage", is_flag=True, help="Show token usage")
@click.option("-x", "--extract", is_flag=True, help="Extract first fenced code block")
@click.option(
    "extract_last",
    "--xl",
    "--extract-last",
    is_flag=True,
    help="Extract last fenced code block",
)
def prompt(
    prompt,
    system,
    model_id,
    database,
    queries,
    attachments,
    attachment_types,
    tools,
    python_tools,
    tools_debug,
    tools_approve,
    chain_limit,
    options,
    schema_input,
    schema_multi,
    fragments,
    system_fragments,
    template,
    param,
    no_stream,
    no_log,
    log,
    _continue,
    conversation_id,
    key,
    save,
    async_,
    usage,
    extract,
    extract_last,
):
    """
    Execute a prompt

    Documentation: https://llm.datasette.io/en/stable/usage.html

    Examples:

    \b
        llm 'Capital of France?'
        llm 'Capital of France?' -m gpt-4o
        llm 'Capital of France?' -s 'answer in Spanish'

    Multi-modal models can be called with attachments like this:

    \b
        llm 'Extract text from this image' -a image.jpg
        llm 'Describe' -a https://static.simonwillison.net/static/2024/pelicans.jpg
        cat image | llm 'describe image' -a -
        # With an explicit mimetype:
        cat image | llm 'describe image' --at - image/jpeg

    The -x/--extract option returns just the content of the first ``` fenced code
    block, if one is present. If none are present it returns the full response.

    \b
        llm 'JavaScript function for reversing a string' -x
    """
    if log and no_log:
        raise click.ClickException("--log and --no-log are mutually exclusive")

    # Open the log database (creating its parent directory if needed) and
    # bring its schema up to date
    log_path = pathlib.Path(database) if database else logs_db_path()
    (log_path.parent).mkdir(parents=True, exist_ok=True)
    db = sqlite_utils.Database(log_path)
    migrate(db)

    if queries and not model_id:
        # Use -q options to find model with shortest model_id
        matches = []
        for model_with_aliases in get_models_with_aliases():
            if all(model_with_aliases.matches(q) for q in queries):
                matches.append(model_with_aliases.model.model_id)
        if not matches:
            raise click.ClickException(
                "No model found matching queries {}".format(", ".join(queries))
            )
        model_id = min(matches, key=len)

    # --schema-multi takes precedence over --schema when both are given
    if schema_multi:
        schema_input = schema_multi

    schema = resolve_schema_input(db, schema_input, load_template)

    if schema_multi:
        # Convert that schema into multiple "items" of the same schema
        schema = multi_schema(schema)

    def read_prompt():
        # Combines piped stdin with the prompt argument; updates the
        # enclosing prompt variable and returns it
        nonlocal prompt, schema

        # Is there extra prompt available on stdin?
        stdin_prompt = None
        if not sys.stdin.isatty():
            stdin_prompt = sys.stdin.read()

        if stdin_prompt:
            bits = [stdin_prompt]
            if prompt:
                bits.append(prompt)
            prompt = " ".join(bits)

        if (
            prompt is None
            and not save
            and sys.stdin.isatty()
            and not attachments
            and not attachment_types
            and not schema
            and not fragments
        ):
            # Hang waiting for input to stdin (unless --save)
            prompt = sys.stdin.read()
        return prompt

    if save:
        # We are saving their prompt/system/etc to a new template
        # Fields to save: prompt, system, model - and more in the future
        disallowed_options = []
        for option, var in (
            ("--template", template),
            ("--continue", _continue),
            ("--cid", conversation_id),
        ):
            if var:
                disallowed_options.append(option)
        if disallowed_options:
            raise click.ClickException(
                "--save cannot be used with {}".format(", ".join(disallowed_options))
            )
        path = template_dir() / f"{save}.yaml"
        to_save = {}
        if model_id:
            # Store the model's own ID even if an alias was given
            model_aliases = get_model_aliases()
            try:
                to_save["model"] = model_aliases[model_id].model_id
            except KeyError:
                raise click.ClickException("'{}' is not a known model".format(model_id))
        prompt = read_prompt()
        if prompt:
            to_save["prompt"] = prompt
        if system:
            to_save["system"] = system
        if param:
            to_save["defaults"] = dict(param)
        if extract:
            to_save["extract"] = True
        if extract_last:
            to_save["extract_last"] = True
        if schema:
            to_save["schema_object"] = schema
        if fragments:
            to_save["fragments"] = list(fragments)
        if system_fragments:
            to_save["system_fragments"] = list(system_fragments)
        if python_tools:
            to_save["functions"] = "\n\n".join(python_tools)
        if tools:
            to_save["tools"] = list(tools)
        if attachments:
            # Only works for attachments with a path or url
            to_save["attachments"] = [
                (a.path or a.url) for a in attachments if (a.path or a.url)
            ]
        if attachment_types:
            to_save["attachment_types"] = [
                {"type": a.type, "value": a.path or a.url}
                for a in attachment_types
                if (a.path or a.url)
            ]
        if options:
            # Need to validate and convert their types first
            model = get_model(model_id or get_default_model())
            try:
                options_model = model.Options(**dict(options))
                # Use model_dump(mode="json") so Enums become their .value strings
                to_save["options"] = {
                    k: v
                    for k, v in options_model.model_dump(mode="json").items()
                    if v is not None
                }
            except pydantic.ValidationError as ex:
                raise click.ClickException(render_errors(ex.errors()))
        path.write_text(
            yaml.safe_dump(
                to_save,
                indent=4,
                default_flow_style=False,
                sort_keys=False,
            ),
            "utf-8",
        )
        # --save only writes the template; the prompt is not executed
        return

    if template:
        params = dict(param)
        # Cannot be used with system
        try:
            template_obj = load_template(template)
        except LoadTemplateError as ex:
            raise click.ClickException(str(ex))
        # Explicit -x/--xl flags win over the template's extract settings
        if not (extract or extract_last):
            extract = template_obj.extract
            extract_last = template_obj.extract_last
        # Combine with template fragments/system_fragments
        if template_obj.fragments:
            fragments = [*template_obj.fragments, *fragments]
        if template_obj.system_fragments:
            system_fragments = [*template_obj.system_fragments, *system_fragments]
        if template_obj.schema_object:
            schema = template_obj.schema_object
        if template_obj.tools:
            tools = [*template_obj.tools, *tools]
        if template_obj.functions and template_obj._functions_is_trusted:
            python_tools = [template_obj.functions, *python_tools]
        input_ = ""
        if template_obj.options:
            # Make options mutable (they start as a tuple)
            options = list(options)
            # Load any options, provided they were not set using -o already
            specified_options = dict(options)
            for option_name, option_value in template_obj.options.items():
                if option_name not in specified_options:
                    options.append((option_name, option_value))
        if "input" in template_obj.vars():
            input_ = read_prompt()
        try:
            template_prompt, template_system = template_obj.evaluate(input_, params)
            if template_prompt:
                # Combine with user prompt
                if prompt and "input" not in template_obj.vars():
                    prompt = template_prompt + "\n" + prompt
                else:
                    prompt = template_prompt
            if template_system and not system:
                system = template_system
        except Template.MissingVariables as ex:
            raise click.ClickException(str(ex))
        if model_id is None and template_obj.model:
            model_id = template_obj.model
        # Merge in any attachments
        if template_obj.attachments:
            attachments = [
                resolve_attachment(a) for a in template_obj.attachments
            ] + list(attachments)
        if template_obj.attachment_types:
            attachment_types = [
                resolve_attachment_with_type(at.value, at.type)
                for at in template_obj.attachment_types
            ] + list(attachment_types)
    # Extraction needs the complete response text, so streaming is disabled
    if extract or extract_last:
        no_stream = True

    conversation = None
    if conversation_id or _continue:
        # Load the conversation - loads most recent if no ID provided
        try:
            conversation = load_conversation(
                conversation_id, async_=async_, database=database
            )
        except UnknownModelError as ex:
            raise click.ClickException(str(ex))

    if conversation_tools := _get_conversation_tools(conversation, tools):
        tools = conversation_tools

    # Figure out which model we are using
    if model_id is None:
        if conversation:
            model_id = conversation.model.model_id
        else:
            model_id = get_default_model()

    # Now resolve the model
    try:
        if async_:
            model = get_async_model(model_id)
        else:
            model = get_model(model_id)
    except UnknownModelError as ex:
        # ClickException stores its message as given and returns it from
        # __str__, so it must be handed a string rather than the exception
        raise click.ClickException(str(ex))

    # Tool use goes through conversation.chain() below, so make sure a
    # conversation exists whenever tools were requested
    if conversation is None and (tools or python_tools):
        conversation = model.conversation()

    if conversation:
        # To ensure it can see the key
        conversation.model = model

    # Validate options
    validated_options = {}
    if options:
        # Validate with pydantic
        try:
            validated_options = dict(
                (key, value)
                for key, value in model.Options(**dict(options))
                if value is not None
            )
        except pydantic.ValidationError as ex:
            raise click.ClickException(render_errors(ex.errors()))

    # Add on any default model options
    default_options = get_model_options(model.model_id)
    for key_, value in default_options.items():
        if key_ not in validated_options:
            validated_options[key_] = value

    kwargs = {}

    resolved_attachments = [*attachments, *attachment_types]

    should_stream = model.can_stream and not no_stream
    if not should_stream:
        kwargs["stream"] = False

    if isinstance(model, (KeyModel, AsyncKeyModel)):
        kwargs["key"] = key

    prompt = read_prompt()
    response = None

    try:
        # Prompt fragments may yield attachments; system fragments may not
        fragments_and_attachments = resolve_fragments(
            db, fragments, allow_attachments=True
        )
        resolved_fragments = [
            fragment
            for fragment in fragments_and_attachments
            if isinstance(fragment, Fragment)
        ]
        resolved_attachments.extend(
            attachment
            for attachment in fragments_and_attachments
            if isinstance(attachment, Attachment)
        )
        resolved_system_fragments = resolve_fragments(db, system_fragments)
    except FragmentNotFound as ex:
        raise click.ClickException(str(ex))

    prompt_method = model.prompt
    if conversation:
        prompt_method = conversation.prompt

    tool_implementations = _gather_tools(tools, python_tools)

    if tool_implementations:
        # .chain() takes options as a single argument rather than as
        # individual keyword arguments
        prompt_method = conversation.chain
        kwargs["options"] = validated_options
        kwargs["chain_limit"] = chain_limit
        if tools_debug:
            kwargs["after_call"] = _debug_tool_call
        if tools_approve:
            kwargs["before_call"] = _approve_tool_call
        kwargs["tools"] = tool_implementations
    else:
        # Merge in options for the .prompt() methods
        kwargs.update(validated_options)

    try:
        if async_:

            async def inner():
                if should_stream:
                    response = prompt_method(
                        prompt,
                        attachments=resolved_attachments,
                        system=system,
                        schema=schema,
                        fragments=resolved_fragments,
                        system_fragments=resolved_system_fragments,
                        **kwargs,
                    )
                    async for chunk in response:
                        print(chunk, end="")
                        sys.stdout.flush()
                    print("")
                else:
                    response = prompt_method(
                        prompt,
                        fragments=resolved_fragments,
                        attachments=resolved_attachments,
                        schema=schema,
                        system=system,
                        system_fragments=resolved_system_fragments,
                        **kwargs,
                    )
                    text = await response.text()
                    if extract or extract_last:
                        text = (
                            extract_fenced_code_block(text, last=extract_last) or text
                        )
                    print(text)
                return response

            response = asyncio.run(inner())
        else:
            response = prompt_method(
                prompt,
                fragments=resolved_fragments,
                attachments=resolved_attachments,
                system=system,
                schema=schema,
                system_fragments=resolved_system_fragments,
                **kwargs,
            )
            if should_stream:
                for chunk in response:
                    print(chunk, end="")
                    sys.stdout.flush()
                print("")
            else:
                text = response.text()
                if extract or extract_last:
                    text = extract_fenced_code_block(text, last=extract_last) or text
                print(text)
    # List of exceptions that should never be raised in pytest:
    except (ValueError, NotImplementedError) as ex:
        raise click.ClickException(str(ex))
    except Exception as ex:
        # All other exceptions should raise in pytest, show to user otherwise
        if getattr(sys, "_called_from_test", False) or os.environ.get(
            "LLM_RAISE_ERRORS", None
        ):
            raise
        raise click.ClickException(str(ex))

    if usage:
        # A chain holds one response per step; report usage for each
        if isinstance(response, ChainResponse):
            responses = response._responses
        else:
            responses = [response]
        for response_object in responses:
            # Show token usage to stderr in yellow
            click.echo(
                click.style(
                    "Token usage: {}".format(response_object.token_usage()),
                    fg="yellow",
                    bold=True,
                ),
                err=True,
            )

    # Log responses to the database
    if (logs_on() or log) and not no_log:
        # Could be Response, AsyncResponse, ChainResponse, AsyncChainResponse
        if isinstance(response, AsyncResponse):
            response = asyncio.run(response.to_sync_response())
        # At this point ALL forms should have a log_to_db() method that works:
        response.log_to_db(db)


@cli.command()
@click.option("-s", "--system", help="System prompt to use")
@click.option("model_id", "-m", "--model", help="Model to use", envvar="LLM_MODEL")
@click.option(
    "_continue",
    "-c",
    "--continue",
    is_flag=True,
    flag_value=-1,
    help="Continue the most recent conversation.",
)
@click.option(
    "conversation_id",
    "--cid",
    "--conversation",
    help="Continue the conversation with the given ID.",
)
@click.option(
    "fragments",
    "-f",
    "--fragment",
    multiple=True,
    help="Fragment (alias, URL, hash or file path) to add to the prompt",
)
@click.option(
    "system_fragments",
    "--sf",
    "--system-fragment",
    multiple=True,
    help="Fragment to add to system prompt",
)
@click.option("-t", "--template", help="Template to use")
@click.option(
    "-p",
    "--param",
    multiple=True,
    type=(str, str),
    help="Parameters for template",
)
@click.option(
    "options",
    "-o",
    "--option",
    type=(str, str),
    multiple=True,
    help="key/value options for the model",
)
@click.option(
    "-d",
    "--database",
    type=click.Path(readable=True, dir_okay=False),
    help="Path to log database",
)
@click.option("--no-stream", is_flag=True, help="Do not stream output")
@click.option("--key", help="API key to use")
@click.option(
    "tools",
    "-T",
    "--tool",
    multiple=True,
    help="Name of a tool to make available to the model",
)
@click.option(
    "python_tools",
    "--functions",
    help="Python code block or file path defining functions to register as tools",
    multiple=True,
)
@click.option(
    "tools_debug",
    "--td",
    "--tools-debug",
    is_flag=True,
    help="Show full details of tool executions",
    envvar="LLM_TOOLS_DEBUG",
)
@click.option(
    "tools_approve",
    "--ta",
    "--tools-approve",
    is_flag=True,
    help="Manually approve every tool execution",
)
@click.option(
    "chain_limit",
    "--cl",
    "--chain-limit",
    type=int,
    default=5,
    help="How many chained tool responses to allow, default 5, set 0 for unlimited",
)
def chat(
    system,
    model_id,
    _continue,
    conversation_id,
    fragments,
    system_fragments,
    template,
    param,
    options,
    no_stream,
    key,
    database,
    tools,
    python_tools,
    tools_debug,
    tools_approve,
    chain_limit,
):
    """
    Hold an ongoing chat with a model.
    """
    # The docstring above is what click shows as the command's help text, so
    # the longer explanation lives in comments.
    #
    # Flow:
    #   1. Open (and migrate) the logs database.
    #   2. Optionally resume an existing conversation (--continue / --cid).
    #   3. Apply template defaults (model, tools, functions).
    #   4. Resolve the model, options, tools and fragments.
    #   5. Run an interactive read/send/print loop until "exit" or "quit".
    #
    # Raises click.ClickException for unknown models, bad templates, invalid
    # options, missing fragments and missing template variables.

    # Left and right arrow keys to move cursor:
    if sys.platform != "win32":
        readline.parse_and_bind("\\e[D: backward-char")
        readline.parse_and_bind("\\e[C: forward-char")
    else:
        readline.parse_and_bind("bind -x '\\e[D: backward-char'")
        readline.parse_and_bind("bind -x '\\e[C: forward-char'")
    log_path = pathlib.Path(database) if database else logs_db_path()
    (log_path.parent).mkdir(parents=True, exist_ok=True)
    db = sqlite_utils.Database(log_path)
    migrate(db)

    conversation = None
    if conversation_id or _continue:
        # Load the conversation - loads most recent if no ID provided
        try:
            conversation = load_conversation(conversation_id, database=database)
        except UnknownModelError as ex:
            raise click.ClickException(str(ex))

    # A resumed conversation may supply its own tool list, which then wins
    if conversation_tools := _get_conversation_tools(conversation, tools):
        tools = conversation_tools

    template_obj = None
    if template:
        params = dict(param)
        try:
            template_obj = load_template(template)
        except LoadTemplateError as ex:
            raise click.ClickException(str(ex))
        # The template only provides the model when -m/--model was not given
        if model_id is None and template_obj.model:
            model_id = template_obj.model
        if template_obj.tools:
            tools = [*template_obj.tools, *tools]
        # Template-supplied Python code is only used when flagged as trusted
        if template_obj.functions and template_obj._functions_is_trusted:
            python_tools = [template_obj.functions, *python_tools]

    # Figure out which model we are using
    if model_id is None:
        if conversation:
            model_id = conversation.model.model_id
        else:
            model_id = get_default_model()

    # Now resolve the model
    try:
        model = get_model(model_id)
    except KeyError:
        raise click.ClickException("'{}' is not a known model".format(model_id))

    if conversation is None:
        # Start a fresh conversation for this chat
        conversation = Conversation(model=model)
    else:
        # Ensure it can see the API key
        conversation.model = model

    if tools_debug:
        conversation.after_call = _debug_tool_call
    if tools_approve:
        conversation.before_call = _approve_tool_call

    # Validate options
    # Start from the stored per-model options; explicit -o values replace them
    validated_options = get_model_options(model.model_id)
    if options:
        try:
            validated_options = dict(
                (key, value)
                for key, value in model.Options(**dict(options))
                if value is not None
            )
        except pydantic.ValidationError as ex:
            raise click.ClickException(render_errors(ex.errors()))

    # Keyword arguments passed to every conversation.chain() call below
    kwargs = {}
    if validated_options:
        kwargs["options"] = validated_options

    tool_functions = _gather_tools(tools, python_tools)

    if tool_functions:
        kwargs["chain_limit"] = chain_limit
        kwargs["tools"] = tool_functions

    should_stream = model.can_stream and not no_stream
    if not should_stream:
        kwargs["stream"] = False

    if key and isinstance(model, KeyModel):
        kwargs["key"] = key

    try:
        # -f/--fragment values can resolve to either fragments or attachments
        fragments_and_attachments = resolve_fragments(
            db, fragments, allow_attachments=True
        )
        argument_fragments = [
            fragment
            for fragment in fragments_and_attachments
            if isinstance(fragment, Fragment)
        ]
        argument_attachments = [
            attachment
            for attachment in fragments_and_attachments
            if isinstance(attachment, Attachment)
        ]
        argument_system_fragments = resolve_fragments(db, system_fragments)
    except FragmentNotFound as ex:
        raise click.ClickException(str(ex))

    click.echo("Chatting with {}".format(model.model_id))
    click.echo("Type 'exit' or 'quit' to exit")
    click.echo("Type '!multi' to enter multiple lines, then '!end' to finish")
    click.echo("Type '!edit' to open your default editor and modify the prompt")
    click.echo(
        "Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments"
    )
    in_multi = False

    # State collected across lines while in !multi mode
    accumulated = []
    accumulated_fragments = []
    accumulated_attachments = []
    end_token = "!end"
    while True:
        prompt = click.prompt("", prompt_suffix="> " if not in_multi else "")
        fragments = []
        attachments = []
        if argument_fragments:
            fragments += argument_fragments
            # fragments from --fragments will get added to the first message only
            argument_fragments = []
        if argument_attachments:
            attachments = argument_attachments
            argument_attachments = []
        if prompt.strip().startswith("!multi"):
            in_multi = True
            bits = prompt.strip().split()
            if len(bits) > 1:
                # "!multi TOKEN" means the block is closed by "!end TOKEN"
                end_token = "!end {}".format(" ".join(bits[1:]))
            # NOTE(review): if the command-line fragments/attachments were
            # consumed just above, they appear to be dropped by this continue
            # (and by the cancelled-editor continue below) - confirm intent.
            continue
        if prompt.strip() == "!edit":
            edited_prompt = click.edit()
            if edited_prompt is None:
                click.echo("Editor closed without saving.", err=True)
                continue
            prompt = edited_prompt.strip()
        if prompt.strip().startswith("!fragment "):
            # NOTE(review): this rebinds fragments/attachments, replacing any
            # command-line ones picked up above for the first message.
            prompt, fragments, attachments = process_fragments_in_chat(db, prompt)

        if in_multi:
            if prompt.strip() == end_token:
                # End of the block: send everything gathered as one prompt
                prompt = "\n".join(accumulated)
                fragments = accumulated_fragments
                attachments = accumulated_attachments
                in_multi = False
                accumulated = []
                accumulated_fragments = []
                accumulated_attachments = []
            else:
                if prompt:
                    accumulated.append(prompt)
                accumulated_fragments += fragments
                accumulated_attachments += attachments
                continue
        # Test for exit/quit on what the user actually typed, BEFORE any
        # template is applied. A template that produces a prompt would
        # otherwise wrap or prefix the input, so "exit" could never match and
        # would be sent to the model instead of ending the chat.
        if prompt.strip() in ("exit", "quit"):
            break
        if template_obj:
            try:
                # Mirror prompt() logic: only pass input if template uses it
                uses_input = "input" in template_obj.vars()
                input_ = prompt if uses_input else ""
                template_prompt, template_system = template_obj.evaluate(input_, params)
            except Template.MissingVariables as ex:
                raise click.ClickException(str(ex))
            # An explicit system prompt takes precedence over the template's
            if template_system and not system:
                system = template_system
            if template_prompt:
                if prompt and not uses_input:
                    # Template ignores the input, so append the input after it
                    prompt = f"{template_prompt}\n{prompt}"
                else:
                    prompt = template_prompt

        response = conversation.chain(
            prompt,
            fragments=fragments,
            system_fragments=argument_system_fragments,
            attachments=attachments,
            system=system,
            **kwargs,
        )

        # System prompt and system fragments only sent for the first message
        system = None
        argument_system_fragments = []
        # Print chunks as they arrive, flushing so output shows immediately
        for chunk in response:
            print(chunk, end="")
            sys.stdout.flush()
        response.log_to_db(db)
        print("")

def load_conversation(
    conversation_id: Optional[str],
    async_=False,
    database=None,
) -> Optional[_BaseConversation]:
    """
    Load a logged conversation, including its responses, from the database.

    conversation_id: ID of the conversation to load. When None, the
        conversation with the highest id (the most recent one) is used.
    async_: when true, build AsyncConversation/AsyncResponse objects instead
        of Conversation/Response.
    database: optional path to the SQLite file; defaults to logs_db_path().

    Returns None if no ID was given and there are no conversations at all.
    Raises click.ClickException if the requested ID does not exist.
    """
    db_file = logs_db_path() if not database else pathlib.Path(database)
    db = sqlite_utils.Database(db_file)
    migrate(db)

    if conversation_id is None:
        # Fall back to the most recent conversation, if there is one
        newest = list(db["conversations"].rows_where(order_by="id desc", limit=1))
        if not newest:
            return None
        conversation_id = newest[0]["id"]

    conversations_table = cast(sqlite_utils.db.Table, db["conversations"])
    try:
        row = conversations_table.get(conversation_id)
    except sqlite_utils.db.NotFoundError:
        raise click.ClickException(
            "No conversation found with id={}".format(conversation_id)
        )

    # Pick the sync or async flavour of the classes used to inflate the rows
    if async_:
        conversation_class, response_class = AsyncConversation, AsyncResponse
    else:
        conversation_class, response_class = Conversation, Response

    conversation = conversation_class.from_row(row)
    matching_rows = db["responses"].rows_where(
        "conversation_id = ?", [conversation_id]
    )
    conversation.responses.extend(
        response_class.from_row(db, response_row) for response_row in matching_rows
    )
    return conversation


@cli.group(cls=DefaultGroup, default="list", default_if_no_args=True)
def keys():
    """Manage stored API keys for different models"""
    # Container for the "keys" subcommands; the group is configured with
    # "list" as its default command when no arguments are given.


@keys.command(name="list")
def keys_list():
    "List names of all stored keys"
    keys_file = user_dir() / "keys.json"
    if not keys_file.exists():
        click.echo("No keys found")
        return
    stored = json.loads(keys_file.read_text())
    # "// Note" is the warning entry written by `keys set`, not a real key
    visible_names = sorted(
        key_name for key_name in stored.keys() if key_name != "// Note"
    )
    for key_name in visible_names:
        click.echo(key_name)


@keys.command(name="path")
def keys_path_command():
    "Output the path to the keys.json file"
    # keys.json lives directly inside the user directory
    keys_file = user_dir() / "keys.json"
    click.echo(keys_file)


@keys.command(name="get")
@click.argument("name")
def keys_get(name):
    """
    Return the value of a stored key

    Example usage:

    \b
        export OPENAI_API_KEY=$(llm keys get openai)
    """
    keys_file = user_dir() / "keys.json"
    if not keys_file.exists():
        raise click.ClickException("No keys found")
    stored = json.loads(keys_file.read_text())
    # Look the key up first so a missing name becomes a clean CLI error
    try:
        value = stored[name]
    except KeyError:
        raise click.ClickException("No key found with name '{}'".format(name))
    click.echo(value)


@keys.command(name="set")
@click.argument("name")
@click.option("--value", prompt="Enter key", hide_input=True, help="Value to set")
def keys_set(name, value):
    """
    Save a key in the keys.json file

    Example usage:

    \b
        $ llm keys set openai
        Enter key: ...
    """
    # Seed content for a new (or unreadable) keys file
    seed = {"// Note": "This file stores secret API credentials. Do not share!"}
    keys_file = user_dir() / "keys.json"
    keys_file.parent.mkdir(parents=True, exist_ok=True)
    if not keys_file.exists():
        # Create the file and restrict it to the owner before any secret is stored
        keys_file.write_text(json.dumps(seed))
        keys_file.chmod(0o600)
    try:
        stored = json.loads(keys_file.read_text())
    except json.JSONDecodeError:
        # Unparseable file: start again from the seed content
        stored = seed
    stored[name] = value
    serialized = json.dumps(stored, indent=2)
    keys_file.write_text(serialized + "\n")

@cli.group(cls=DefaultGroup, default="list", default_if_no_args=True)
def logs():
    """Tools for exploring logged prompts and responses"""
    # Container for the "logs" subcommands; the group is configured with
    # "list" as its default command when no arguments are given.


@logs.command(name="path")
def logs_path():
    "Output the path to the logs.db file"
    # Print wherever logs_db_path() says the database lives
    db_location = logs_db_path()
    click.echo(db_location)


@logs.command(name="status")
def logs_status():
    "Show current status of database logging"
    path = logs_db_path()
    if not path.exists():
        click.echo("No log database found at {}".format(path))
        return
    # These two messages have no placeholders, so they are plain literals
    # rather than no-op "...".format() calls.
    if logs_on():
        click.echo("Logging is ON for all prompts")
    else:
        click.echo("Logging is OFF")
    db = sqlite_utils.Database(path)
    # Bring the schema up to date before reading the table counts
    migrate(db)
    click.echo("Found log database at {}".format(path))
    click.echo("Number of conversations logged:\t{}".format(db["conversations"].count))
    click.echo("Number of responses logged:\t{}".format(db["responses"].count))
    click.echo(
        "Database file size: \t\t{}".format(_human_readable_size(path.stat().st_size))
    )


@logs.command(name="backup")
@click.argument("path", type=click.Path(dir_okay=True, writable=True))
def backup(path):
    "Backup your logs database to this file"
    source_path = logs_db_path()
    destination = pathlib.Path(path)
    source_db = sqlite_utils.Database(source_path)
    # "vacuum into" writes a copy of the database to the destination file;
    # any failure is reported as a regular CLI error
    try:
        source_db.execute("vacuum into ?", [str(destination)])
    except Exception as ex:
        raise click.ClickException(str(ex))
    backup_size = _human_readable_size(destination.stat().st_size)
    click.echo("Backed up {} to {}".format(backup_size, destination))


@logs.command(name="on")
def logs_turn_on():
    "Turn on logging for all prompts"
    # Remove the "logs-off" marker file (the one `logs off` creates), if present
    marker = user_dir() / "logs-off"
    if not marker.exists():
        return
    marker.unlink()


@logs.command(name="off")
def logs_turn_off():
    "Turn off logging for all prompts"
    # Create the "logs-off" marker file (the one `logs on` removes)
    marker = user_dir() / "logs-off"
    marker.touch()


# Column list substituted for {columns} in the log queries below. It covers
# the response fields plus the joined conversation name/model and schema JSON.
LOGS_COLUMNS = """    responses.id,
    responses.model,
    responses.resolved_model,
    responses.prompt,
    responses.system,
    responses.prompt_json,
    responses.options_json,
    responses.response,
    responses.response_json,
    responses.conversation_id,
    responses.duration_ms,
    responses.datetime_utc,
    responses.input_tokens,
    responses.output_tokens,
    responses.token_details,
    conversations.name as conversation_name,
    conversations.model as conversation_model,
    schemas.content as schema_json"""

# Template for listing responses without full-text search. It is filled in
# with str.format(): {columns}, {extra_where} (either empty or a complete
# " where ..." clause), {order_by} and {limit} (either empty or " limit N").
LOGS_SQL = """
select
{columns}
from
    responses
left join schemas on responses.schema_id = schemas.id
left join conversations on responses.conversation_id = conversations.id{extra_where}
order by {order_by}{limit}
"""
# Full-text search variant: joins responses_fts and matches on the :query
# parameter. It already has a where clause, so {extra_where} must start
# with " and " here rather than " where ".
LOGS_SQL_SEARCH = """
select
{columns}
from
    responses
left join schemas on responses.schema_id = schemas.id
left join conversations on responses.conversation_id = conversations.id
join responses_fts on responses_fts.rowid = responses.rowid
where responses_fts match :query{extra_where}
order by {order_by}{limit}
"""

# Attachments for a set of responses. The {} is replaced with a
# comma-separated list of "?" placeholders, one per response ID. The
# attachment content itself is not selected, only its length.
ATTACHMENTS_SQL = """
select
    response_id,
    attachments.id,
    attachments.type,
    attachments.path,
    attachments.url,
    length(attachments.content) as content_length
from attachments
join prompt_attachments
    on attachments.id = prompt_attachments.attachment_id
where prompt_attachments.response_id in ({})
order by prompt_attachments."order"
"""


@logs.command(name="list")
@click.option(
    "-n",
    "--count",
    type=int,
    default=None,
    help="Number of entries to show - defaults to 3, use 0 for all",
)
@click.option(
    "-p",
    "--path",
    type=click.Path(readable=True, exists=True, dir_okay=False),
    help="Path to log database",
    hidden=True,
)
@click.option(
    "-d",
    "--database",
    type=click.Path(readable=True, exists=True, dir_okay=False),
    help="Path to log database",
)
@click.option("-m", "--model", help="Filter by model or model alias")
@click.option("-q", "--query", help="Search for logs matching this string")
@click.option(
    "fragments",
    "--fragment",
    "-f",
    help="Filter for prompts using these fragments",
    multiple=True,
)
@click.option(
    "tools",
    "-T",
    "--tool",
    multiple=True,
    help="Filter for prompts with results from these tools",
)
@click.option(
    "any_tools",
    "--tools",
    is_flag=True,
    help="Filter for prompts with results from any tools",
)
@schema_option
@click.option(
    "--schema-multi",
    help="JSON schema used for multiple results",
)
@click.option(
    "-l", "--latest", is_flag=True, help="Return latest results matching search query"
)
@click.option(
    "--data", is_flag=True, help="Output newline-delimited JSON data for schema"
)
@click.option("--data-array", is_flag=True, help="Output JSON array of data for schema")
@click.option("--data-key", help="Return JSON objects from array in this key")
@click.option(
    "--data-ids", is_flag=True, help="Attach corresponding IDs to JSON objects"
)
@click.option("-t", "--truncate", is_flag=True, help="Truncate long strings in output")
@click.option(
    "-s", "--short", is_flag=True, help="Shorter YAML output with truncated prompts"
)
@click.option("-u", "--usage", is_flag=True, help="Include token usage")
@click.option("-r", "--response", is_flag=True, help="Just output the last response")
@click.option("-x", "--extract", is_flag=True, help="Extract first fenced code block")
@click.option(
    "extract_last",
    "--xl",
    "--extract-last",
    is_flag=True,
    help="Extract last fenced code block",
)
@click.option(
    "current_conversation",
    "-c",
    "--current",
    is_flag=True,
    flag_value=-1,
    help="Show logs from the current conversation",
)
@click.option(
    "conversation_id",
    "--cid",
    "--conversation",
    help="Show logs for this conversation ID",
)
@click.option("--id-gt", help="Return responses with ID > this")
@click.option("--id-gte", help="Return responses with ID >= this")
@click.option(
    "json_output",
    "--json",
    is_flag=True,
    help="Output logs as JSON",
)
@click.option(
    "--expand",
    "-e",
    is_flag=True,
    help="Expand fragments to show their content",
)
def logs_list(
    count,
    path,
    database,
    model,
    query,
    fragments,
    tools,
    any_tools,
    schema_input,
    schema_multi,
    latest,
    data,
    data_array,
    data_key,
    data_ids,
    truncate,
    short,
    usage,
    response,
    extract,
    extract_last,
    current_conversation,
    conversation_id,
    id_gt,
    id_gte,
    json_output,
    expand,
):
    "Show logged prompts and their responses"
    if database and not path:
        path = database
    path = pathlib.Path(path or logs_db_path())
    if not path.exists():
        raise click.ClickException("No log database found at {}".format(path))
    db = sqlite_utils.Database(path)
    migrate(db)

    if schema_multi:
        schema_input = schema_multi
    schema = resolve_schema_input(db, schema_input, load_template)
    if schema_multi:
        schema = multi_schema(schema)

    if short and (json_output or response):
        invalid = " or ".join(
            [
                flag[0]
                for flag in (("--json", json_output), ("--response", response))
                if flag[1]
            ]
        )
        raise click.ClickException("Cannot use --short and {} together".format(invalid))

    if response and not current_conversation and not conversation_id:
        current_conversation = True

    if current_conversation:
        try:
            conversation_id = next(
                db.query(
                    "select conversation_id from responses order by id desc limit 1"
                )
            )["conversation_id"]
        except StopIteration:
            # No conversations yet
            raise click.ClickException("No conversations found")

    # For --conversation set limit 0, if not explicitly set
    if count is None:
        if conversation_id:
            count = 0
        else:
            count = 3

    model_id = None
    if model:
        # Resolve alias, if any
        try:
            model_id = get_model(model).model_id
        except UnknownModelError:
            # Maybe they uninstalled a model, use the -m option as-is
            model_id = model

    sql = LOGS_SQL
    order_by = "responses.id desc"
    if query:
        sql = LOGS_SQL_SEARCH
        if not latest:
            order_by = "responses_fts.rank desc"

    limit = ""
    if count is not None and count > 0:
        limit = " limit {}".format(count)

    sql_format = {
        "limit": limit,
        "columns": LOGS_COLUMNS,
        "extra_where": "",
        "order_by": order_by,
    }
    where_bits = []
    sql_params = {
        "model": model_id,
        "query": query,
        "conversation_id": conversation_id,
        "id_gt": id_gt,
        "id_gte": id_gte,
    }
    if model_id:
        where_bits.append("responses.model = :model")
    if conversation_id:
        where_bits.append("responses.conversation_id = :conversation_id")
    if id_gt:
        where_bits.append("responses.id > :id_gt")
    if id_gte:
        where_bits.append("responses.id >= :id_gte")
    if fragments:
        # Resolve the fragments to their hashes
        fragment_hashes = [
            fragment.id() for fragment in resolve_fragments(db, fragments)
        ]
        exists_clauses = []

        for i, fragment_hash in enumerate(fragment_hashes):
            exists_clause = f"""
            exists (
                select 1 from prompt_fragments
                where prompt_fragments.response_id = responses.id
                and prompt_fragments.fragment_id in (
                    select fragments.id from fragments
                    where hash = :f{i}
                )
                union
                select 1 from system_fragments
                where system_fragments.response_id = responses.id
                and system_fragments.fragment_id in (
                    select fragments.id from fragments
                    where hash = :f{i}
                )
            )
            """
            exists_clauses.append(exists_clause)
            sql_params["f{}".format(i)] = fragment_hash

        where_bits.append(" and ".join(exists_clauses))

    if any_tools:
        # Any response that involved at least one tool result
        where_bits.append("""
            exists (
              select 1
                from tool_results
              where
                tool_results.response_id = responses.id
            )
        """)
    if tools:
        tools_by_name = get_tools()
        # Filter responses by tools (must have ALL of the named tools, including plugin)
        tool_clauses = []
        for i, tool_name in enumerate(tools):
            try:
                plugin_name = tools_by_name[tool_name].plugin
            except KeyError:
                raise click.ClickException(f"Unknown tool: {tool_name}")

            tool_clauses.append(f"""
            exists (
              select 1
                from tool_results
                join tools on tools.id = tool_results.tool_id
               where tool_results.response_id = responses.id
                 and tools.name = :tool{i}
                 and tools.plugin = :plugin{i}
            )
            """)
            sql_params[f"tool{i}"] = tool_name
            sql_params[f"plugin{i}"] = plugin_name

        # AND means “must have all” — use OR instead if you want “any of”
        where_bits.append(" and ".join(tool_clauses))

    schema_id = None
    if schema:
        schema_id = make_schema_id(schema)[0]
        where_bits.append("responses.schema_id = :schema_id")
        sql_params["schema_id"] = schema_id

    if where_bits:
        where_ = " and " if query else " where "
        sql_format["extra_where"] = where_ + " and ".join(where_bits)

    final_sql = sql.format(**sql_format)
    rows = list(db.query(final_sql, sql_params))

    # Reverse the order - we do this because we 'order by id desc limit 3' to get the
    # 3 most recent results, but we still want to display them in chronological order
    # ... except for searches where we don't do this
    if not query and not data:
        rows.reverse()

    # Fetch any attachments
    ids = [row["id"] for row in rows]
    attachments = list(db.query(ATTACHMENTS_SQL.format(",".join("?" * len(ids))), ids))
    attachments_by_id = {}
    for attachment in attachments:
        attachments_by_id.setdefault(attachment["response_id"], []).append(attachment)

    FRAGMENTS_SQL = """
    select
        {table}.response_id,
        fragments.hash,
        fragments.id as fragment_id,
        fragments.content,
        (
            select json_group_array(fragment_aliases.alias)
            from fragment_aliases
            where fragment_aliases.fragment_id = fragments.id
        ) as aliases
    from {table}
    join fragments on {table}.fragment_id = fragments.id
    where {table}.response_id in ({placeholders})
    order by {table}."order"
    """

    # Fetch any prompt or system prompt fragments
    prompt_fragments_by_id = {}
    system_fragments_by_id = {}
    for table, dictionary in (
        ("prompt_fragments", prompt_fragments_by_id),
        ("system_fragments", system_fragments_by_id),
    ):
        for fragment in db.query(
            FRAGMENTS_SQL.format(placeholders=",".join("?" * len(ids)), table=table),
            ids,
        ):
            dictionary.setdefault(fragment["response_id"], []).append(fragment)

    if data or data_array or data_key or data_ids:
        # Special case for --data to output valid JSON
        to_output = []
        for row in rows:
            response = row["response"] or ""
            try:
                decoded = json.loads(response)
                new_items = []
                if (
                    isinstance(decoded, dict)
                    and (data_key in decoded)
                    and all(isinstance(item, dict) for item in decoded[data_key])
                ):
                    for item in decoded[data_key]:
                        new_items.append(item)
                else:
                    new_items.append(decoded)
                if data_ids:
                    for item in new_items:
                        item[find_unused_key(item, "response_id")] = row["id"]
                        item[find_unused_key(item, "conversation_id")] = row["id"]
                to_output.extend(new_items)
            except ValueError:
                pass
        for line in output_rows_as_json(to_output, nl=not data_array, compact=True):
            click.echo(line)
        return

    # Tool usage information
    TOOLS_SQL = """
    SELECT responses.id,
    -- Tools related to this response
    COALESCE(
        (SELECT json_group_array(json_object(
            'id', t.id,
            'hash', t.hash,
            'name', t.name,
            'description', t.description,
            'input_schema', json(t.input_schema)
        ))
        FROM tools t
        JOIN tool_responses tr ON t.id = tr.tool_id
        WHERE tr.response_id = responses.id
        ),
        '[]'
    ) AS tools,
    -- Tool calls for this response
    COALESCE(
        (SELECT json_group_array(json_object(
            'id', tc.id,
            'tool_id', tc.tool_id,
            'name', tc.name,
            'arguments', json(tc.arguments),
            'tool_call_id', tc.tool_call_id
        ))
        FROM tool_calls tc
        WHERE tc.response_id = responses.id
        ),
        '[]'
    ) AS tool_calls,
    -- Tool results for this response
    COALESCE(
        (SELECT json_group_array(json_object(
            'id', tr.id,
            'tool_id', tr.tool_id,
            'name', tr.name,
            'output', tr.output,
            'tool_call_id', tr.tool_call_id,
            'exception', tr.exception,
            'attachments', COALESCE(
                (SELECT json_group_array(json_object(
                    'id', a.id,
                    'type', a.type,
                    'path', a.path,
                    'url', a.url,
                    'content', a.content
                ))
                FROM tool_results_attachments tra
                JOIN attachments a ON tra.attachment_id = a.id
                WHERE tra.tool_result_id = tr.id
                ),
                '[]'
            )
        ))
        FROM tool_results tr
        WHERE tr.response_id = responses.id
        ),
        '[]'
    ) AS tool_results
    FROM responses
    where id in ({placeholders})
    """
    tool_info_by_id = {
        row["id"]: {
            "tools": json.loads(row["tools"]),
            "tool_calls": json.loads(row["tool_calls"]),
            "tool_results": json.loads(row["tool_results"]),
        }
        for row in db.query(
            TOOLS_SQL.format(placeholders=",".join("?" * len(ids))), ids
        )
    }

    for row in rows:
        if truncate:
            row["prompt"] = truncate_string(row["prompt"] or "")
            row["response"] = truncate_string(row["response"] or "")
        # Add prompt and system fragments
        for key in ("prompt_fragments", "system_fragments"):
            row[key] = [
                {
                    "hash": fragment["hash"],
                    "content": (
                        fragment["content"]
                        if expand
                        else truncate_string(fragment["content"])
                    ),
                    "aliases": json.loads(fragment["aliases"]),
                }
                for fragment in (
                    prompt_fragments_by_id.get(row["id"], [])
                    if key == "prompt_fragments"
                    else system_fragments_by_id.get(row["id"], [])
                )
            ]
        # Either decode or remove all JSON keys
        keys = list(row.keys())
        for key in keys:
            if key.endswith("_json") and row[key] is not None:
                if truncate:
                    del row[key]
                else:
                    row[key] = json.loads(row[key])
        row.update(tool_info_by_id[row["id"]])

    output = None
    if json_output:
        # Output as JSON if requested
        for row in rows:
            row["attachments"] = [
                {k: v for k, v in attachment.items() if k != "response_id"}
                for attachment in attachments_by_id.get(row["id"], [])
            ]
        output = json.dumps(list(rows), indent=2)
    elif extract or extract_last:
        # Extract and return first code block
        for row in rows:
            output = extract_fenced_code_block(row["response"], last=extract_last)
            if output is not None:
                break
    elif response:
        # Just output the last response
        if rows:
            output = rows[-1]["response"]

    if output is not None:
        click.echo(output)
    else:
        # Output neatly formatted human-readable logs
        def _display_fragments(fragments, title):
            if not fragments:
                return
            if not expand:
                content = "\n".join(
                    ["- {}".format(fragment["hash"]) for fragment in fragments]
                )
            else:
                # <details><summary> for each one
                bits = []
                for fragment in fragments:
                    bits.append(
                        "<details><summary>{}</summary>\n{}\n</details>".format(
                            fragment["hash"], maybe_fenced_code(fragment["content"])
                        )
                    )
                content = "\n".join(bits)
            click.echo(f"\n### {title}\n\n{content}")

        current_system = None
        should_show_conversation = True
        for row in rows:
            if short:
                system = truncate_string(
                    row["system"] or "", 120, normalize_whitespace=True
                )
                prompt = truncate_string(
                    row["prompt"] or "", 120, normalize_whitespace=True, keep_end=True
                )
                cid = row["conversation_id"]
                attachments = attachments_by_id.get(row["id"])
                obj = {
                    "model": row["model"],
                    "datetime": row["datetime_utc"].split(".")[0],
                    "conversation": cid,
                }
                if row["tool_calls"]:
                    obj["tool_calls"] = [
                        "{}({})".format(
                            tool_call["name"], json.dumps(tool_call["arguments"])
                        )
                        for tool_call in row["tool_calls"]
                    ]
                if row["tool_results"]:
                    obj["tool_results"] = [
                        "{}: {}".format(
                            tool_result["name"], truncate_string(tool_result["output"])
                        )
                        for tool_result in row["tool_results"]
                    ]
                if system:
                    obj["system"] = system
                if prompt:
                    obj["prompt"] = prompt
                if attachments:
                    items = []
                    for attachment in attachments:
                        details = {"type": attachment["type"]}
                        if attachment.get("path"):
                            details["path"] = attachment["path"]
                        if attachment.get("url"):
                            details["url"] = attachment["url"]
                        items.append(details)
                    obj["attachments"] = items
                for key in ("prompt_fragments", "system_fragments"):
                    obj[key] = [fragment["hash"] for fragment in row[key]]
                if usage and (row["input_tokens"] or row["output_tokens"]):
                    usage_details = {
                        "input": row["input_tokens"],
                        "output": row["output_tokens"],
                    }
                    if row["token_details"]:
                        usage_details["details"] = json.loads(row["token_details"])
                    obj["usage"] = usage_details
                click.echo(yaml.dump([obj], sort_keys=False).strip())
                continue
            # Not short, output Markdown
            click.echo(
                "# {}{}\n{}".format(
                    row["datetime_utc"].split(".")[0],
                    (
                        "    conversation: {} id: {}".format(
                            row["conversation_id"], row["id"]
                        )
                        if should_show_conversation
                        else ""
                    ),
                    (
                        (
                            "\nModel: **{}**{}\n".format(
                                row["model"],
                                (
                                    " (resolved: **{}**)".format(row["resolved_model"])
                                    if row["resolved_model"]
                                    else ""
                                ),
                            )
                        )
                        if should_show_conversation
                        else ""
                    ),
                )
            )
            # In conversation log mode only show it for the first one
            if conversation_id:
                should_show_conversation = False
            click.echo("## Prompt\n\n{}".format(row["prompt"] or "-- none --"))
            _display_fragments(row["prompt_fragments"], "Prompt fragments")
            if row["options_json"]:
                options = row["options_json"]
                if isinstance(options, str):
                    options = json.loads(options)
                if options:
                    options_text = "\n".join(
                        "- {}: {}".format(key, value) for key, value in options.items()
                    )
                    click.echo("\n## Options\n\n{}".format(options_text))
            if row["system"] != current_system:
                if row["system"] is not None:
                    click.echo("\n## System\n\n{}".format(row["system"]))
                current_system = row["system"]
            _display_fragments(row["system_fragments"], "System fragments")
            if row["schema_json"]:
                click.echo(
                    "\n## Schema\n\n```json\n{}\n```".format(
                        json.dumps(row["schema_json"], indent=2)
                    )
                )
            # Show tool calls and results
            if row["tools"]:
                click.echo("\n### Tools\n")
                for tool in row["tools"]:
                    click.echo(
                        "- **{}**: `{}`<br>\n    {}<br>\n    Arguments: {}".format(
                            tool["name"],
                            tool["hash"],
                            tool["description"],
                            json.dumps(tool["input_schema"]["properties"]),
                        )
                    )
            if row["tool_results"]:
                click.echo("\n### Tool results\n")
                for tool_result in row["tool_results"]:
                    attachments = ""
                    for attachment in tool_result["attachments"]:
                        desc = ""
                        if attachment.get("type"):
                            desc += attachment["type"] + ": "
                        if attachment.get("path"):
                            desc += attachment["path"]
                        elif attachment.get("url"):
                            desc += attachment["url"]
                        elif attachment.get("content"):
                            desc += f"<{attachment['content_length']:,} bytes>"
                        attachments += "\n    - {}".format(desc)
                    click.echo(
                        "- **{}**: `{}`<br>\n{}{}{}".format(
                            tool_result["name"],
                            tool_result["tool_call_id"],
                            textwrap.indent(tool_result["output"], "    "),
                            (
                                "<br>\n    **Error**: {}\n".format(
                                    tool_result["exception"]
                                )
                                if tool_result["exception"]
                                else ""
                            ),
                            attachments,
                        )
                    )
            attachments = attachments_by_id.get(row["id"])
            if attachments:
                click.echo("\n### Attachments\n")
                for i, attachment in enumerate(attachments, 1):
                    if attachment["path"]:
                        path = attachment["path"]
                        click.echo(
                            "{}. **{}**: `{}`".format(i, attachment["type"], path)
                        )
                    elif attachment["url"]:
                        click.echo(
                            "{}. **{}**: {}".format(
                                i, attachment["type"], attachment["url"]
                            )
                        )
                    elif attachment["content_length"]:
                        click.echo(
                            "{}. **{}**: `<{} bytes>`".format(
                                i,
                                attachment["type"],
                                f"{attachment['content_length']:,}",
                            )
                        )

            # If a schema was provided and the row is valid JSON, pretty print and syntax highlight it
            response = row["response"]
            if row["schema_json"]:
                try:
                    parsed = json.loads(response)
                    response = "```json\n{}\n```".format(json.dumps(parsed, indent=2))
                except ValueError:
                    pass
            click.echo("\n## Response\n")
            if row["tool_calls"]:
                click.echo("### Tool calls\n")
                for tool_call in row["tool_calls"]:
                    click.echo(
                        "- **{}**: `{}`<br>\n    Arguments: {}".format(
                            tool_call["name"],
                            tool_call["tool_call_id"],
                            json.dumps(tool_call["arguments"]),
                        )
                    )
                click.echo("")
            if response:
                click.echo("{}\n".format(response))
            if usage:
                token_usage = token_usage_string(
                    row["input_tokens"],
                    row["output_tokens"],
                    json.loads(row["token_details"]) if row["token_details"] else None,
                )
                if token_usage:
                    click.echo("## Token usage\n\n{}\n".format(token_usage))


# Command group for the "models" subcommands. It is configured with
# default="list" and default_if_no_args=True, so a bare invocation of the
# group is routed to its "list" command.
@cli.group(
    cls=DefaultGroup,
    default="list",
    default_if_no_args=True,
)
def models():
    "Manage available models"


# Maps JSON schema type names to the shorter Python-style names that
# models_list() prints for each option. Types missing from this table are
# displayed under their JSON schema name unchanged.
_type_lookup = {
    "number": "float",
    "integer": "int",
    "string": "str",
    "object": "dict",
}


@models.command(name="list")
@click.option(
    "--options", is_flag=True, help="Show options for each model, if available"
)
@click.option("async_", "--async", is_flag=True, help="List async models")
@click.option("--schemas", is_flag=True, help="List models that support schemas")
@click.option("--tools", is_flag=True, help="List models that support tools")
@click.option(
    "-q",
    "--query",
    multiple=True,
    help="Search for models matching these strings",
)
@click.option("model_ids", "-m", "--model", help="Specific model IDs", multiple=True)
def models_list(options, async_, schemas, tools, query, model_ids):
    "List available models"
    # Model classes whose option descriptions have already been printed;
    # later models of the same class list option names and types only.
    described_classes = set()
    for entry in get_models_with_aliases():
        # Filters: every active filter must accept the model
        if async_ and not entry.async_model:
            continue
        if query and not all(entry.matches(q) for q in query):
            continue
        if model_ids:
            known_names = set([entry.model.model_id] + entry.aliases)
            if known_names.isdisjoint(model_ids):
                continue
        if schemas and not entry.model.supports_schema:
            continue
        if tools and not entry.model.supports_tools:
            continue

        model = entry.async_model if async_ else entry.model
        output = str(model)
        if entry.aliases:
            output += " (aliases: {})".format(", ".join(entry.aliases))

        if options:
            properties = model.Options.model_json_schema()["properties"]
            if properties:
                show_descriptions = model.__class__ not in described_classes
                output += "\n  Options:"
                for name, field in properties.items():
                    variants = field.get("anyOf")
                    if variants is None:
                        variants = [{"type": field.get("type", "str")}]
                    # "null" variants only mark the option as optional
                    type_names = [
                        _type_lookup.get(
                            variant.get("type"), variant.get("type", "str")
                        )
                        for variant in variants
                        if variant.get("type") != "null"
                    ]
                    line = "\n    " + name + ": " + ", ".join(type_names)
                    description = field.get("description", "")
                    if description and show_descriptions:
                        wrapped = textwrap.wrap(description, 70)
                        line += "\n      " + "\n      ".join(wrapped)
                    output += line
                described_classes.add(model.__class__)
            if model.attachment_types:
                # Wrap to the terminal width, clamped to 30..70 columns
                wrap_width = min(max(shutil.get_terminal_size().columns, 30), 70)
                wrapper = textwrap.TextWrapper(
                    width=wrap_width,
                    initial_indent="    ",
                    subsequent_indent="    ",
                )
                output += "\n  Attachment types:\n{}".format(
                    wrapper.fill(", ".join(sorted(model.attachment_types)))
                )

        feature_flags = (
            ("streaming", model.can_stream),
            ("schemas", model.supports_schema),
            ("tools", model.supports_tools),
            ("async", entry.async_model),
        )
        features = [label for label, enabled in feature_flags if enabled]

        if options:
            if features:
                output += "\n  Features:\n{}".format(
                    "\n".join("  - {}".format(feature) for feature in features)
                )
            needs_key = getattr(model, "needs_key", None)
            if needs_key:
                output += "\n  Keys:"
                output += "\n    key: {}".format(needs_key)
                key_env_var = getattr(model, "key_env_var", None)
                if key_env_var:
                    output += "\n    env_var: {}".format(key_env_var)
        click.echo(output)

    # The default model line is only shown for an unfiltered, plain listing
    if not (query or options or schemas or model_ids):
        click.echo(f"Default: {get_default_model()}")


@models.command(name="default")
@click.argument("model", required=False)
def models_default(model):
    "Show or set the default model"
    if model:
        # Resolve the name first so only known models can become the default
        try:
            model = get_model(model)
            set_default_model(model.model_id)
        except KeyError:
            raise click.ClickException(f"Unknown model: {model}")
    else:
        # No argument: just report the current default
        click.echo(get_default_model())


# Command group for the "templates" subcommands. It is configured with
# default="list" and default_if_no_args=True, so a bare invocation of the
# group is routed to its "list" command.
@cli.group(
    cls=DefaultGroup,
    default="list",
    default_if_no_args=True,
)
def templates():
    "Manage stored prompt templates"


@templates.command(name="list")
def templates_list():
    "List available prompt templates"
    # (name, one-line summary) for every *.yaml template that loads cleanly
    summaries = []
    for template_file in template_dir().glob("*.yaml"):
        name = template_file.stem
        try:
            template = load_template(name)
        except LoadTemplateError:
            # Invalid templates are left out of the listing
            continue
        if template.system:
            summary = f"system: {template.system}"
            if template.prompt:
                summary += f" prompt: {template.prompt}"
        else:
            summary = template.prompt or ""
        summaries.append((name, summary.replace("\n", " ")))

    if not summaries:
        return

    # Pad names to a common width so the " : " separators line up
    width = max(len(name) for name, _ in summaries)
    for name, summary in sorted(summaries):
        click.echo(display_truncated(f"{name:<{width}} : {summary}"))


@templates.command(name="show")
@click.argument("name")
def templates_show(name):
    "Show the specified prompt template"
    try:
        template = load_template(name)
    except LoadTemplateError:
        raise click.ClickException(f"Template '{name}' not found or invalid")
    # Only fields that are actually set are included in the YAML output
    populated = {
        field: value
        for field, value in template.model_dump().items()
        if value is not None
    }
    click.echo(yaml.dump(populated, indent=4, default_flow_style=False))


@templates.command(name="edit")
@click.argument("name")
def templates_edit(name):
    "Edit the specified prompt template using the default $EDITOR"
    # First ensure it exists, seeding new templates with DEFAULT_TEMPLATE
    path = template_dir() / f"{name}.yaml"
    if not path.exists():
        path.write_text(DEFAULT_TEMPLATE, "utf-8")
    click.edit(filename=str(path))
    # Validate the edited template. A load failure is reported as a regular
    # CLI error, as templates_show does, rather than escaping as an
    # unhandled LoadTemplateError traceback.
    try:
        load_template(name)
    except LoadTemplateError as ex:
        raise click.ClickException(str(ex))


@templates.command(name="path")
def templates_path():
    "Output the path to the templates directory"
    # Prints whatever template_dir() returns, without further formatting
    click.echo(template_dir())


@templates.command(name="loaders")
def templates_loaders():
    "Show template loaders registered by plugins"
    shown = 0
    for shown, (prefix, loader) in enumerate(get_template_loaders().items(), 1):
        # Use the loader's docstring as its description when it has one
        doc = loader.__doc__
        description = textwrap.dedent(doc).strip() if doc else "Undocumented"
        click.echo(f"{prefix}:")
        click.echo(textwrap.indent(description, "  "))
    if shown == 0:
        click.echo("No template loaders found")


# Command group for the "schemas" subcommands. It is configured with
# default="list" and default_if_no_args=True, so a bare invocation of the
# group is routed to its "list" command.
@cli.group(
    cls=DefaultGroup,
    default="list",
    default_if_no_args=True,
)
def schemas():
    "Manage stored schemas"


@schemas.command(name="list")
@click.option(
    "-p",
    "--path",
    type=click.Path(readable=True, exists=True, dir_okay=False),
    help="Path to log database",
    hidden=True,
)
@click.option(
    "-d",
    "--database",
    type=click.Path(readable=True, exists=True, dir_okay=False),
    help="Path to log database",
)
@click.option(
    "queries",
    "-q",
    "--query",
    multiple=True,
    help="Search for schemas matching this string",
)
@click.option("--full", is_flag=True, help="Output full schema contents")
@click.option("json_", "--json", is_flag=True, help="Output as JSON")
@click.option("nl", "--nl", is_flag=True, help="Output as newline-delimited JSON")
def schemas_list(path, database, queries, full, json_, nl):
    "List stored schemas"
    # The hidden --path option takes precedence over --database; with
    # neither given, the default logs database is used.
    db_path = pathlib.Path(path or database or logs_db_path())
    if not db_path.exists():
        raise click.ClickException("No log database found at {}".format(db_path))
    db = sqlite_utils.Database(db_path)
    migrate(db)

    # Every -q value must appear as a substring of the stored schema content
    params = ["%{}%".format(q) for q in queries]
    where_sql = ""
    if queries:
        where_sql = " where " + " and ".join(
            "schemas.content like ?" for _ in queries
        )

    sql = """
    select
      schemas.id,
      schemas.content,
      max(responses.datetime_utc) as recently_used,
      count(*) as times_used
    from schemas
    join responses
      on responses.schema_id = schemas.id
    {} group by responses.schema_id
    order by recently_used
    """.format(where_sql)
    rows = db.query(sql, params)

    if json_ or nl:
        for line in output_rows_as_json(rows, json_cols={"content"}, nl=nl):
            click.echo(line)
        return

    # Default output: one YAML-style list item per schema
    for row in rows:
        click.echo("- id: {}".format(row["id"]))
        schema = json.loads(row["content"])
        if full:
            pretty = json.dumps(schema, indent=2)
            click.echo("  schema: |\n{}".format(textwrap.indent(pretty, "    ")))
        else:
            click.echo("  summary: |\n    {}".format(schema_summary(schema)))
        times_used = row["times_used"]
        plural = "" if times_used == 1 else "s"
        click.echo(
            "  usage: |\n    {} time{}, most recently {}".format(
                times_used, plural, row["recently_used"]
            )
        )


@schemas.command(name="show")
@click.argument("schema_id")
@click.option(
    "-p",
    "--path",
    type=click.Path(readable=True, exists=True, dir_okay=False),
    help="Path to log database",
    hidden=True,
)
@click.option(
    "-d",
    "--database",
    type=click.Path(readable=True, exists=True, dir_okay=False),
    help="Path to log database",
)
def schemas_show(schema_id, path, database):
    "Show a stored schema"
    # The hidden --path option takes precedence over --database; with
    # neither given, the default logs database is used.
    db_path = pathlib.Path(path or database or logs_db_path())
    if not db_path.exists():
        raise click.ClickException("No log database found at {}".format(db_path))
    db = sqlite_utils.Database(db_path)
    migrate(db)

    try:
        record = db["schemas"].get(schema_id)
    except sqlite_utils.db.NotFoundError:
        raise click.ClickException("Invalid schema ID")
    # Re-serialize the stored JSON so it is printed with 2-space indentation
    stored_schema = json.loads(record["content"])
    click.echo(json.dumps(stored_schema, indent=2))


@schemas.command(name="dsl")
@click.argument("input")
@click.option("--multi", is_flag=True, help="Wrap in an array")
def schemas_dsl_debug(input, multi):
    """
    Convert LLM's schema DSL to a JSON schema

    \b
        llm schemas dsl 'name, age int, bio: their bio'
    """
    # The example above uses "schemas", the name this command's group is
    # registered under (the group function is schemas()).
    schema = schema_dsl(input, multi)
    click.echo(json.dumps(schema, indent=2))


# Command group for the "tools" subcommands. It is configured with
# default="list" and default_if_no_args=True, so a bare invocation of the
# group is routed to its "list" command.
@cli.group(
    cls=DefaultGroup,
    default="list",
    default_if_no_args=True,
)
def tools():
    "Manage tools that can be made available to LLMs"


@tools.command(name="list")
@click.argument("tool_defs", nargs=-1)
@click.option("json_", "--json", is_flag=True, help="Output as JSON")
@click.option(
    "python_tools",
    "--functions",
    help="Python code block or file path defining functions to register as tools",
    multiple=True,
)
def tools_list(tool_defs, json_, python_tools):
    "List available tools that have been provided by plugins"
    # Build a name -> object registry. With explicit tool_defs only those
    # are listed; otherwise everything from get_tools() plus any tools
    # defined via --functions.
    if tool_defs:
        registry = {}
        for item in _gather_tools(tool_defs, python_tools):
            key = item.name if hasattr(item, "name") else item.__class__.__name__
            registry[key] = item
    else:
        registry = get_tools()
        for code_or_path in python_tools or ():
            for item in _tools_from_code(code_or_path):
                registry[item.name] = item

    # Split into plain Tool objects and everything else (handled as
    # toolboxes), keeping both the objects and their JSON descriptions.
    plain_tools = []
    toolboxes = []
    json_tools = []
    json_toolboxes = []
    for name in sorted(registry):
        item = registry[name]
        if isinstance(item, Tool):
            plain_tools.append(item)
            json_tools.append(
                {
                    "name": name,
                    "description": item.description,
                    "arguments": item.input_schema,
                    "plugin": item.plugin,
                }
            )
            continue
        toolboxes.append(item)
        json_toolboxes.append(
            {
                "name": name,
                "tools": [
                    {
                        "name": method.name,
                        "description": method.description,
                        "arguments": method.input_schema,
                    }
                    for method in item.method_tools()
                ],
            }
        )

    if json_:
        payload = {"tools": json_tools, "toolboxes": json_toolboxes}
        click.echo(json.dumps(payload, indent=2))
        return

    # Plain text: a signature line, then the indented description if any
    for tool in plain_tools:
        if tool.implementation:
            signature = str(inspect.signature(tool.implementation))
        else:
            signature = "()"
        plugin_note = " (plugin: {})".format(tool.plugin) if tool.plugin else ""
        click.echo("{}{}{}\n".format(tool.name, signature, plugin_note))
        if tool.description:
            click.echo(textwrap.indent(tool.description.strip(), "  ") + "\n")

    for toolbox in toolboxes:
        click.echo(toolbox.name + ":\n")
        for method in toolbox.method_tools():
            # Drop the leading self parameter from the displayed signature
            signature = str(inspect.signature(method.implementation))
            signature = signature.replace("(self, ", "(").replace("(self)", "()")
            click.echo("  {}{}\n".format(method.name, signature))
            if method.description:
                click.echo(textwrap.indent(method.description.strip(), "    ") + "\n")


# Command group for the "aliases" subcommands. It is configured with
# default="list" and default_if_no_args=True, so a bare invocation of the
# group is routed to its "list" command.
@cli.group(
    cls=DefaultGroup,
    default="list",
    default_if_no_args=True,
)
def aliases():
    "Manage model aliases"


@aliases.command(name="list")
@click.option("json_", "--json", is_flag=True, help="Output as JSON")
def aliases_list(json_):
    "List current aliases"
    # Rows of (alias, model_id, type label). Entries where the alias is
    # the model's own ID are not aliases and are left out.
    to_output = []
    for alias, model in get_model_aliases().items():
        if alias != model.model_id:
            to_output.append((alias, model.model_id, ""))
    for alias, embedding_model in get_embedding_model_aliases().items():
        if alias != embedding_model.model_id:
            to_output.append((alias, embedding_model.model_id, "embedding"))
    if json_:
        click.echo(
            json.dumps({key: value for key, value, type_ in to_output}, indent=4)
        )
        return
    if not to_output:
        # Nothing to show; max() below would raise ValueError on an
        # empty sequence.
        return
    # Pad aliases to a common width so the " : " separators line up
    max_alias_length = max(len(a) for a, _, _ in to_output)
    fmt = "{alias:<" + str(max_alias_length) + "} : {model_id}{type_}"
    for alias, model_id, type_ in to_output:
        click.echo(
            fmt.format(
                alias=alias, model_id=model_id, type_=f" ({type_})" if type_ else ""
            )
        )


@aliases.command(name="set")
@click.argument("alias")
@click.argument("model_id", required=False)
@click.option(
    "-q",
    "--query",
    multiple=True,
    help="Set alias for model matching these strings",
)
def aliases_set(alias, model_id, query):
    """
    Set an alias for a model

    Example usage:

    \b
        llm aliases set mini gpt-4o-mini

    Alternatively you can omit the model ID and specify one or more -q options.
    The first model matching all of those query strings will be used.

    \b
        llm aliases set mini -q 4o -q mini
    """
    # An explicit model ID is stored directly
    if model_id:
        set_alias(alias, model_id)
        return

    if not query:
        raise click.ClickException(
            "You must provide a model_id or at least one -q option"
        )

    # Otherwise use the first model that matches every query string
    match = next(
        (
            candidate
            for candidate in get_models_with_aliases()
            if all(candidate.matches(q) for q in query)
        ),
        None,
    )
    if not match:
        raise click.ClickException(
            "No model found matching query: " + ", ".join(query)
        )
    model_id = match.model.model_id
    set_alias(alias, model_id)
    # Report which model the search picked, on stderr
    click.echo(
        f"Alias '{alias}' set to model '{model_id}'",
        err=True,
    )


@aliases.command(name="remove")
@click.argument("alias")
def aliases_remove(alias):
    """
    Remove an alias

    Example usage:

    \b
        $ llm aliases remove turbo
    """
    try:
        remove_alias(alias)
    except KeyError as error:
        # The first argument of the KeyError becomes the CLI error message
        message = error.args[0]
        raise click.ClickException(message)


@aliases.command(name="path")
def aliases_path():
    "Output the path to the aliases.json file"
    # The path is built as aliases.json directly inside user_dir()
    click.echo(user_dir() / "aliases.json")


# Command group for the "fragments" subcommands. It is configured with
# default="list" and default_if_no_args=True, so a bare invocation of the
# group is routed to its "list" command.
@cli.group(
    cls=DefaultGroup,
    default="list",
    default_if_no_args=True,
)
def fragments():
    """
    Manage fragments that are stored in the database

    Fragments are reusable snippets of text that are shared across multiple prompts.
    """


@fragments.command(name="list")
@click.option(
    "queries",
    "-q",
    "--query",
    multiple=True,
    help="Search for fragments matching these strings",
)
@click.option("--aliases", is_flag=True, help="Show only fragments with aliases")
@click.option("json_", "--json", is_flag=True, help="Output as JSON")
def fragments_list(queries, aliases, json_):
    "List current fragments"
    db = sqlite_utils.Database(logs_db_path())
    migrate(db)

    # Each -q value gets its own named parameter (p1, p2, ...) and must
    # match the hash or an alias exactly, or be a substring of the source
    # or content.
    conditions = ["fragment_aliases.alias is not null"] if aliases else []
    params = {}
    for index, q in enumerate(queries, 1):
        p = f"p{index}"
        params[p] = q
        conditions.append(f"""
            (fragments.hash = :{p} or fragment_aliases.alias = :{p}
            or fragments.source like '%' || :{p} || '%'
            or fragments.content like '%' || :{p} || '%')
        """)
    where = ""
    if conditions:
        where = " where " + "\n      and\n  ".join(conditions)

    sql = """
    select
        fragments.hash,
        json_group_array(fragment_aliases.alias) filter (
            where
            fragment_aliases.alias is not null
        ) as aliases,
        fragments.datetime_utc,
        fragments.source,
        fragments.content
    from
        fragments
    left join
        fragment_aliases on fragment_aliases.fragment_id = fragments.id
    {where}
    group by
        fragments.id, fragments.hash, fragments.content, fragments.datetime_utc, fragments.source
    order by fragments.datetime_utc
    """.format(where=where)
    results = list(db.query(sql, params))
    # The aliases column comes back as a JSON array string
    for result in results:
        result["aliases"] = json.loads(result["aliases"])

    if json_:
        click.echo(json.dumps(results, indent=4))
        return

    def represent_str(dumper, data):
        # Multi-line strings are emitted as literal block scalars
        style = "|" if "\n" in data else None
        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)

    yaml.add_representer(str, represent_str)
    for result in results:
        result["content"] = truncate_string(result["content"])
        click.echo(yaml.dump([result], sort_keys=False, width=sys.maxsize).strip())


@fragments.command(name="set")
@click.argument("alias", callback=validate_fragment_alias)
@click.argument("fragment")
def fragments_set(alias, fragment):
    """
    Set an alias for a fragment

    Accepts an alias and a file path, URL, hash or '-' for stdin

    Example usage:

    \b
        llm fragments set mydocs ./docs.md
    """
    db = sqlite_utils.Database(logs_db_path())
    migrate(db)
    try:
        resolved = resolve_fragments(db, [fragment])[0]
    except FragmentNotFound as error:
        raise click.ClickException(str(error))
    # NOTE(review): migrate() is called a second time here, as in the
    # original; presumably redundant, confirm before removing.
    migrate(db)
    # Insert the alias, or repoint it if the alias already exists
    upsert_sql = """
    insert into fragment_aliases (alias, fragment_id)
    values (:alias, :fragment_id)
    on conflict(alias) do update set
        fragment_id = excluded.fragment_id;
    """
    with db.conn:
        fragment_id = ensure_fragment(db, resolved)
        db.conn.execute(upsert_sql, dict(alias=alias, fragment_id=fragment_id))


@fragments.command(name="show")
@click.argument("alias_or_hash")
def fragments_show(alias_or_hash):
    """
    Display the fragment stored under an alias or hash

    \b
        llm fragments show mydocs
    """
    db = sqlite_utils.Database(logs_db_path())
    migrate(db)
    try:
        matches = resolve_fragments(db, [alias_or_hash])
        fragment = matches[0]
    except FragmentNotFound as error:
        raise click.ClickException(str(error))
    click.echo(fragment)


@fragments.command(name="remove")
@click.argument("alias", callback=validate_fragment_alias)
def fragments_remove(alias):
    """
    Remove a fragment alias

    Example usage:

    \b
        llm fragments remove docs
    """
    db = sqlite_utils.Database(logs_db_path())
    migrate(db)
    # Only the row in fragment_aliases is deleted by this statement
    delete_sql = "delete from fragment_aliases where alias = :alias"
    with db.conn:
        db.conn.execute(delete_sql, dict(alias=alias))


@fragments.command(name="loaders")
def fragments_loaders():
    """Show fragment loaders registered by plugins"""
    from llm import get_fragment_loaders

    shown = 0
    for shown, (prefix, loader) in enumerate(get_fragment_loaders().items(), 1):
        if shown > 1:
            # Blank line between entries, but not before the first
            click.echo("")
        doc = loader.__doc__
        description = textwrap.dedent(doc).strip() if doc else "Undocumented"
        click.echo(f"{prefix}:")
        click.echo(textwrap.indent(description, "  "))
    if shown == 0:
        click.echo("No fragment loaders found")


@cli.command(name="plugins")
@click.option("--all", help="Include built-in default plugins", is_flag=True)
@click.option(
    "hooks", "--hook", help="Filter for plugins that implement this hook", multiple=True
)
def plugins_list(all, hooks):
    "List installed plugins"
    plugins = get_plugins(all)
    wanted_hooks = set(hooks)
    if wanted_hooks:
        # Keep plugins that implement at least one of the requested hooks
        plugins = [
            plugin
            for plugin in plugins
            if not wanted_hooks.isdisjoint(plugin["hooks"])
        ]
    click.echo(json.dumps(plugins, indent=2))


def display_truncated(text):
    "Return text cut to the terminal width, ending in '...' when it was shortened"
    columns = shutil.get_terminal_size().columns
    if len(text) <= columns:
        return text
    # Leave room for the three-character ellipsis
    return text[: columns - 3] + "..."


@cli.command()
@click.argument("packages", nargs=-1, required=False)
@click.option(
    "-U", "--upgrade", is_flag=True, help="Upgrade packages to latest version"
)
@click.option(
    "-e",
    "--editable",
    help="Install a project in editable mode from this path",
)
@click.option(
    "--force-reinstall",
    is_flag=True,
    help="Reinstall all packages even if they are already up-to-date",
)
@click.option(
    "--no-cache-dir",
    is_flag=True,
    help="Disable the cache",
)
@click.option(
    "--pre",
    is_flag=True,
    help="Include pre-release and development versions",
)
def install(packages, upgrade, editable, force_reinstall, no_cache_dir, pre):
    """Install packages from PyPI into the same environment as LLM"""
    # Build the equivalent "pip install ..." command line
    argv = ["pip", "install"]
    if upgrade:
        argv.append("--upgrade")
    if editable:
        argv.extend(["--editable", editable])
    remaining_flags = (
        (force_reinstall, "--force-reinstall"),
        (no_cache_dir, "--no-cache-dir"),
        (pre, "--pre"),
    )
    for enabled, flag in remaining_flags:
        if enabled:
            argv.append(flag)
    argv.extend(packages)
    # pip is run in-process as __main__, reading its arguments from sys.argv
    sys.argv = argv
    run_module("pip", run_name="__main__")


@cli.command()
@click.argument("packages", nargs=-1, required=True)
@click.option("-y", "--yes", is_flag=True, help="Don't ask for confirmation")
def uninstall(packages, yes):
    """Uninstall Python packages from the LLM environment"""
    pip_argv = ["pip", "uninstall"]
    pip_argv.extend(packages)
    if yes:
        pip_argv.append("-y")
    # pip is run in-process as __main__, so it reads its arguments from sys.argv
    sys.argv = pip_argv
    run_module("pip", run_name="__main__")


@cli.command()
@click.argument("collection", required=False)
@click.argument("id", required=False)
@click.option(
    "-i",
    "--input",
    type=click.Path(exists=True, readable=True, allow_dash=True),
    help="File to embed",
)
@click.option(
    "-m", "--model", help="Embedding model to use", envvar="LLM_EMBEDDING_MODEL"
)
@click.option("--store", is_flag=True, help="Store the text itself in the database")
@click.option(
    "-d",
    "--database",
    type=click.Path(file_okay=True, allow_dash=False, dir_okay=False, writable=True),
    envvar="LLM_EMBEDDINGS_DB",
)
@click.option(
    "-c",
    "--content",
    help="Content to embed",
)
@click.option("--binary", is_flag=True, help="Treat input as binary data")
@click.option(
    "--metadata",
    help="JSON object metadata to store",
    callback=json_validator("metadata"),
)
@click.option(
    "format_",
    "-f",
    "--format",
    type=click.Choice(["json", "blob", "base64", "hex"]),
    help="Output format",
)
def embed(
    collection, id, input, model, store, database, content, binary, metadata, format_
):
    """Embed text and store or return the result"""
    if collection and not id:
        raise click.ClickException("Must provide both collection and id")

    if store and not collection:
        raise click.ClickException("Must provide collection when using --store")

    collection_obj = None
    model_obj = None
    if collection:
        # The database is only opened when a collection is involved; the
        # plain -c / -i forms never touch it.
        db = sqlite_utils.Database(database or (user_dir() / "embeddings.db"))
        if Collection.exists(db, collection):
            # An existing collection dictates which model is used
            collection_obj = Collection(collection, db)
        else:
            # Creating a new collection requires a model ID
            if not model:
                model = get_default_embedding_model()
                if model is None:
                    raise click.ClickException(
                        "You need to specify an embedding model (no default model is set)"
                    )
            collection_obj = Collection(collection, db=db, model_id=model)
        model_obj = collection_obj.model()

    if model_obj is None:
        if model is None:
            model = get_default_embedding_model()
        try:
            model_obj = get_embedding_model(model)
        except UnknownModelError:
            raise click.ClickException(
                "You need to specify an embedding model (no default model is set)"
            )

    # Storing into a collection is silent unless a format was requested
    show_output = not (collection and format_ is None)

    # Work out what to embed: -c wins, then a named file, then stdin
    if not content:
        if input and input != "-":
            with open(input, "rb" if binary else "r") as f:
                content = f.read()
        else:
            stream = sys.stdin.buffer if binary else sys.stdin
            content = stream.read()

    if not content:
        raise click.ClickException("No content provided")

    if collection_obj:
        embedding = collection_obj.embed(id, content, metadata=metadata, store=store)
    else:
        embedding = model_obj.embed(content)

    if show_output:
        # Renderers are lazy so only the selected encoding is computed
        renderers = {
            None: lambda: json.dumps(embedding),
            "json": lambda: json.dumps(embedding),
            "blob": lambda: encode(embedding),
            "base64": lambda: base64.b64encode(encode(embedding)).decode("ascii"),
            "hex": lambda: encode(embedding).hex(),
        }
        render = renderers.get(format_)
        if render is not None:
            click.echo(render())


@cli.command()
@click.argument("collection")
@click.argument(
    "input_path",
    type=click.Path(exists=True, dir_okay=False, allow_dash=True, readable=True),
    required=False,
)
@click.option(
    "--format",
    type=click.Choice(["json", "csv", "tsv", "nl"]),
    help="Format of input file - defaults to auto-detect",
)
@click.option(
    "--files",
    type=(click.Path(file_okay=False, dir_okay=True, allow_dash=False), str),
    multiple=True,
    help="Embed files in this directory - specify directory and glob pattern",
)
@click.option(
    "encodings",
    "--encoding",
    help="Encodings to try when reading --files",
    multiple=True,
)
@click.option("--binary", is_flag=True, help="Treat --files as binary data")
@click.option("--sql", help="Read input using this SQL query")
@click.option(
    "--attach",
    type=(str, click.Path(file_okay=True, dir_okay=False, allow_dash=False)),
    multiple=True,
    help="Additional databases to attach - specify alias and file path",
)
@click.option(
    "--batch-size", type=int, help="Batch size to use when running embeddings"
)
@click.option("--prefix", help="Prefix to add to the IDs", default="")
@click.option(
    "-m", "--model", help="Embedding model to use", envvar="LLM_EMBEDDING_MODEL"
)
@click.option(
    "--prepend",
    help="Prepend this string to all content before embedding",
)
@click.option("--store", is_flag=True, help="Store the text itself in the database")
@click.option(
    "-d",
    "--database",
    type=click.Path(file_okay=True, allow_dash=False, dir_okay=False, writable=True),
    envvar="LLM_EMBEDDINGS_DB",
)
def embed_multi(
    collection,
    input_path,
    format,
    files,
    encodings,
    binary,
    sql,
    attach,
    batch_size,
    prefix,
    model,
    prepend,
    store,
    database,
):
    """
    Store embeddings for multiple strings at once in the specified collection.

    Input data can come from one of three sources:

    \b
    1. A CSV, TSV, JSON or JSONL file:
       - CSV/TSV: First column is ID, remaining columns concatenated as content
       - JSON: Array of objects with "id" field and content fields
       - JSONL: Newline-delimited JSON objects

    \b
       Examples:
         llm embed-multi docs input.csv
         cat data.json | llm embed-multi docs -
         llm embed-multi docs input.json --format json

    \b
    2. A SQL query against a SQLite database:
       - First column returned is used as ID
       - Other columns concatenated to form content

    \b
       Examples:
         llm embed-multi docs --sql "SELECT id, title, body FROM posts"
         llm embed-multi docs --attach blog blog.db --sql "SELECT id, content FROM blog.posts"

    \b
    3. Files in directories matching glob patterns:
       - Each file becomes one embedding
       - Relative file paths become IDs

    \b
       Examples:
         llm embed-multi docs --files docs '**/*.md'
         llm embed-multi images --files photos '*.jpg' --binary
         llm embed-multi texts --files texts '*.txt' --encoding utf-8 --encoding latin-1
    """
    # Validate option combinations before touching the database
    if binary and not files:
        raise click.UsageError("--binary must be used with --files")
    if binary and encodings:
        raise click.UsageError("--binary cannot be used with --encoding")
    if not input_path and not sql and not files:
        raise click.UsageError("Either --sql or input path or --files is required")

    if files:
        if input_path or sql or format:
            raise click.UsageError(
                "Cannot use --files with --sql, input path or --format"
            )

    if database:
        db = sqlite_utils.Database(database)
    else:
        db = sqlite_utils.Database(user_dir() / "embeddings.db")

    for alias, attach_path in attach:
        db.attach(alias, attach_path)

    try:
        collection_obj = Collection(
            collection, db=db, model_id=model or get_default_embedding_model()
        )
    except ValueError:
        raise click.ClickException(
            "You need to specify an embedding model (no default model is set)"
        )

    # expected_length feeds the progress bar; it stays None when reading stdin
    expected_length = None
    if files:
        encodings = encodings or ("utf-8", "latin-1")

        def count_files():
            # Directories are skipped by iterate_files(), so leave them out of
            # the progress bar total as well.
            return sum(
                1
                for directory, pattern in files
                for path in pathlib.Path(directory).glob(pattern)
                if not path.is_dir()
            )

        def iterate_files():
            for directory, pattern in files:
                p = pathlib.Path(directory)
                if not p.exists() or not p.is_dir():
                    # fixes issue/274 - raise error if directory does not exist
                    raise click.UsageError(f"Invalid directory: {directory}")
                for path in pathlib.Path(directory).glob(pattern):
                    if path.is_dir():
                        continue  # fixed issue/280 - skip directories
                    relative = path.relative_to(directory)
                    content = None
                    if binary:
                        content = path.read_bytes()
                    else:
                        for encoding in encodings:
                            try:
                                content = path.read_text(encoding=encoding)
                            except UnicodeDecodeError:
                                continue
                            # Stop at the first encoding that decodes cleanly.
                            # Without this a later permissive encoding such as
                            # latin-1 would overwrite correctly decoded text.
                            break
                    if content is None:
                        # Log to stderr
                        click.echo(
                            "Could not decode text in file {}".format(path),
                            err=True,
                        )
                    else:
                        yield {"id": str(relative), "content": content}

        expected_length = count_files()
        rows = iterate_files()
    elif sql:
        rows = db.query(sql)
        count_sql = "select count(*) as c from ({})".format(sql)
        expected_length = next(db.query(count_sql))["c"]
    else:

        def load_rows(fp):
            return rows_from_file(fp, Format[format.upper()] if format else None)[0]

        try:
            if input_path != "-":
                # Read the file twice - first time is to get a count
                expected_length = 0
                with open(input_path, "rb") as fp:
                    for _ in load_rows(fp):
                        expected_length += 1

            rows = load_rows(
                open(input_path, "rb")
                if input_path != "-"
                else io.BufferedReader(sys.stdin.buffer)
            )
        except json.JSONDecodeError as ex:
            raise click.ClickException(str(ex))

    with click.progressbar(
        rows, label="Embedding", show_percent=True, length=expected_length
    ) as rows:

        def tuples() -> Iterable[Tuple[str, Union[bytes, str]]]:
            # The first value of each row is the ID, the rest form the content
            for row in rows:
                values = list(row.values())
                id: str = prefix + str(values[0])
                content: Optional[Union[bytes, str]] = None
                if binary:
                    content = cast(bytes, values[1])
                else:
                    content = " ".join(v or "" for v in values[1:])
                if prepend and isinstance(content, str):
                    content = prepend + content
                yield id, content or ""

        embed_kwargs = {"store": store}
        if batch_size:
            embed_kwargs["batch_size"] = batch_size
        collection_obj.embed_multi(tuples(), **embed_kwargs)


@cli.command()
@click.argument("collection")
@click.argument("id", required=False)
@click.option(
    "-i",
    "--input",
    type=click.Path(exists=True, readable=True, allow_dash=True),
    help="File to embed for comparison",
)
@click.option("-c", "--content", help="Content to embed for comparison")
@click.option("--binary", is_flag=True, help="Treat input as binary data")
@click.option(
    "-n", "--number", type=int, default=10, help="Number of results to return"
)
@click.option("-p", "--plain", is_flag=True, help="Output in plain text format")
@click.option(
    "-d",
    "--database",
    type=click.Path(file_okay=True, allow_dash=False, dir_okay=False, writable=True),
    envvar="LLM_EMBEDDINGS_DB",
)
@click.option("--prefix", help="Just IDs with this prefix", default="")
def similar(collection, id, input, content, binary, number, plain, database, prefix):
    """
    Return top N similar IDs from a collection using cosine similarity.

    Example usage:

    \b
        llm similar my-collection -c "I like cats"

    Or to find content similar to a specific stored ID:

    \b
        llm similar my-collection 1234
    """
    if not (id or content or input):
        raise click.ClickException("Must provide content or an ID for the comparison")

    db = sqlite_utils.Database(database or (user_dir() / "embeddings.db"))

    if not db["embeddings"].exists():
        raise click.ClickException("No embeddings table found in database")

    try:
        collection_obj = Collection(collection, db, create=False)
    except Collection.DoesNotExist:
        raise click.ClickException("Collection does not exist")

    if id:
        # Compare against an item that is already stored in the collection
        try:
            results = collection_obj.similar_by_id(id, number, prefix=prefix)
        except Collection.DoesNotExist:
            raise click.ClickException("ID not found in collection")
    else:
        # Compare against fresh content: -c wins, then a named file, then stdin
        if not content:
            if input and input != "-":
                with open(input, "rb" if binary else "r") as f:
                    content = f.read()
            else:
                stream = sys.stdin.buffer if binary else sys.stdin
                content = stream.read()
        if not content:
            raise click.ClickException("No content provided")
        results = collection_obj.similar(content, number, prefix=prefix)

    for result in results:
        if not plain:
            # Default output is one JSON object per line
            click.echo(json.dumps(asdict(result)))
            continue
        click.echo(f"{result.id} ({result.score})\n")
        if result.content:
            click.echo(textwrap.indent(result.content, "  "))
        if result.metadata:
            click.echo(textwrap.indent(json.dumps(result.metadata), "  "))
        click.echo("")


# Command group for embedding models. With default_if_no_args=True and
# default="list", invoking the group without a subcommand runs "list".
@cli.group(
    cls=DefaultGroup,
    default="list",
    default_if_no_args=True,
)
def embed_models():
    "Manage available embedding models"


@embed_models.command(name="list")
@click.option(
    "-q",
    "--query",
    multiple=True,
    help="Search for embedding models matching these strings",
)
def embed_models_list(query):
    "List available embedding models"
    lines = []
    for entry in get_embedding_models_with_aliases():
        # When -q is given, every search term has to match the model
        if query and not all(entry.matches(term) for term in query):
            continue
        line = str(entry.model)
        if entry.aliases:
            line += " (aliases: {})".format(", ".join(entry.aliases))
        lines.append(line)
    click.echo("\n".join(lines))


@embed_models.command(name="default")
@click.argument("model", required=False)
@click.option(
    "--remove-default", is_flag=True, help="Reset to specifying no default model"
)
def embed_models_default(model, remove_default):
    "Show or set the default embedding model"
    if not (model or remove_default):
        # Nothing to change: report the current default and stop
        current = get_default_embedding_model()
        if current is not None:
            click.echo(current)
        else:
            click.echo("<No default embedding model set>", err=True)
        return
    try:
        if remove_default:
            set_default_embedding_model(None)
        else:
            # Looking the model up validates that it is known before saving
            model = get_embedding_model(model)
            set_default_embedding_model(model.model_id)
    except KeyError:
        raise click.ClickException("Unknown embedding model: {}".format(model))


# Command group for embedding collections. With default_if_no_args=True and
# default="list", invoking the group without a subcommand runs "list".
@cli.group(
    cls=DefaultGroup,
    default="list",
    default_if_no_args=True,
)
def collections():
    "View and manage collections of embeddings"


@collections.command(name="path")
def collections_path():
    "Output the path to the embeddings database"
    embeddings_db = user_dir() / "embeddings.db"
    click.echo(embeddings_db)


@collections.command(name="list")
@click.option(
    "-d",
    "--database",
    type=click.Path(file_okay=True, allow_dash=False, dir_okay=False, writable=True),
    envvar="LLM_EMBEDDINGS_DB",
    help="Path to embeddings database",
)
@click.option("json_", "--json", is_flag=True, help="Output as JSON")
def embed_db_collections(database, json_):
    "View a list of collections"
    database = database or (user_dir() / "embeddings.db")
    db = sqlite_utils.Database(str(database))
    if not db["collections"].exists():
        raise click.ClickException("No collections table found in {}".format(database))
    # The left join keeps collections that have no embeddings yet (count 0)
    summary_sql = """
    select
        collections.name,
        collections.model,
        count(embeddings.id) as num_embeddings
    from
        collections left join embeddings
        on collections.id = embeddings.collection_id
    group by
        collections.name, collections.model
    """
    rows = db.query(summary_sql)
    if json_:
        click.echo(json.dumps(list(rows), indent=4))
        return
    for row in rows:
        total = row["num_embeddings"]
        plural = "" if total == 1 else "s"
        click.echo("{}: {}".format(row["name"], row["model"]))
        click.echo("  {} embedding{}".format(total, plural))


@collections.command(name="delete")
@click.argument("collection")
@click.option(
    "-d",
    "--database",
    type=click.Path(file_okay=True, allow_dash=False, dir_okay=False, writable=True),
    envvar="LLM_EMBEDDINGS_DB",
    help="Path to embeddings database",
)
def collections_delete(collection, database):
    """
    Delete the specified collection

    Example usage:

    \b
        llm collections delete my-collection
    """
    db_path = database or (user_dir() / "embeddings.db")
    db = sqlite_utils.Database(str(db_path))
    try:
        # create=False: a missing collection is an error, not something to create
        target = Collection(collection, db, create=False)
    except Collection.DoesNotExist:
        raise click.ClickException("Collection does not exist")
    target.delete()


# Command group nested under "models". With default_if_no_args=True and
# default="list", invoking the group without a subcommand runs "list".
@models.group(
    cls=DefaultGroup,
    default="list",
    default_if_no_args=True,
)
def options():
    "Manage default options for models"


@options.command(name="list")
def options_list():
    """
    List default options for all models

    Example usage:

    \b
        llm models options list
    """
    all_options = get_all_model_options()
    if not all_options:
        click.echo("No default options set for any models.", err=True)
        return

    # One heading per model, with its options indented underneath
    for model_id in all_options:
        click.echo(f"{model_id}:")
        for key, value in all_options[model_id].items():
            click.echo(f"  {key}: {value}")


@options.command(name="show")
@click.argument("model")
def options_show(model):
    """
    List default options set for a specific model

    Example usage:

    \b
        llm models options show gpt-4o
    """
    import llm

    # An alias resolves to the real model ID; unknown names are used verbatim
    try:
        model_id = llm.get_model(model).model_id
    except llm.UnknownModelError:
        model_id = model

    model_options = get_model_options(model_id)
    if not model_options:
        click.echo(f"No default options set for model '{model_id}'.", err=True)
        return

    for name, setting in model_options.items():
        click.echo(f"{name}: {setting}")


@options.command(name="set")
@click.argument("model")
@click.argument("key")
@click.argument("value")
def options_set(model, key, value):
    """
    Set a default option for a model

    Example usage:

    \b
        llm models options set gpt-4o temperature 0.5
    """
    import llm

    try:
        # A known model (or alias) is stored under its canonical ID
        resolved = llm.get_model(model)
        model_id = resolved.model_id

        # Building an Options instance with just this key lets the model's
        # own schema accept or reject the value before it is saved.
        try:
            resolved.Options(**{key: value})
        except pydantic.ValidationError as ex:
            raise click.ClickException(render_errors(ex.errors()))

    except llm.UnknownModelError:
        # Unknown models are stored under the name as given, unvalidated
        model_id = model

    set_model_option(model_id, key, value)
    click.echo(f"Set default option {key}={value} for model {model_id}", err=True)


@options.command(name="clear")
@click.argument("model")
@click.argument("key", required=False)
def options_clear(model, key):
    """
    Clear default option(s) for a model

    Example usage:

    \b
        llm models options clear gpt-4o
        # Or for a single option
        llm models options clear gpt-4o temperature
    """
    import llm

    # An alias resolves to the real model ID; unknown names are used verbatim
    try:
        model_id = llm.get_model(model).model_id
    except llm.UnknownModelError:
        model_id = model

    if key:
        cleared_keys = [key]
        clear_model_option(model_id, key)
    else:
        # No key given: clear every option currently stored for the model
        cleared_keys = list(get_model_options(model_id).keys())
        for stored_key in cleared_keys:
            clear_model_option(model_id, stored_key)

    if len(cleared_keys) == 1:
        click.echo(f"Cleared option '{cleared_keys[0]}' for model {model_id}")
    elif cleared_keys:
        click.echo(
            f"Cleared {', '.join(cleared_keys)} options for model {model_id}"
        )


def template_dir():
    "Return the templates directory under user_dir(), creating it if needed"
    templates = user_dir() / "templates"
    templates.mkdir(exist_ok=True, parents=True)
    return templates


def logs_db_path():
    "Return the path of logs.db inside user_dir()"
    base = user_dir()
    return base / "logs.db"


def get_history(chat_id):
    """
    Look up the logged rows that belong to a chat.

    Returns a (chat_id, rows) pair. A chat_id of None yields (None, []).
    A chat_id of -1 selects the most recently logged row and uses its
    chat_id (or its own id when chat_id is empty); if the logs table has
    no rows the result is (None, []).
    """
    if chat_id is None:
        return None, []
    db = sqlite_utils.Database(logs_db_path())
    migrate(db)
    logs = db["logs"]
    if chat_id == -1:
        newest = list(logs.rows_where(order_by="-id", limit=1))
        if not newest:
            # Database is empty
            return None, []
        latest = newest[0]
        chat_id = latest.get("chat_id") or latest.get("id")
    matching = logs.rows_where(
        "id = ? or chat_id = ?", [chat_id, chat_id], order_by="id"
    )
    return chat_id, matching


def render_errors(errors):
    """
    Format a list of validation error dicts as readable text.

    Each error contributes two lines: the comma-separated "loc" path and,
    indented by two spaces, the "msg" text.

    Args:
        errors: iterable of dicts with "loc" (sequence of path parts) and
            "msg" (string) keys, as produced by ValidationError.errors()

    Returns:
        The formatted lines joined with newlines ("" for no errors)
    """
    output = []
    for error in errors:
        # loc may mix strings with integer indexes (e.g. a list position),
        # so every part is converted before joining.
        output.append(", ".join(str(part) for part in error["loc"]))
        output.append("  " + error["msg"])
    return "\n".join(output)


# Executed at import time, after the commands above have been defined.
load_plugins()

# Call the register_commands plugin hook with the root CLI group
# (presumably so plugins can attach their own commands - confirm in hookspecs).
pm.hook.register_commands(cli=cli)


def _human_readable_size(size_bytes):
    if size_bytes == 0:
        return "0B"

    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    i = 0

    while size_bytes >= 1024 and i < len(size_name) - 1:
        size_bytes /= 1024.0
        i += 1

    return "{:.2f}{}".format(size_bytes, size_name[i])


def logs_on():
    "Return True unless a 'logs-off' entry exists in user_dir()"
    marker = user_dir() / "logs-off"
    return not marker.exists()


def get_all_model_options() -> dict:
    """
    Read model_options.json from user_dir() and return its parsed contents.

    Returns an empty dictionary when the file is missing or is not valid JSON.
    """
    path = user_dir() / "model_options.json"
    if path.exists():
        try:
            return json.loads(path.read_text())
        except json.JSONDecodeError:
            # Unparseable file: treated the same as no stored options
            pass
    return {}


def get_model_options(model_id: str) -> dict:
    """
    Return the default options stored for one model.

    Args:
        model_id: ID of the model whose options should be returned

    Returns:
        The options dictionary saved for that model, or an empty dictionary
        when nothing is stored (or the options file is missing or invalid)
    """
    # get_all_model_options() already maps a missing or invalid file to {}
    return get_all_model_options().get(model_id, {})


def set_model_option(model_id: str, key: str, value: Any) -> None:
    """
    Store a default option for a model in model_options.json.

    A missing or unparseable file is treated as an empty set of options,
    and the whole file is rewritten on every call.

    Args:
        model_id: The model ID
        key: The option key
        value: The option value
    """
    path = user_dir() / "model_options.json"
    options = {}
    if path.exists():
        try:
            options = json.loads(path.read_text())
        except json.JSONDecodeError:
            options = {}

    # Create the per-model entry on first use, then record the option
    options.setdefault(model_id, {})[key] = value

    path.write_text(json.dumps(options, indent=2))


def clear_model_option(model_id: str, key: str) -> None:
    """
    Remove one default option of a model from model_options.json.

    Does nothing when the file is missing, is not valid JSON, or has no
    entry for the model. A model entry left without options is removed.

    Args:
        model_id: The model ID
        key: Key to clear
    """
    path = user_dir() / "model_options.json"
    if not path.exists():
        return

    try:
        stored = json.loads(path.read_text())
    except json.JSONDecodeError:
        return

    if model_id not in stored:
        return

    model_options = stored[model_id]
    if key in model_options:
        del model_options[key]
        if not model_options:
            # Last option gone: drop the now-empty model entry too
            del stored[model_id]

    path.write_text(json.dumps(stored, indent=2))


class LoadTemplateError(ValueError):
    "Raised when a template cannot be loaded or parsed"


def _parse_yaml_template(name, content):
    """
    Build a Template from YAML text.

    A document that is a plain string becomes the template's prompt; a
    mapping is passed to Template as keyword arguments, with "name"
    forced to the given name.

    Args:
        name: name assigned to the resulting template
        content: YAML source text

    Returns:
        The constructed Template

    Raises:
        LoadTemplateError: if the YAML is invalid, the document is neither
            a string nor a mapping (for example an empty file or a list),
            or Template validation fails
    """
    try:
        loaded = yaml.safe_load(content)
    except yaml.YAMLError as ex:
        raise LoadTemplateError("Invalid YAML: {}".format(str(ex)))
    if isinstance(loaded, str):
        return Template(name=name, prompt=loaded)
    if not isinstance(loaded, dict):
        # Empty documents load as None and sequences as lists; without this
        # check the item assignment below would fail with a TypeError.
        raise LoadTemplateError(
            "Invalid template: expected a YAML string or mapping, got {}".format(
                type(loaded).__name__
            )
        )
    loaded["name"] = name
    try:
        return Template(**loaded)
    except pydantic.ValidationError as ex:
        msg = "A validation error occurred:\n"
        msg += render_errors(ex.errors())
        raise LoadTemplateError(msg)


def load_template(name: str) -> Template:
    """
    Load template, or raise LoadTemplateError(msg)

    The name is resolved in this order:

    1. An http:// or https:// URL is fetched and parsed as YAML.
    2. A "prefix:rest" name that is not an existing path is passed to the
       template loader registered for that prefix.
    3. An existing file path is read directly.
    4. Otherwise "<name>.yaml" is looked up in template_dir().

    Templates read from the filesystem (cases 3 and 4) are marked as
    trusted for functions; URL and loader templates are not marked here.

    Raises:
        LoadTemplateError: if the template cannot be fetched, found or parsed
    """
    if name.startswith("https://") or name.startswith("http://"):
        try:
            response = httpx.get(name)
            response.raise_for_status()
        except httpx.HTTPError as ex:
            # httpx.HTTPError covers transport failures (DNS, refused
            # connection, timeout) as well as error status codes.
            raise LoadTemplateError("Could not load template {}: {}".format(name, ex))
        return _parse_yaml_template(name, response.text)

    potential_path = pathlib.Path(name)

    if has_plugin_prefix(name) and not potential_path.exists():
        prefix, rest = name.split(":", 1)
        loaders = get_template_loaders()
        if prefix not in loaders:
            raise LoadTemplateError("Unknown template prefix: {}".format(prefix))
        loader = loaders[prefix]
        try:
            return loader(rest)
        except Exception as ex:
            raise LoadTemplateError("Could not load template {}: {}".format(name, ex))

    # Try local file
    if potential_path.exists():
        path = potential_path
    else:
        # Look for template in template_dir()
        path = template_dir() / f"{name}.yaml"
    if not path.exists():
        raise LoadTemplateError(f"Invalid template: {name}")
    content = path.read_text()
    template_obj = _parse_yaml_template(name, content)
    # We trust functions here because they came from the filesystem
    template_obj._functions_is_trusted = True
    return template_obj


def _tools_from_code(code_or_path: str) -> List[Tool]:
    """
    Execute Python source and wrap every public callable it defines as a Tool.

    A single-line argument ending in ".py" is treated as a file path and
    its contents are executed instead. Names starting with "_" are skipped.
    """
    source = code_or_path
    if "\n" not in code_or_path and code_or_path.endswith(".py"):
        try:
            source = pathlib.Path(code_or_path).read_text()
        except FileNotFoundError:
            raise click.ClickException("File not found: {}".format(code_or_path))
    namespace: Dict[str, Any] = {}
    # NOTE: this runs the supplied code with exec()
    try:
        exec(source, namespace)
    except SyntaxError as ex:
        raise click.ClickException("Error in --functions definition: {}".format(ex))
    return [
        Tool.function(candidate)
        for identifier, candidate in namespace.items()
        if callable(candidate) and not identifier.startswith("_")
    ]


def _debug_tool_call(_, tool_call, tool_result):
    """
    Print a tool call and its result to stderr.

    The call is shown in yellow, the output (pretty-printed when it is
    valid JSON) plus any attachments in green, and an exception, if the
    result carries one, in red.
    """
    header = "\nTool call: {}({})".format(tool_call.name, tool_call.arguments)
    click.echo(click.style(header, fg="yellow", bold=True), err=True)

    attachments = ""
    if tool_result.attachments:
        attachments = "\nAttachments:\n" + "".join(
            f"  {attachment!r}\n" for attachment in tool_result.attachments
        )

    try:
        # Re-serialize JSON output with indentation
        output = json.dumps(json.loads(tool_result.output), indent=2)
    except ValueError:
        # Not JSON: show the output exactly as returned
        output = tool_result.output
    output += attachments

    # The trailing newline is omitted when an exception line follows
    trailer = "" if tool_result.exception else "\n"
    click.echo(
        click.style(textwrap.indent(output, "  ") + trailer, fg="green", bold=True),
        err=True,
    )
    if tool_result.exception:
        message = "  Exception: {}".format(tool_result.exception)
        click.echo(click.style(message, fg="red", bold=True), err=True)


def _approve_tool_call(_, tool_call):
    """
    Show a pending tool call on stderr and ask the user to confirm it.

    Raises:
        CancelToolCall: if the user does not approve the call
    """
    summary = "Tool call: {}({})".format(tool_call.name, tool_call.arguments)
    click.echo(click.style(summary, fg="yellow", bold=True), err=True)
    approved = click.confirm("Approve tool call?")
    if not approved:
        raise CancelToolCall("User cancelled tool call")


def _gather_tools(
    tool_specs: List[str], python_tools: List[str]
) -> List[Union[Tool, Type[Toolbox]]]:
    """
    Collect the tools requested for a prompt.

    Tools defined by python_tools (code or .py paths) come first, followed
    by one entry per tool_specs item looked up among the registered tools.
    A spec starting with an uppercase letter is treated as a class and
    instantiated from the spec; anything else is looked up as a function.

    Raises:
        click.ClickException: if a spec names a tool that is not registered
    """
    tools: List[Union[Tool, Type[Toolbox]]] = []
    for code_or_path in python_tools or []:
        tools.extend(_tools_from_code(code_or_path))

    registered_tools = get_tools()
    registered_classes = {
        tool_name: tool
        for tool_name, tool in registered_tools.items()
        if inspect.isclass(tool)
    }

    # Only the part before "(" has to match a registered name
    unknown = [
        spec for spec in tool_specs if spec.split("(")[0] not in registered_tools
    ]
    if unknown:
        raise click.ClickException(
            "Tool(s) {} not found. Available tools: {}".format(
                ", ".join(unknown), ", ".join(registered_tools.keys())
            )
        )

    for spec in tool_specs:
        if spec[0].isupper():
            # It's a class
            tools.append(instantiate_from_spec(registered_classes, spec))
        else:
            # It's a function
            tools.append(registered_tools[spec])
    return tools


def _get_conversation_tools(conversation, tools):
    if conversation and not tools and conversation.responses:
        # Copy plugin tools from first response in conversation
        initial_tools = conversation.responses[0].prompt.tools
        if initial_tools:
            # Only tools from plugins:
            return [tool.name for tool in initial_tools if tool.plugin]


================================================
FILE: llm/default_plugins/__init__.py
================================================


================================================
FILE: llm/default_plugins/default_tools.py
================================================
import llm
from llm.tools import llm_time, llm_version


@llm.hookimpl
def register_tools(register):
    "Register the built-in default tools"
    for default_tool in (llm_version, llm_time):
        register(default_tool)


================================================
FILE: llm/default_plugins/openai_models.py
================================================
from llm import (
    AsyncConversation,
    AsyncKeyModel,
    AsyncResponse,
    Conversation,
    EmbeddingModel,
    KeyModel,
    Prompt,
    Response,
    hookimpl,
)
import llm
from llm.utils import (
    dicts_to_table_string,
    remove_dict_none_values,
    logging_client,
    simplify_usage_dict,
)
import click
import datetime
from enum import Enum
import httpx
import openai
import os

from pydantic import field_validator, Field

from typing import AsyncGenerator, cast, List, Iterable, Iterator, Optional, Union
import json
import yaml


@hookimpl
def register_models(register):
    """Register the built-in OpenAI models plus any user-defined extras.

    Most models are registered as a (Chat, AsyncChat) pair built with the same
    capability flags. After the built-in list, models described in
    ``extra-openai-models.yaml`` inside the llm user directory are registered
    as well, if that file exists.

    Args:
        register: Callback supplied by the plugin system. It is called here as
            ``register(model, async_model, aliases=...)`` or
            ``register(model, aliases=...)``.
    """
    # GPT-4o
    register(
        Chat("gpt-4o", vision=True, supports_schema=True, supports_tools=True),
        AsyncChat("gpt-4o", vision=True, supports_schema=True, supports_tools=True),
        aliases=("4o",),
    )
    register(
        Chat("chatgpt-4o-latest", vision=True),
        AsyncChat("chatgpt-4o-latest", vision=True),
        aliases=("chatgpt-4o",),
    )
    register(
        Chat("gpt-4o-mini", vision=True, supports_schema=True, supports_tools=True),
        AsyncChat(
            "gpt-4o-mini", vision=True, supports_schema=True, supports_tools=True
        ),
        aliases=("4o-mini",),
    )
    for audio_model_id in (
        "gpt-4o-audio-preview",
        "gpt-4o-audio-preview-2024-12-17",
        "gpt-4o-audio-preview-2024-10-01",
        "gpt-4o-mini-audio-preview",
        "gpt-4o-mini-audio-preview-2024-12-17",
    ):
        register(
            Chat(audio_model_id, audio=True),
            AsyncChat(audio_model_id, audio=True),
        )
    # GPT-4.1
    for model_id in ("gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"):
        register(
            Chat(model_id, vision=True, supports_schema=True, supports_tools=True),
            AsyncChat(model_id, vision=True, supports_schema=True, supports_tools=True),
            # Alias is the model ID without the "gpt-" prefix, e.g. "4.1-mini"
            aliases=(model_id.replace("gpt-", ""),),
        )
    # 3.5 and 4
    register(
        Chat("gpt-3.5-turbo"), AsyncChat("gpt-3.5-turbo"), aliases=("3.5", "chatgpt")
    )
    register(
        Chat("gpt-3.5-turbo-16k"),
        AsyncChat("gpt-3.5-turbo-16k"),
        aliases=("chatgpt-16k", "3.5-16k"),
    )
    register(Chat("gpt-4"), AsyncChat("gpt-4"), aliases=("4", "gpt4"))
    register(Chat("gpt-4-32k"), AsyncChat("gpt-4-32k"), aliases=("4-32k",))
    # GPT-4 Turbo models
    register(Chat("gpt-4-1106-preview"), AsyncChat("gpt-4-1106-preview"))
    register(Chat("gpt-4-0125-preview"), AsyncChat("gpt-4-0125-preview"))
    register(Chat("gpt-4-turbo-2024-04-09"), AsyncChat("gpt-4-turbo-2024-04-09"))
    register(
        Chat("gpt-4-turbo"),
        AsyncChat("gpt-4-turbo"),
        aliases=("gpt-4-turbo-preview", "4-turbo", "4t"),
    )
    # GPT-4.5
    register(
        Chat(
            "gpt-4.5-preview-2025-02-27",
            vision=True,
            supports_schema=True,
            supports_tools=True,
        ),
        AsyncChat(
            "gpt-4.5-preview-2025-02-27",
            vision=True,
            supports_schema=True,
            supports_tools=True,
        ),
    )
    register(
        Chat("gpt-4.5-preview", vision=True, supports_schema=True, supports_tools=True),
        AsyncChat(
            "gpt-4.5-preview", vision=True, supports_schema=True, supports_tools=True
        ),
        aliases=("gpt-4.5",),
    )
    # o1
    for model_id in ("o1", "o1-2024-12-17"):
        register(
            Chat(
                model_id,
                vision=True,
                can_stream=False,
                reasoning=True,
                supports_schema=True,
                supports_tools=True,
            ),
            AsyncChat(
                model_id,
                vision=True,
                can_stream=False,
                reasoning=True,
                supports_schema=True,
                supports_tools=True,
            ),
        )

    register(
        Chat("o1-preview", allows_system_prompt=False),
        AsyncChat("o1-preview", allows_system_prompt=False),
    )
    register(
        Chat("o1-mini", allows_system_prompt=False),
        AsyncChat("o1-mini", allows_system_prompt=False),
    )
    register(
        Chat("o3-mini", reasoning=True, supports_schema=True, supports_tools=True),
        AsyncChat("o3-mini", reasoning=True, supports_schema=True, supports_tools=True),
    )
    register(
        Chat(
            "o3", vision=True, reasoning=True, supports_schema=True, supports_tools=True
        ),
        AsyncChat(
            "o3", vision=True, reasoning=True, supports_schema=True, supports_tools=True
        ),
    )
    register(
        Chat(
            "o4-mini",
            vision=True,
            reasoning=True,
            supports_schema=True,
            supports_tools=True,
        ),
        AsyncChat(
            "o4-mini",
            vision=True,
            reasoning=True,
            supports_schema=True,
            supports_tools=True,
        ),
    )
    # GPT-5
    for model_id in (
        "gpt-5",
        "gpt-5-mini",
        "gpt-5-nano",
        "gpt-5-2025-08-07",
        "gpt-5-mini-2025-08-07",
        "gpt-5-nano-2025-08-07",
    ):
        register(
            Chat(
                model_id,
                vision=True,
                reasoning=True,
                supports_schema=True,
                supports_tools=True,
            ),
            AsyncChat(
                model_id,
                vision=True,
                reasoning=True,
                supports_schema=True,
                supports_tools=True,
            ),
        )
    # GPT-5.1
    for model_id in (
        "gpt-5.1",
        "gpt-5.1-chat-latest",
    ):
        register(
            Chat(
                model_id,
                vision=True,
                reasoning=True,
                supports_schema=True,
                supports_tools=True,
            ),
            AsyncChat(
                model_id,
                vision=True,
                reasoning=True,
                supports_schema=True,
                supports_tools=True,
            ),
        )
    # GPT-5.2
    for model_id in ("gpt-5.2", "gpt-5.2-chat-latest"):
        register(
            Chat(
                model_id,
                vision=True,
                reasoning=True,
                supports_schema=True,
                supports_tools=True,
            ),
            AsyncChat(
                model_id,
                vision=True,
                reasoning=True,
                supports_schema=True,
                supports_tools=True,
            ),
        )
        # "gpt-5.2-pro" is Responses API only

    # GPT-5.4
    for model_id in (
        "gpt-5.4",
        "gpt-5.4-2026-03-05",
        "gpt-5.4-mini",
        "gpt-5.4-mini-2026-03-17",
        "gpt-5.4-nano",
        "gpt-5.4-nano-2026-03-17",
    ):
        register(
            Chat(
                model_id,
                vision=True,
                reasoning=True,
                supports_schema=True,
                supports_tools=True,
            ),
            AsyncChat(
                model_id,
                vision=True,
                reasoning=True,
                supports_schema=True,
                supports_tools=True,
            ),
        )

    # The -instruct completion model
    register(
        Completion("gpt-3.5-turbo-instruct", default_max_tokens=256),
        aliases=("3.5-instruct", "chatgpt-instruct"),
    )

    # Load extra models
    extra_path = llm.user_dir() / "extra-openai-models.yaml"
    if not extra_path.exists():
        return
    with open(extra_path) as f:
        # safe_load() returns None for an empty (or comment-only) file;
        # treat that the same as "no extra models" instead of crashing
        extra_models = yaml.safe_load(f) or []
    for extra_model in extra_models:
        # model_id and model_name are required keys; everything else is optional
        model_id = extra_model["model_id"]
        aliases = extra_model.get("aliases", [])
        model_name = extra_model["model_name"]
        api_base = extra_model.get("api_base")
        api_type = extra_model.get("api_type")
        api_version = extra_model.get("api_version")
        api_engine = extra_model.get("api_engine")
        headers = extra_model.get("headers")
        reasoning = extra_model.get("reasoning")
        # Capability flags are only forwarded when they differ from the
        # constructor defaults, and only for literal booleans
        kwargs = {}
        if extra_model.get("can_stream") is False:
            kwargs["can_stream"] = False
        if extra_model.get("supports_schema") is True:
            kwargs["supports_schema"] = True
        if extra_model.get("supports_tools") is True:
            kwargs["supports_tools"] = True
        if extra_model.get("vision") is True:
            kwargs["vision"] = True
        if extra_model.get("audio") is True:
            kwargs["audio"] = True
        if extra_model.get("completion"):
            klass = Completion
        else:
            klass = Chat
        chat_model = klass(
            model_id,
            model_name=model_name,
            api_base=api_base,
            api_type=api_type,
            api_version=api_version,
            api_engine=api_engine,
            headers=headers,
            reasoning=reasoning,
            **kwargs,
        )
        # A custom api_base means no key is required by default...
        if api_base:
            chat_model.needs_key = None
        # ...unless the entry names the stored key to use
        if extra_model.get("api_key_name"):
            chat_model.needs_key = extra_model["api_key_name"]
        register(
            chat_model,
            aliases=aliases,
        )


@hookimpl
def register_embedding_models(register):
    """Register the OpenAI embedding models, including fixed-dimension variants."""
    # Each row: (llm model ID, OpenAI model ID, dimensions or None, aliases)
    catalogue = (
        ("text-embedding-ada-002", "text-embedding-ada-002", None, ("ada", "ada-002")),
        ("text-embedding-3-small", "text-embedding-3-small", None, ("3-small",)),
        ("text-embedding-3-large", "text-embedding-3-large", None, ("3-large",)),
        # With varying dimensions
        ("text-embedding-3-small-512", "text-embedding-3-small", 512, ("3-small-512",)),
        ("text-embedding-3-large-256", "text-embedding-3-large", 256, ("3-large-256",)),
        (
            "text-embedding-3-large-1024",
            "text-embedding-3-large",
            1024,
            ("3-large-1024",),
        ),
    )
    for llm_model_id, openai_model_id, dimensions, alias_names in catalogue:
        register(
            OpenAIEmbeddingModel(llm_model_id, openai_model_id, dimensions),
            aliases=alias_names,
        )


class OpenAIEmbeddingModel(EmbeddingModel):
    """Embedding model that calls the OpenAI embeddings endpoint."""

    needs_key = "openai"
    key_env_var = "OPENAI_API_KEY"
    batch_size = 100

    def __init__(self, model_id, openai_model_id, dimensions=None):
        """Store the llm-facing ID, the OpenAI model ID and optional dimensions."""
        self.model_id = model_id
        self.openai_model_id = openai_model_id
        self.dimensions = dimensions

    def embed_batch(self, items: Iterable[Union[str, bytes]]) -> Iterator[List[float]]:
        """Embed a batch of items in one API call.

        Returns a generator yielding one list of floats per returned result.
        """
        request = dict(input=items, model=self.openai_model_id)
        # Only send "dimensions" when a truthy value was configured
        if self.dimensions:
            request["dimensions"] = self.dimensions
        client = openai.OpenAI(api_key=self.get_key())
        response = client.embeddings.create(**request)
        return (
            [float(component) for component in entry.embedding]
            for entry in response.data
        )


@hookimpl
def register_commands(cli):
    # Adds an "openai" command group to the CLI with a "models" subcommand.
    # The strings inside the nested functions are click help text.
    @cli.group(name="openai")
    def openai_():
        "Commands for working directly with the OpenAI API"

    @openai_.command()
    @click.option("json_", "--json", is_flag=True, help="Output as JSON")
    @click.option("--key", help="OpenAI API key")
    def models(json_, key):
        "List models available to you from the OpenAI API"
        from llm import get_key

        api_key = get_key(key, "openai", "OPENAI_API_KEY")
        response = httpx.get(
            "https://api.openai.com/v1/models",
            headers={"Authorization": f"Bearer {api_key}"},
        )
        if response.status_code != 200:
            raise click.ClickException(
                f"Error {response.status_code} from OpenAI API: {response.text}"
            )
        model_list = response.json()["data"]
        if json_:
            click.echo(json.dumps(model_list, indent=4))
            return

        def as_row(entry):
            # The "created" timestamp is rendered as ISO 8601 in UTC
            created_at = datetime.datetime.fromtimestamp(
                entry["created"], datetime.timezone.utc
            )
            return {
                "id": entry["id"],
                "owned_by": entry["owned_by"],
                "created": created_at.isoformat(),
            }

        # Table columns: id, owned_by, created
        columns = "id owned_by created".split()
        rows = [as_row(entry) for entry in model_list]
        table_lines = dicts_to_table_string(columns, rows)
        print("\n".join(table_lines))


# Options accepted by every OpenAI model class in this module.
# All fields default to None.
class SharedOptions(llm.Options):
    temperature: Optional[float] = Field(
        description=(
            "What sampling temperature to use, between 0 and 2. Higher values like "
            "0.8 will make the output more random, while lower values like 0.2 will "
            "make it more focused and deterministic."
        ),
        ge=0,
        le=2,
        default=None,
    )
    max_tokens: Optional[int] = Field(
        description="Maximum number of tokens to generate.", default=None
    )
    top_p: Optional[float] = Field(
        description=(
            "An alternative to sampling with temperature, called nucleus sampling, "
            "where the model considers the results of the tokens with top_p "
            "probability mass. So 0.1 means only the tokens comprising the top "
            "10% probability mass are considered. Recommended to use top_p or "
            "temperature but not both."
        ),
        ge=0,
        le=1,
        default=None,
    )
    frequency_penalty: Optional[float] = Field(
        description=(
            "Number between -2.0 and 2.0. Positive values penalize new tokens based "
            "on their existing frequency in the text so far, decreasing the model's "
            "likelihood to repeat the same line verbatim."
        ),
        ge=-2,
        le=2,
        default=None,
    )
    presence_penalty: Optional[float] = Field(
        description=(
            "Number between -2.0 and 2.0. Positive values penalize new tokens based "
            "on whether they appear in the text so far, increasing the model's "
            "likelihood to talk about new topics."
        ),
        ge=-2,
        le=2,
        default=None,
    )
    stop: Optional[str] = Field(
        description=("A string where the API will stop generating further tokens."),
        default=None,
    )
    logit_bias: Optional[Union[dict, str]] = Field(
        description=(
            "Modify the likelihood of specified tokens appearing in the completion. "
            'Pass a JSON string like \'{"1712":-100, "892":-100, "1489":-100}\''
        ),
        default=None,
    )
    seed: Optional[int] = Field(
        description="Integer seed to attempt to sample deterministically",
        default=None,
    )

    @field_validator("logit_bias")
    @classmethod
    def validate_logit_bias(cls, logit_bias):
        """Normalize logit_bias to a dict of int token ID -> int bias.

        Accepts None, a dict, or a JSON string encoding an object. Keys and
        values are coerced with int(); values must be within -100..100.

        Raises:
            ValueError: for invalid JSON, a decoded value that is not an
                object, a key or value that cannot be converted to int, or a
                bias outside the allowed range.
        """
        if logit_bias is None:
            return None

        if isinstance(logit_bias, str):
            try:
                logit_bias = json.loads(logit_bias)
            except json.JSONDecodeError:
                raise ValueError("Invalid JSON in logit_bias string")

        # Valid JSON is not necessarily an object (e.g. "[1, 2]" or "5");
        # reject it here rather than failing on .items() below
        if not isinstance(logit_bias, dict):
            raise ValueError("logit_bias must be a JSON object")

        validated_logit_bias = {}
        for key, value in logit_bias.items():
            try:
                int_key = int(key)
                int_value = int(value)
            except (TypeError, ValueError):
                # TypeError covers values such as null or nested containers
                raise ValueError("Invalid key-value pair in logit_bias dictionary")
            # Checked outside the try block so this specific message is not
            # replaced by the generic conversion error above
            if not -100 <= int_value <= 100:
                raise ValueError("Value must be between -100 and 100")
            validated_logit_bias[int_key] = int_value

        return validated_logit_bias


# Allowed values for the reasoning_effort option. Mixing in str means each
# member is itself a string equal to its value.
class ReasoningEffortEnum(str, Enum):
    none = "none"
    minimal = "minimal"
    low = "low"
    medium = "medium"
    high = "high"
    xhigh = "xhigh"


# Options for models constructed with reasoning=True: the shared options plus
# json_object and reasoning_effort.
class OptionsForReasoning(SharedOptions):
    json_object: Optional[bool] = Field(
        description="Output a valid JSON object {...}. Prompt must mention JSON.",
        default=None,
    )
    # NOTE(review): the description below lists only low, medium and high,
    # while ReasoningEffortEnum also defines none, minimal and xhigh.
    # Confirm which values are intended before changing the user-facing text.
    reasoning_effort: Optional[ReasoningEffortEnum] = Field(
        description=(
            "Constraints effort on reasoning for reasoning models. Currently supported "
            "values are low, medium, and high. Reducing reasoning effort can result in "
            "faster responses and fewer tokens used on reasoning in a response."
        ),
        default=None,
    )


def _attachment(attachment):
    url = attachment.url
    base64_content = ""
    if not url or attachment.resolve_type().startswith("audio/"):
        base64_content = attachment.base64_content()
        url = f"data:{attachment.resolve_type()};base64,{base64_content}"
    if attachment.resolve_type() == "application/pdf":
        if not base64_content:
            base64_content = attachment.base64_content()
        return {
            "type": "file",
            "file": {
                "filename": f"{attachment.id()}.pdf",
                "file_data": f"data:application/pdf;base64,{base64_content}",
            },
        }
    if attachment.resolve_type().startswith("image/"):
        return {"type": "image_url", "image_url": {"url": url}}
    else:
        format_ = "wav" if attachment.resolve_type() == "audio/wav" else "mp3"
        return {
            "type": "input_audio",
            "input_audio": {
                "data": base64_content,
                "format": format_,
            },
        }


class _Shared:
    def __init__(
        self,
        model_id,
        key=None,
        model_name=None,
        api_base=None,
        api_type=None,
        api_version=None,
        api_engine=None,
        headers=None,
        can_stream=True,
        vision=False,
        audio=False,
        reasoning=False,
        supports_schema=False,
        supports_tools=False,
        allows_system_prompt=True,
    ):
        self.model_id = model_id
        self.key = key
        self.supports_schema = supports_schema
        self.supports_tools = supports_tools
        self.model_name = model_name
        self.api_base = api_base
        self.api_type = api_type
        self.api_version = api_version
        self.api_engine = api_engine
        self.headers = headers
        self.can_stream = can_stream
        self.vision = vision
        self.allows_system_prompt = allows_system_prompt

        self.attachment_types = set()

        if reasoning:
            self.Options = OptionsForReasoning

        if vision:
            self.attachment_types.update(
                {
                    "image/png",
                    "image/jpeg",
                    "image/webp",
                    "image/gif",
                    "application/pdf",
                }
            )

        if audio:
            self.attachment_types.update(
                {
                    "audio/wav",
                    "audio/mpeg",
                }
            )

    def __str__(self) -> str:
        return "OpenAI Chat: {}".format(self.model_id)

    def build_messages(self, prompt, conversation):
        messages = []
        current_system = None
        if conversation is not None:
            for prev_response in conversation.responses:
                if (
                    prev_response.prompt.system
                    and prev_response.prompt.system != current_system
                ):
                    messages.append(
                        {"role": "system", "content": prev_response.prompt.system}
                    )
                    current_system = prev_response.prompt.system
                if prev_response.attachments:
                    attachment_message = []
                    if prev_response.prompt.prompt:
                        attachment_message.append(
                            {"type": "text", "text": prev_response.prompt.prompt}
                        )
                    for attachment in prev_response.attachments:
                        attachment_message.append(_attachment(attachment))
                    messages.append({"role": "user", "content": attachment_message})
                elif prev_response.prompt.prompt:
                    messages.append(
                        {"role": "user", "content": prev_response.prompt.prompt}
                    )
                for tool_result in prev_response.prompt.tool_results:
                    messages.append(
                        {
                            "role": "tool",
                            "tool_call_id": tool_result.tool_call_id,
                            "content": tool_result.output,
                        }
                    )
                prev_text = prev_response.text_or_raise()
                if prev_text:
                    messages.append({"role": "assistant", "content": prev_text})
                tool_calls = prev_response.tool_calls_or_raise()
                if tool_calls:
                    messages.append(
                        {
                            "role": "assistant",
                            "tool_calls": [
                                {
                                    "type": "function",
                                    "id": tool_call.tool_call_id,
                                    "function": {
                                        "name": tool_call.name,
                                        "arguments": json.dumps(tool_call.arguments),
                                    },
                                }
                                for tool_call in tool_calls
                            ],
                        }
                    )
        if prompt.system and prompt.system != current_system:
            messages.append({"role": "system", "content": prompt.system})
        for tool_result in prompt.tool_results:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": tool_result.tool_call_id,
                    "content": tool_result.output,
                }
            )
        if not prompt.attachments:
            if prompt.prompt:
                messages.append({"role": "user", "content": prompt.prompt or ""})
        else:
            attachment_message = []
            if prompt.prompt:
                attachment_message.append({"type": "text", "text": prompt.prompt})
            for attachment in prompt.attachments:
                attachment_message.append(_attachment(attachment))
            messages.append({"role": "user", "content": attachment_message})
        return messages

    def set_usage(self, response, usage):
        if not usage:
            return
        input_tokens = usage.pop("prompt_tokens")
        output_tokens = usage.pop("completion_tokens")
        usage.pop("total_tokens")
        response.set_usage(
            input=input_tokens, output=output_tokens, details=simplify_usage_dict(usage)
        )

    def get_client(self, key, *, async_=False):
        kwargs = {}
        if self.api_base:
            kwargs["base_url"] = self.api_base
        if self.api_type:
            kwargs["api_type"] = self.api_type
        if self.api_version:
            kwargs["api_version"] = self.api_version
        if self.api_engine:
            kwargs["engine"] = self.api_engine
        if self.needs_key:
            kwargs["api_key"] = self.get_key(key)
        else:
            # OpenAI-compatible models don't need a key, but the
            # openai client library requires one
            kwargs["api_key"] = "DUMMY_KEY"
        if self.headers:
            kwargs["default_headers"] = self.headers
        if os.environ.get("LLM_OPENAI_SHOW_RESPONSES"):
            kwargs["http_client"] = logging_client()
        if async_:
            return openai.AsyncOpenAI(**kwargs)
        else:
            return openai.OpenAI(**kwargs)

    def build_kwargs(self, prompt, stream):
        kwargs = dict(not_nulls(prompt.options))
        json_object = kwargs.pop("json_object", None)
        if "max_tokens" not in kwargs and self.default_max_tokens is not None:
            kwargs["max_tokens"] = self.default_max_tokens
        if json_object:
            kwargs["response_format"] = {"type": "json_object"}
        if prompt.schema:
            kwargs["response_format"] = {
                "type": "json_schema",
                "json_schema": {"name": "output", "schema": prompt.schema},
            }
        if prompt.tools:
            kwargs["tools"] = [
                {
                    "type": "function",
                    "function": {
                        "name": tool.name,
                        "description": tool.description or None,
                        "parameters": tool.input_schema,
                    },
                }
                for tool in prompt.tools
            ]
        if stream:
            kwargs["stream_options"] = {"include_usage": True}
        return kwargs


class Chat(_Shared, KeyModel):
    """Synchronous chat model that calls ``client.chat.completions.create``."""

    needs_key = "openai"
    key_env_var = "OPENAI_API_KEY"
    default_max_tokens = None

    class Options(SharedOptions):
        json_object: Optional[bool] = Field(
            description="Output a valid JSON object {...}. Prompt must mention JSON.",
            default=None,
        )

    def execute(
        self,
        prompt: Prompt,
        stream: bool,
        response: Response,
        conversation: Optional[Conversation] = None,
        key: Optional[str] = None,
    ) -> Iterator[str]:
        """Run the prompt and yield response text.

        When streaming, text is yielded chunk by chunk and tool-call argument
        fragments are accumulated per tool-call index. Otherwise the whole
        message content is yielded once. In both cases tool calls, the raw
        response JSON, usage and the prompt JSON are recorded on ``response``.

        Raises:
            NotImplementedError: if a system prompt is given to a model
                constructed with ``allows_system_prompt=False``.
        """
        if prompt.system and not self.allows_system_prompt:
            raise NotImplementedError("Model does not support system prompts")
        messages = self.build_messages(prompt, conversation)
        kwargs = self.build_kwargs(prompt, stream)
        client = self.get_client(key)
        usage = None
        if stream:
            completion = client.chat.completions.create(
                model=self.model_name or self.model_id,
                messages=messages,
                stream=True,
                **kwargs,
            )
            chunks = []
            tool_calls = {}
            for chunk in completion:
                chunks.append(chunk)
                if chunk.usage:
                    usage = chunk.usage.model_dump()
                if chunk.choices and chunk.choices[0].delta:
                    for tool_call in chunk.choices[0].delta.tool_calls or []:
                        if tool_call.function.arguments is None:
                            tool_call.function.arguments = ""
                        index = tool_call.index
                        if index not in tool_calls:
                            # First fragment for this index carries id and name
                            tool_calls[index] = tool_call
                        else:
                            # Later fragments only extend the arguments string
                            tool_calls[
                                index
                            ].function.arguments += tool_call.function.arguments
                try:
                    content = chunk.choices[0].delta.content
                except IndexError:
                    # Chunks without choices carry no text
                    content = None
                if content is not None:
                    yield content
            response.response_json = remove_dict_none_values(combine_chunks(chunks))
            if tool_calls:
                for value in tool_calls.values():
                    # value.function looks like this:
                    # ChoiceDeltaToolCallFunction(arguments='{"city":"San Francisco"}', name='get_weather')
                    response.add_tool_call(
                        llm.ToolCall(
                            tool_call_id=value.id,
                            name=value.function.name,
                            # An empty arguments string means "no arguments";
                            # json.loads("") would raise
                            arguments=json.loads(value.function.arguments or "{}"),
                        )
                    )
        else:
            completion = client.chat.completions.create(
                model=self.model_name or self.model_id,
                messages=messages,
                stream=False,
                **kwargs,
            )
            # usage can be absent; set_usage() ignores None
            usage = completion.usage.model_dump() if completion.usage else None
            response.response_json = remove_dict_none_values(completion.model_dump())
            for tool_call in completion.choices[0].message.tool_calls or []:
                response.add_tool_call(
                    llm.ToolCall(
                        tool_call_id=tool_call.id,
                        name=tool_call.function.name,
                        arguments=json.loads(tool_call.function.arguments or "{}"),
                    )
                )
            if completion.choices[0].message.content is not None:
                yield completion.choices[0].message.content
        self.set_usage(response, usage)
        response._prompt_json = redact_data({"messages": messages})


class AsyncChat(_Shared, AsyncKeyModel):
    """Asynchronous chat model that awaits ``client.chat.completions.create``."""

    needs_key = "openai"
    key_env_var = "OPENAI_API_KEY"
    default_max_tokens = None

    class Options(SharedOptions):
        json_object: Optional[bool] = Field(
            description="Output a valid JSON object {...}. Prompt must mention JSON.",
            default=None,
        )

    async def execute(
        self,
        prompt: Prompt,
        stream: bool,
        response: AsyncResponse,
        conversation: Optional[AsyncConversation] = None,
        key: Optional[str] = None,
    ) -> AsyncGenerator[str, None]:
        """Run the prompt and asynchronously yield response text.

        When streaming, text is yielded chunk by chunk and tool-call argument
        fragments are accumulated per tool-call index. Otherwise the whole
        message content is yielded once. In both cases tool calls, the raw
        response JSON, usage and the prompt JSON are recorded on ``response``.

        Raises:
            NotImplementedError: if a system prompt is given to a model
                constructed with ``allows_system_prompt=False``.
        """
        if prompt.system and not self.allows_system_prompt:
            raise NotImplementedError("Model does not support system prompts")
        messages = self.build_messages(prompt, conversation)
        kwargs = self.build_kwargs(prompt, stream)
        client = self.get_client(key, async_=True)
        usage = None
        if stream:
            completion = await client.chat.completions.create(
                model=self.model_name or self.model_id,
                messages=messages,
                stream=True,
                **kwargs,
            )
            chunks = []
            tool_calls = {}
            async for chunk in completion:
                chunks.append(chunk)
                if chunk.usage:
                    usage = chunk.usage.model_dump()
                if chunk.choices and chunk.choices[0].delta:
                    for tool_call in chunk.choices[0].delta.tool_calls or []:
                        if tool_call.function.arguments is None:
                            tool_call.function.arguments = ""
                        index = tool_call.index
                        if index not in tool_calls:
                            # First fragment for this index carries id and name
                            tool_calls[index] = tool_call
                        else:
                            # Later fragments only extend the arguments string
                            tool_calls[
                                index
                            ].function.arguments += tool_call.function.arguments
                try:
                    content = chunk.choices[0].delta.content
                except IndexError:
                    # Chunks without choices carry no text
                    content = None
                if content is not None:
                    yield content
            if tool_calls:
                for value in tool_calls.values():
                    # value.function looks like this:
                    # ChoiceDeltaToolCallFunction(arguments='{"city":"San Francisco"}', name='get_weather')
                    response.add_tool_call(
                        llm.ToolCall(
                            tool_call_id=value.id,
                            name=value.function.name,
                            # An empty arguments string means "no arguments";
                            # json.loads("") would raise
                            arguments=json.loads(value.function.arguments or "{}"),
                        )
                    )
            response.response_json = remove_dict_none_values(combine_chunks(chunks))
        else:
            completion = await client.chat.completions.create(
                model=self.model_name or self.model_id,
                messages=messages,
                stream=False,
                **kwargs,
            )
            response.response_json = remove_dict_none_values(completion.model_dump())
            # usage can be absent; set_usage() ignores None
            usage = completion.usage.model_dump() if completion.usage else None
            for tool_call in completion.choices[0].message.tool_calls or []:
                response.add_tool_call(
                    llm.ToolCall(
                        tool_call_id=tool_call.id,
                        name=tool_call.function.name,
                        arguments=json.loads(tool_call.function.arguments or "{}"),
                    )
                )
            if completion.choices[0].message.content is not None:
                yield completion.choices[0].message.content
        self.set_usage(response, usage)
        response._prompt_json = redact_data({"messages": messages})


class Completion(Chat):
    """
    Model that sends prompts to ``client.completions`` (plain text completion)
    instead of the chat completions endpoint used by the parent class.
    """

    class Options(SharedOptions):
        # Extends the shared options with a logprobs setting, capped at 5
        logprobs: Optional[int] = Field(
            description="Include the log probabilities of most likely N per token",
            default=None,
            le=5,
        )

    def __init__(self, *args, default_max_tokens=None, **kwargs):
        # Everything except default_max_tokens is handed to the parent constructor
        super().__init__(*args, **kwargs)
        self.default_max_tokens = default_max_tokens

    def __str__(self) -> str:
        return f"OpenAI Completion: {self.model_id}"

    def execute(
        self,
        prompt: Prompt,
        stream: bool,
        response: Response,
        conversation: Optional[Conversation] = None,
        key: Optional[str] = None,
    ) -> Iterator[str]:
        """
        Run the prompt against the completions endpoint, yielding text.

        The text sent to the API is the newline-joined sequence of every
        previous prompt and response in ``conversation`` followed by the
        new prompt. ``response.response_json`` and ``response._prompt_json``
        are populated as a side effect.

        Raises:
            NotImplementedError: if the prompt carries a system prompt
        """
        if prompt.system:
            raise NotImplementedError(
                "System prompts are not supported for OpenAI completion models"
            )

        # Flatten the conversation so far into alternating prompt/response text
        earlier_responses = conversation.responses if conversation is not None else []
        messages = []
        for earlier in earlier_responses:
            messages.extend(
                [earlier.prompt.prompt, cast(Response, earlier).text()]
            )
        messages.append(prompt.prompt)

        kwargs = self.build_kwargs(prompt, stream)
        client = self.get_client(key)

        if not stream:
            completion = client.completions.create(
                model=self.model_name or self.model_id,
                prompt="\n".join(messages),
                stream=False,
                **kwargs,
            )
            response.response_json = remove_dict_none_values(completion.model_dump())
            yield completion.choices[0].text
        else:
            completion = client.completions.create(
                model=self.model_name or self.model_id,
                prompt="\n".join(messages),
                stream=True,
                **kwargs,
            )
            received = []
            for piece in completion:
                received.append(piece)
                try:
                    text = piece.choices[0].text
                except IndexError:
                    # A chunk with no choices carries no text to emit
                    continue
                if text is not None:
                    yield text
            # Collapse the stream into a single dict and drop None values
            response.response_json = remove_dict_none_values(combine_chunks(received))

        response._prompt_json = redact_data({"messages": messages})


def not_nulls(data) -> dict:
    """
    Build a dict from an iterable of ``(key, value)`` pairs, leaving out
    every pair whose value is ``None``.
    """
    kept = {}
    for name, item in data:
        if item is None:
            continue
        kept[name] = item
    return kept


def combine_chunks(chunks: List) -> dict:
    """
    Collapse a list of streamed response chunks into one summary dict.

    Handles both chat-style chunks (choices carry a ``delta``) and
    completion-style chunks (choices carry ``text``).

    Args:
        chunks: chunk objects collected while streaming

    Returns:
        dict with ``content``, ``role``, ``finish_reason`` and ``usage``,
        plus ``logprobs`` when any were seen and whichever of ``id``,
        ``object``, ``model``, ``created`` and ``index`` the first chunk has.
    """
    content = ""
    role = None
    finish_reason = None
    # If any of them have log probability, we're going to persist
    # those later on
    logprobs = []
    usage = {}

    for item in chunks:
        # Imitations of the OpenAI API may omit the usage attribute entirely
        item_usage = getattr(item, "usage", None)
        if item_usage:
            usage = item_usage.model_dump()
        for choice in item.choices:
            choice_logprobs = getattr(choice, "logprobs", None)
            if choice_logprobs and hasattr(choice_logprobs, "top_logprobs"):
                logprobs.append(
                    {
                        "text": choice.text if hasattr(choice, "text") else None,
                        "top_logprobs": choice_logprobs.top_logprobs,
                    }
                )

            # Recorded before the branch below so completion-style chunks
            # keep their finish reason too
            choice_finish_reason = getattr(choice, "finish_reason", None)
            if choice_finish_reason is not None:
                finish_reason = choice_finish_reason

            if not hasattr(choice, "delta"):
                # Completion-style chunk: plain text, which may be None
                if choice.text is not None:
                    content += choice.text
                continue

            # Only the chunks that actually carry a role should set it;
            # later chunks with role=None must not erase it
            if choice.delta.role is not None:
                role = choice.delta.role
            if choice.delta.content is not None:
                content += choice.delta.content

    # Imitations of the OpenAI API may be missing some of these fields
    combined = {
        "content": content,
        "role": role,
        "finish_reason": finish_reason,
        "usage": usage,
    }
    if logprobs:
        combined["logprobs"] = logprobs
    if chunks:
        for key in ("id", "object", "model", "created", "index"):
            value = getattr(chunks[0], key, None)
            if value is not None:
                combined[key] = value

    return combined


def redact_data(input_dict):
    """
    Walk a nested structure of dicts and lists, shortening bulky inline
    payloads in place:

    - an ``image_url`` dict whose ``url`` starts with ``data:`` has that
      url replaced by ``data:...``
    - an ``input_audio`` dict with a ``data`` key has it replaced by ``...``

    The (mutated) input object is returned; other types pass through untouched.
    """
    if isinstance(input_dict, list):
        for element in input_dict:
            redact_data(element)
        return input_dict

    if not isinstance(input_dict, dict):
        return input_dict

    for name, payload in input_dict.items():
        payload_is_dict = isinstance(payload, dict)
        is_inline_image = (
            name == "image_url"
            and payload_is_dict
            and "url" in payload
            and payload["url"].startswith("data:")
        )
        if is_inline_image:
            payload["url"] = "data:..."
            continue
        if name == "input_audio" and payload_is_dict and "data" in payload:
            payload["data"] = "..."
            continue
        # Anything else may itself contain redactable content
        redact_data(payload)
    return input_dict


================================================
FILE: llm/embeddings.py
================================================
from .models import EmbeddingModel
from .embeddings_migrations import embeddings_migrations
from dataclasses import dataclass
import hashlib
from itertools import islice
import json
from sqlite_utils import Database
from sqlite_utils.db import Table
import time
from typing import cast, Any, Dict, Iterable, List, Optional, Tuple, Union


@dataclass
class Entry:
    """
    A single item from a collection, as returned by the similarity searches
    on Collection.
    """

    # ID of the item within its collection
    id: str
    # Similarity score for this item; Collection.similar_by_vector() fills it
    # from the cosine similarity against the query vector
    score: Optional[float]
    # Stored text content, if any was stored
    content: Optional[str] = None
    # Decoded JSON metadata, if any was stored
    metadata: Optional[Dict[str, Any]] = None


class Collection:
    """
    A named collection of embeddings stored in a SQLite database.

    Attributes set by the constructor:
        db: the sqlite_utils Database holding the collection
        name: name of the collection
        id: primary key of the row in the ``collections`` table
        model_id: ID of the embedding model recorded for the collection
    """

    class DoesNotExist(Exception):
        "Raised when a collection, or an ID within it, cannot be found"

    def __init__(
        self,
        name: str,
        db: Optional[Database] = None,
        *,
        model: Optional[EmbeddingModel] = None,
        model_id: Optional[str] = None,
        create: bool = True,
    ) -> None:
        """
        A collection of embeddings

        Returns the collection with the given name, creating it if it does not exist.

        If you set create=False a Collection.DoesNotExist exception will be raised if the
        collection does not already exist.

        Args:
            db (sqlite_utils.Database): Database to store the collection in;
                an in-memory database is used if omitted
            name (str): Name of the collection
            model (llm.models.EmbeddingModel, optional): Embedding model to use
            model_id (str, optional): Alternatively, ID of the embedding model to use
            create (bool, optional): Whether to create the collection if it does not exist

        Raises:
            ValueError: if a new collection must be created but neither
                model nor model_id was provided
            Collection.DoesNotExist: if create=False and the collection is missing
        """
        import llm

        self.db = db or Database(memory=True)
        self.name = name
        self._model = model

        # Make sure the collections/embeddings tables exist and are current
        embeddings_migrations.apply(self.db)

        rows = list(self.db["collections"].rows_where("name = ?", [self.name]))
        if rows:
            row = rows[0]
            self.id = row["id"]
            self.model_id = row["model"]
        else:
            if create:
                # Collection does not exist, so model or model_id is required
                if not model and not model_id:
                    raise ValueError(
                        "Either model= or model_id= must be provided when creating a new collection"
                    )
                # Create it
                if model_id:
                    # Resolve alias
                    model = llm.get_embedding_model(model_id)
                    self._model = model
                model_id = cast(EmbeddingModel, model).model_id
                # Keep the attribute available on freshly created collections
                # too, matching what is set when loading an existing one
                self.model_id = model_id
                self.id = (
                    cast(Table, self.db["collections"])
                    .insert(
                        {
                            "name": self.name,
                            "model": model_id,
                        }
                    )
                    .last_pk
                )
            else:
                raise self.DoesNotExist(f"Collection '{name}' does not exist")

    def model(self) -> EmbeddingModel:
        "Return the embedding model used by this collection"
        import llm

        # Resolved lazily from the stored model ID and then cached
        if self._model is None:
            self._model = llm.get_embedding_model(self.model_id)

        return cast(EmbeddingModel, self._model)

    def count(self) -> int:
        """
        Count the number of items in the collection.

        Returns:
            int: Number of items in the collection
        """
        return next(
            self.db.query(
                """
            select count(*) as c from embeddings where collection_id = (
                select id from collections where name = ?
            )
            """,
                (self.name,),
            )
        )["c"]

    def embed(
        self,
        id: str,
        value: Union[str, bytes],
        metadata: Optional[Dict[str, Any]] = None,
        store: bool = False,
    ) -> None:
        """
        Embed value and store it in the collection with a given ID.

        Nothing is done if the collection already holds an item with the
        same content hash.

        Args:
            id (str): ID for the value
            value (str or bytes): value to be embedded
            metadata (dict, optional): Metadata to be stored
            store (bool, optional): Whether to store the value in the content or content_blob column
        """
        from llm import encode

        content_hash = self.content_hash(value)
        if self.db["embeddings"].count_where(
            "content_hash = ? and collection_id = ?", [content_hash, self.id]
        ):
            return
        embedding = self.model().embed(value)
        cast(Table, self.db["embeddings"]).insert(
            {
                "collection_id": self.id,
                "id": id,
                "embedding": encode(embedding),
                "content": value if (store and isinstance(value, str)) else None,
                "content_blob": value if (store and isinstance(value, bytes)) else None,
                "content_hash": content_hash,
                "metadata": json.dumps(metadata) if metadata else None,
                "updated": int(time.time()),
            },
            replace=True,
        )

    def embed_multi(
        self,
        entries: Iterable[Tuple[str, Union[str, bytes]]],
        store: bool = False,
        batch_size: int = 100,
    ) -> None:
        """
        Embed multiple texts and store them in the collection with given IDs.

        Args:
            entries (iterable): Iterable of (id: str, text: str) tuples
            store (bool, optional): Whether to store the text in the content column
            batch_size (int, optional): custom maximum batch size to use
        """
        self.embed_multi_with_metadata(
            ((id, value, None) for id, value in entries),
            store=store,
            batch_size=batch_size,
        )

    def embed_multi_with_metadata(
        self,
        entries: Iterable[Tuple[str, Union[str, bytes], Optional[Dict[str, Any]]]],
        store: bool = False,
        batch_size: int = 100,
    ) -> None:
        """
        Embed multiple values along with metadata and store them in the collection with given IDs.

        Entries are consumed in batches; each batch is looked up by content
        hash first so that already-stored items are not embedded again.

        Args:
            entries (iterable): Iterable of (id: str, value: str or bytes, metadata: None or dict)
            store (bool, optional): Whether to store the value in the content or content_blob column
            batch_size (int, optional): custom maximum batch size to use
        """
        import llm

        # Never exceed the model's own batch size, if it declares one
        batch_size = min(batch_size, (self.model().batch_size or batch_size))
        iterator = iter(entries)
        collection_id = self.id
        while True:
            batch = list(islice(iterator, batch_size))
            if not batch:
                break
            # Calculate hashes first; each hash is reused for both the
            # lookup below and the row that gets inserted
            items_and_hashes = [(item, self.content_hash(item[1])) for item in batch]
            # Any of those hashes already exist?
            existing_ids = {
                row["id"]
                for row in self.db.query(
                    """
                    select id from embeddings
                    where collection_id = ? and content_hash in ({})
                    """.format(",".join("?" for _ in items_and_hashes)),
                    [collection_id]
                    + [item_and_hash[1] for item_and_hash in items_and_hashes],
                )
            }
            filtered = [
                (item, item_hash)
                for item, item_hash in items_and_hashes
                if item[0] not in existing_ids
            ]
            embeddings = list(
                self.model().embed_multi(item[1] for item, _ in filtered)
            )
            with self.db.conn:
                cast(Table, self.db["embeddings"]).insert_all(
                    (
                        {
                            "collection_id": collection_id,
                            "id": id,
                            "embedding": llm.encode(embedding),
                            "content": (
                                value if (store and isinstance(value, str)) else None
                            ),
                            "content_blob": (
                                value if (store and isinstance(value, bytes)) else None
                            ),
                            "content_hash": item_hash,
                            "metadata": json.dumps(metadata) if metadata else None,
                            "updated": int(time.time()),
                        }
                        for (embedding, ((id, value, metadata), item_hash)) in zip(
                            embeddings, filtered
                        )
                    ),
                    replace=True,
                )

    def similar_by_vector(
        self,
        vector: List[float],
        number: int = 10,
        skip_id: Optional[str] = None,
        prefix: Optional[str] = None,
    ) -> List[Entry]:
        """
        Find similar items in the collection by a given vector.

        Args:
            vector (list): Vector to search by
            number (int, optional): Number of similar items to return
            skip_id (str, optional): An ID to exclude from the results
            prefix: (str, optional): Filter results to IDs with this prefix

        Returns:
            list: List of Entry objects, highest score first
        """
        import llm

        def distance_score(other_encoded):
            other_vector = llm.decode(other_encoded)
            return llm.cosine_similarity(other_vector, vector)

        # Exposed to SQL under the name distance_score, replacing any
        # function registered by an earlier search
        self.db.register_function(distance_score, replace=True)

        where_bits = ["collection_id = ?"]
        where_args = [str(self.id)]

        if prefix:
            where_bits.append("id LIKE ? || '%'")
            where_args.append(prefix)

        if skip_id:
            where_bits.append("id != ?")
            where_args.append(skip_id)

        return [
            Entry(
                id=row["id"],
                score=row["score"],
                content=row["content"],
                metadata=json.loads(row["metadata"]) if row["metadata"] else None,
            )
            for row in self.db.query(
                """
            select id, content, metadata, distance_score(embedding) as score
            from embeddings
            where {where}
            order by score desc limit {number}
        """.format(
                    where=" and ".join(where_bits),
                    number=number,
                ),
                where_args,
            )
        ]

    def similar_by_id(
        self, id: str, number: int = 10, prefix: Optional[str] = None
    ) -> List[Entry]:
        """
        Find similar items in the collection by a given ID.

        The item with the given ID is itself excluded from the results.

        Args:
            id (str): ID to search by
            number (int, optional): Number of similar items to return
            prefix: (str, optional): Filter results to IDs with this prefix

        Returns:
            list: List of Entry objects

        Raises:
            Collection.DoesNotExist: if the ID is not in this collection
        """
        import llm

        matches = list(
            self.db["embeddings"].rows_where(
                "collection_id = ? and id = ?", (self.id, id)
            )
        )
        if not matches:
            raise self.DoesNotExist("ID not found")
        embedding = matches[0]["embedding"]
        comparison_vector = llm.decode(embedding)
        return self.similar_by_vector(
            comparison_vector, number, skip_id=id, prefix=prefix
        )

    def similar(
        self, value: Union[str, bytes], number: int = 10, prefix: Optional[str] = None
    ) -> List[Entry]:
        """
        Find similar items in the collection by a given value.

        Args:
            value (str or bytes): value to search by
            number (int, optional): Number of similar items to return
            prefix: (str, optional): Filter results to IDs with this prefix

        Returns:
            list: List of Entry objects
        """
        comparison_vector = self.model().embed(value)
        return self.similar_by_vector(comparison_vector, number, prefix=prefix)

    @classmethod
    def exists(cls, db: Database, name: str) -> bool:
        """
        Does this collection exist in the database?

        Args:
            db (sqlite_utils.Database): Database to look in
            name (str): Name of the collection
        """
        rows = list(db["collections"].rows_where("name = ?", [name]))
        return bool(rows)

    def delete(self):
        """
        Delete the collection and its embeddings from the database
        """
        # Embeddings go first; both deletes share one transaction
        with self.db.conn:
            self.db.execute("delete from embeddings where collection_id = ?", [self.id])
            self.db.execute("delete from collections where id = ?", [self.id])

    @staticmethod
    def content_hash(input: Union[str, bytes]) -> bytes:
        "Hash content for deduplication. Override to change hashing behavior."
        # Strings are hashed as their UTF-8 bytes
        if isinstance(input, str):
            input = input.encode("utf8")
        return hashlib.md5(input).digest()


================================================
FILE: llm/embeddings_migrations.py
================================================
from sqlite_migrate import Migrations
import hashlib
import time

# Migration set named "llm.embeddings"; each function below registers
# itself on it through the @embeddings_migrations() decorator
embeddings_migrations = Migrations("llm.embeddings")


@embeddings_migrations()
def m001_create_tables(db):
    "Create the initial collections and embeddings tables"
    db["collections"].create({"id": int, "name": str, "model": str}, pk="id")
    # Collection names must be unique
    db["collections"].create_index(["name"], unique=True)
    db["embeddings"].create(
        {
            "collection_id": int,
            "id": str,
            "embedding": bytes,
            "content": str,
            "metadata": str,
        },
        # An item ID only needs to be unique within its collection
        pk=("collection_id", "id"),
    )


@embeddings_migrations()
def m002_foreign_key(db):
    "Make embeddings.collection_id a foreign key to collections.id"
    db["embeddings"].add_foreign_key("collection_id", "collections", "id")


@embeddings_migrations()
def m003_add_updated(db):
    """
    Add an integer ``updated`` column to embeddings and back-fill it with
    the current Unix timestamp for every existing row.
    """
    db["embeddings"].add_column("updated", int)
    # Pretty-print the schema
    db["embeddings"].transform()
    # Assume anything existing was last updated right now.
    # db.execute() is used here because db.query() is a generator: calling
    # it without iterating over the result never runs the statement.
    with db.conn:
        db.execute(
            "update embeddings set updated = ? where updated is null",
            [int(time.time())],
        )


@embeddings_migrations()
def m004_store_content_hash(db):
    """
    Add a ``content_hash`` column to embeddings, populate it for existing
    rows and index it.
    """
    db["embeddings"].add_column("content_hash", bytes)
    # Place the new column directly after content
    db["embeddings"].transform(
        column_order=(
            "collection_id",
            "id",
            "embedding",
            "content",
            "content_hash",
            "metadata",
            "updated",
        )
    )

    # Register functions manually so we can de-register later
    def md5(text):
        return hashlib.md5(text.encode("utf8")).digest()

    def random_md5():
        # Not derived from content: hashes the current time instead
        return hashlib.md5(str(time.time()).encode("utf8")).digest()

    db.conn.create_function("temp_md5", 1, md5)
    db.conn.create_function("temp_random_md5", 0, random_md5)

    with db.conn:
        # Rows with stored content get the MD5 of that content
        db.execute("""
            update embeddings
            set content_hash = temp_md5(content)
            where content is not null
        """)
        # Rows without stored content get a time-based placeholder hash
        db.execute("""
            update embeddings
            set content_hash = temp_random_md5()
            where content is null
        """)

    db["embeddings"].create_index(["content_hash"])

    # De-register functions
    db.conn.create_function("temp_md5", 1, None)
    db.conn.create_function("temp_random_md5", 0, None)


@embeddings_migrations()
def m005_add_content_blob(db):
    "Add a content_blob bytes column to embeddings, positioned after content"
    db["embeddings"].add_column("content_blob", bytes)
    # Only the leading columns are listed here
    db["embeddings"].transform(
        column_order=("collection_id", "id", "embedding", "content", "content_blob")
    )


================================================
FILE: llm/errors.py
================================================
class ModelError(Exception):
    """
    Error that a model can raise to have its message displayed to the user.
    """


class NeedsKeyException(ModelError):
    """
    Raised when a model needs an API key and none has been provided.
    """


================================================
FILE: llm/hookspecs.py
================================================
from pluggy import HookimplMarker
from pluggy import HookspecMarker

# pluggy markers for the "llm" project name. hookspec decorates the hook
# specifications in this module; hookimpl is the matching marker for
# implementations of those hooks.
hookspec = HookspecMarker("llm")
hookimpl = HookimplMarker("llm")


@hookspec
def register_commands(cli):
    """
    Register additional CLI commands, e.g. 'llm mycommand ...'

    Hook specification only: there is no body here.
    """


@hookspec
def register_models(register):
    """
    Register additional model instances representing LLM models that can be called

    Hook specification only: there is no body here.
    """


@hookspec
def register_embedding_models(register):
    """
    Register additional model instances that can be used for embedding

    Hook specification only: there is no body here.
    """


@hookspec
def register_template_loaders(register):
    """
    Register additional template loaders with prefixes

    Hook specification only: there is no body here.
    """


@hookspec
def register_fragment_loaders(register):
    """
    Register additional fragment loaders with prefixes

    Hook specification only: there is no body here.
    """


@hookspec
def register_tools(register):
    """
    Register functions that can be used as tools by the LLMs

    Hook specification only: there is no body here.
    """


================================================
FILE: llm/migrations.py
================================================
import datetime
from typing import Callable, List

# Registry of migration functions; migrate() runs them in list order and
# records each one under its function name.
MIGRATIONS: List[Callable] = []
# Used as a decorator: appends the function to MIGRATIONS. list.append
# returns None, so the decorated module-level name itself ends up bound to
# None - the functions are only reachable through MIGRATIONS.
migration = MIGRATIONS.append


def migrate(db):
    """
    Apply every registered migration that has not yet been recorded for db.

    Applied migrations are tracked by function name in the _llm_migrations
    table, together with the UTC time at which they ran.
    """
    ensure_migrations_table(db)
    applied_names = {row["name"] for row in db["_llm_migrations"].rows}
    for migration_fn in MIGRATIONS:
        migration_name = migration_fn.__name__
        if migration_name in applied_names:
            continue
        migration_fn(db)
        applied_at = str(datetime.datetime.now(datetime.timezone.utc))
        db["_llm_migrations"].insert(
            {"name": migration_name, "applied_at": applied_at}
        )
        # Guards against a second function registered under the same name
        applied_names.add(migration_name)


def ensure_migrations_table(db):
    """
    Create the _llm_migrations tracking table (name, applied_at) keyed on
    name, unless it already exists.
    """
    if db["_llm_migrations"].exists():
        return
    tracking_columns = {"name": str, "applied_at": str}
    db["_llm_migrations"].create(tracking_columns, pk="name")


@migration
def m001_initial(db):
    "Make sure the original log table exists, with a chat_id column"
    # Ensure the original table design exists, so other migrations can run
    if db["log"].exists():
        # It needs to have the chat_id column
        if "chat_id" not in db["log"].columns_dict:
            db["log"].add_column("chat_id")
        return
    db["log"].create(
        {
            "provider": str,
            "system": str,
            "prompt": str,
            "chat_id": str,
            "response": str,
            "model": str,
            "timestamp": str,
        }
    )


@migration
def m002_id_primary_key(db):
    "Give the log table an id primary key"
    db["log"].transform(pk="id")


@migration
def m003_chat_id_foreign_key(db):
    "Turn log.chat_id into an integer foreign key pointing back at log.id"
    db["log"].transform(types={"chat_id": int})
    db["log"].add_foreign_key("chat_id", "log", "id")


@migration
def m004_column_order(db):
    "Reorder the leading columns of the log table"
    db["log"].transform(
        column_order=(
            "id",
            "model",
            "timestamp",
            "prompt",
            "system",
            "response",
            "chat_id",
        )
    )


@migration
def m004_drop_provider(db):
    "Drop the provider column from the log table"
    # Shares the m004 prefix with m004_column_order; migrations are tracked
    # by their full function name, so the two do not clash
    db["log"].transform(drop=("provider",))


@migration
def m005_debug(db):
    "Add debug (text) and duration_ms (integer) columns to the log table"
    db["log"].add_column("debug", str)
    db["log"].add_column("duration_ms", int)


@migration
def m006_new_logs_table(db):
    """
    Add the JSON and reply_to_id columns to the log table, then rename
    and reorder its columns.
    """
    columns = db["log"].columns_dict
    for column, type in (
        ("options_json", str),
        ("prompt_json", str),
        ("response_json", str),
        ("reply_to_id", int),
    ):
        # It's possible people running development code like myself
        # might have accidentally created these columns already
        if column not in columns:
            db["log"].add_column(column, type)

    # Use .transform() to rename timestamp -> timestamp_utc and
    # options -> options_json, and set the new column order
    db["log"].transform(
        column_order=(
            "id",
            "model",
            "prompt",
            "system",
            "prompt_json",
            "options_json",
            "response",
            "response_json",
            "reply_to_id",
            "chat_id",
            "duration_ms",
            "timestamp_utc",
        ),
        rename={
            "timestamp": "timestamp_utc",
            "options": "options_json",
        },
    )


@migration
def m007_finish_logs_table(db):
    """
    Drop debug, rename timestamp_utc to datetime_utc, remove the chat_id
    foreign key, then rename the table from log to logs.
    """
    db["log"].transform(
        drop={"debug"},
        rename={"timestamp_utc": "datetime_utc"},
        drop_foreign_keys=("chat_id",),
    )
    with db.conn:
        db.execute("alter table log rename to logs")


@migration
def m008_reply_to_id_foreign_key(db):
    "Make logs.reply_to_id a foreign key pointing back at logs.id"
    db["logs"].add_foreign_key("reply_to_id", "logs", "id")


@migration
def m008_fix_column_order_in_logs(db):
    "Restore the intended column order of the logs table"
    # reply_to_id ended up at the end after foreign key added
    # NOTE(review): m007_finish_logs_table renamed timestamp_utc to
    # datetime_utc, so the last name listed here no longer matches a
    # column - confirm this is harmless
    db["logs"].transform(
        column_order=(
            "id",
            "model",
            "prompt",
            "system",
            "prompt_json",
            "options_json",
            "response",
            "response_json",
            "reply_to_id",
            "chat_id",
            "duration_ms",
            "timestamp_utc",
        ),
    )


@migration
def m009_delete_logs_table_if_empty(db):
    "Drop the old logs table, but only when it holds no rows"
    # We moved to a new table design, but we don't delete the table
    # if someone has put data in it
    if not db["logs"].count:
        db["logs"].drop()


@migration
def m010_create_new_log_tables(db):
    "Create the conversations and responses tables"
    db["conversations"].create(
        {
            "id": str,
            "name": str,
            "model": str,
        },
        pk="id",
    )
    db["responses"].create(
        {
            "id": str,
            "model": str,
            "prompt": str,
            "system": str,
            "prompt_json": str,
            "options_json": str,
            "response": str,
            "response_json": str,
            "conversation_id": str,
            "duration_ms": int,
            "datetime_utc": str,
        },
        pk="id",
        # Each response references the conversation it belongs to
        foreign_keys=(("conversation_id", "conversations", "id"),),
    )


@migration
def m011_fts_for_responses(db):
    "Enable full-text search over responses.prompt and responses.response"
    db["responses"].enable_fts(["prompt", "response"], create_triggers=True)


@migration
def m012_attachments_tables(db):
    "Create attachments plus the prompt_attachments table linking them to responses"
    db["attachments"].create(
        {
            "id": str,
            "type": str,
            "path": str,
            "url": str,
            "content": bytes,
        },
        pk="id",
    )
    db["prompt_attachments"].create(
        {
            "response_id": str,
            "attachment_id": str,
            "order": int,
        },
        foreign_keys=(
            ("response_id", "responses", "id"),
            ("attachment_id", "attachments", "id"),
        ),
        pk=("response_id", "attachment_id"),
    )


@migration
def m013_usage(db):
    "Add input_tokens, output_tokens and token_details columns to responses"
    db["responses"].add_column("input_tokens", int)
    db["responses"].add_column("output_tokens", int)
    db["responses"].add_column("token_details", str)


@migration
def m014_schemas(db):
    "Create the schemas table and add a responses.schema_id foreign key to it"
    db["schemas"].create(
        {
            "id": str,
            "content": str,
        },
        pk="id",
    )
    db["responses"].add_column("schema_id", str, fk="schemas", fk_col="id")
    # Clean up SQL create table indentation
    db["responses"].transform()
    # These changes may have dropped the FTS configuration, fix that
    db["responses"].enable_fts(
        ["prompt", "response"], create_triggers=True, replace=True
    )


@migration
def m015_fragments_tables(db):
    """
    Create fragments, fragment_aliases and the prompt_fragments /
    system_fragments tables that link fragments to responses.
    """
    db["fragments"].create(
        {
            "id": int,
            "hash": str,
            "content": str,
            "datetime_utc": str,
            "source": str,
        },
        pk="id",
    )
    # A given hash can only be stored once
    db["fragments"].create_index(["hash"], unique=True)
    db["fragment_aliases"].create(
        {
            "alias": str,
            "fragment_id": int,
        },
        foreign_keys=(("fragment_id", "fragments", "id"),),
        pk="alias",
    )
    db["prompt_fragments"].create(
        {
            "response_id": str,
            "fragment_id": int,
            "order": int,
        },
        foreign_keys=(
            ("response_id", "responses", "id"),
            ("fragment_id", "fragments", "id"),
        ),
        pk=("response_id", "fragment_id"),
    )
    # Same shape as prompt_fragments
    db["system_fragments"].create(
        {
            "response_id": str,
            "fragment_id": int,
            "order": int,
        },
        foreign_keys=(
            ("response_id", "responses", "id"),
            ("fragment_id", "fragments", "id"),
        ),
        pk=("response_id", "fragment_id"),
    )


@migration
def m016_fragments_table_pks(db):
    "Widen the fragment link tables' primary keys to include order"
    # The same fragment can be attached to a response multiple times
    # https://github.com/simonw/llm/issues/863#issuecomment-2781720064
    db["prompt_fragments"].transform(pk=("response_id", "fragment_id", "order"))
    db["system_fragments"].transform(pk=("response_id", "fragment_id", "order"))


@migration
def m017_tools_tables(db):
    "Create the tools, tool_responses, tool_calls and tool_results tables"
    db["tools"].create(
        {
            "id": int,
            "hash": str,
            "name": str,
            "description": str,
            "input_schema": str,
        },
        pk="id",
    )
    # A given hash can only be stored once
    db["tools"].create_index(["hash"], unique=True)
    # Many-to-many relationship between tools and responses
    db["tool_responses"].create(
        {
            "tool_id": int,
            "response_id": str,
        },
        foreign_keys=(
            ("tool_id", "tools", "id"),
            ("response_id", "responses", "id"),
        ),
        pk=("tool_id", "response_id"),
    )
    # tool_calls and tool_results are one-to-many against responses
    db["tool_calls"].create(
        {
            "id": int,
            "response_id": str,
            "tool_id": int,
            "name": str,
            "arguments": str,
            "tool_call_id": str,
        },
        pk="id",
        foreign_keys=(
            ("response_id", "responses", "id"),
            ("tool_id", "tools", "id"),
        ),
    )
    db["tool_results"].create(
        {
            "id": int,
            "response_id": str,
            "tool_id": int,
            "name": str,
            "output": str,
            "tool_call_id": str,
        },
        pk="id",
        foreign_keys=(
            ("response_id", "responses", "id"),
            ("tool_id", "tools", "id"),
        ),
    )


@migration
def m017_tools_plugin(db):
    "Add a plugin column to the tools table"
    # No column type is passed, so the library default applies
    db["tools"].add_column("plugin")


@migration
def m018_tool_instances(db):
    "Create tool_instances and link tool_results to it via instance_id"
    # Used to track instances of Toolbox classes that may be
    # used multiple times by different tools
    db["tool_instances"].create(
        {
            "id": int,
            "plugin": str,
            "name": str,
            "arguments": str,
        },
        pk="id",
    )
    # We record which instance was used only on the results
    db["tool_results"].add_column("instance_id", fk="tool_instances")


@migration
def m019_resolved_model(db):
    "Add a resolved_model column to responses"
    # For models like gemini-1.5-flash-latest where we wish to record
    # the resolved model name in addition to the alias
    db["responses"].add_column("resolved_model", str)


@migration
def m020_tool_results_attachments(db):
    "Create tool_results_attachments, linking tool results to attachments"
    db["tool_results_attachments"].create(
        {
            "tool_result_id": int,
            "attachment_id": str,
            "order": int,
        },
        foreign_keys=(
            ("tool_result_id", "tool_results", "id"),
            ("attachment_id", "attachments", "id"),
        ),
        pk=("tool_result_id", "attachment_id"),
    )


@migration
def m021_tool_results_exception(db):
    "Add an exception text column to tool_results"
    db["tool_results"].add_column("exception", str)


================================================
FILE: llm/models.py
================================================
import asyncio
import base64
from condense_json import condense_json
from dataclasses import dataclass, field
import datetime
from .errors import NeedsKeyException
import hashlib
import httpx
from itertools import islice
from pathlib import Path
import re
import time
from types import MethodType
from typing import (
    Any,
    AsyncGenerator,
    AsyncIterator,
    Awaitable,
    Callable,
    Dict,
    Iterable,
    Iterator,
    List,
    Optional,
    Set,
    Union,
    get_type_hints,
)
from .utils import (
    ensure_fragment,
    ensure_tool,
    make_schema_id,
    mimetype_from_path,
    mimetype_from_string,
    token_usage_string,
    monotonic_ulid,
    Fragment,
)
from abc import ABC, abstractmethod
import inspect
import json
from pydantic import BaseModel, ConfigDict, create_model

# NOTE(review): presumably the maximum length of an auto-generated conversation
# name (see _conversation_name(), called from log_to_db) - confirm at its use site.
CONVERSATION_NAME_LENGTH = 32


@dataclass
class Usage:
    """Container for usage figures; every field is optional and defaults to None."""

    # NOTE(review): presumably the input and output token counts - confirm with callers
    input: Optional[int] = None
    output: Optional[int] = None
    # Free-form extra usage information keyed by string
    details: Optional[Dict[str, Any]] = None


@dataclass
class Attachment:
    """
    A piece of content attached to a prompt or tool result.

    The content can be supplied inline as bytes (``content``), as a local
    file (``path``) or as a remote resource (``url``). ``type`` is the
    content type, if known. ``_id`` caches the identifier returned by id().
    """

    type: Optional[str] = None
    path: Optional[str] = None
    url: Optional[str] = None
    content: Optional[bytes] = None
    _id: Optional[str] = None

    def id(self):
        """
        Return the SHA-256 hex digest identifying this attachment.

        The digest is taken over the inline content if there is any, otherwise
        over the bytes of the file at ``path``, otherwise over the JSON string
        '{"url": "https://..."}'. The result is cached in ``_id``.
        """
        if self._id is not None:
            return self._id
        if self.content:
            to_digest = self.content
        elif self.path:
            to_digest = Path(self.path).read_bytes()
        else:
            to_digest = json.dumps({"url": self.url}).encode("utf-8")
        self._id = hashlib.sha256(to_digest).hexdigest()
        return self._id

    def resolve_type(self):
        """
        Return the content type, deriving it when ``type`` is not set.

        Derivation order: from ``path`` via mimetype_from_path(), then from the
        content-type header of an HTTP HEAD request to ``url``, then from the
        inline ``content`` via mimetype_from_string().

        Raises ValueError if none of those sources is available.
        """
        explicit_type = self.type
        if explicit_type:
            return explicit_type
        if self.path:
            return mimetype_from_path(self.path)
        if self.url:
            head_response = httpx.head(self.url)
            head_response.raise_for_status()
            return head_response.headers.get("content-type")
        if self.content:
            return mimetype_from_string(self.content)
        raise ValueError("Attachment has no type and no content to derive it from")

    def content_bytes(self):
        """
        Return the attachment's bytes.

        Inline content is returned as-is. Otherwise the file at ``path`` is
        read, or ``url`` is fetched with an HTTP GET. When none of these is
        available the (empty or None) ``content`` value is returned unchanged.
        """
        if self.content:
            return self.content
        if self.path:
            return Path(self.path).read_bytes()
        if self.url:
            fetched = httpx.get(self.url)
            fetched.raise_for_status()
            return fetched.content
        return self.content

    def base64_content(self):
        """Return content_bytes() base64-encoded, as a str."""
        encoded = base64.b64encode(self.content_bytes())
        return encoded.decode("utf-8")

    def __repr__(self):
        # Only attributes that are set show up; calling id() here may read the
        # file at ``path`` if the identifier has not been computed yet.
        pieces = [f"<Attachment: {self.id()}"]
        quoted_attributes = (
            ("type", self.type),
            ("path", self.path),
            ("url", self.url),
        )
        pieces.extend(
            f'{label}="{value}"' for label, value in quoted_attributes if value
        )
        if self.content:
            pieces.append(f"content={len(self.content)} bytes")
        return " ".join(pieces) + ">"

    @classmethod
    def from_row(cls, row):
        """Build an Attachment from a mapping with id, type, path, url and content keys."""
        column_values = {
            column: row[column] for column in ("type", "path", "url", "content")
        }
        return cls(_id=row["id"], **column_values)


@dataclass
class Tool:
    """
    Definition of a tool: its name, an optional description, the schema of
    its inputs and, optionally, the Python callable that implements it.
    """

    name: str
    description: Optional[str] = None
    input_schema: Dict = field(default_factory=dict)
    implementation: Optional[Callable] = None
    plugin: Optional[str] = None  # plugin tool came from, e.g. 'llm_tools_sqlite'

    def __post_init__(self):
        # input_schema may be given as a Pydantic model; _ensure_dict_schema()
        # converts it to a JSON schema if needed.
        self.input_schema = _ensure_dict_schema(self.input_schema)

    def hash(self):
        """
        Return a SHA-256 hex digest of the tool's name, description and input
        schema (key order preserved), plus its plugin when one is set.
        """
        identity = {
            "name": self.name,
            "description": self.description,
            "input_schema": self.input_schema,
        }
        if self.plugin:
            identity["plugin"] = self.plugin
        serialized = json.dumps(identity).encode("utf-8")
        return hashlib.sha256(serialized).hexdigest()

    @classmethod
    def function(cls, function, name=None, description=None):
        """
        Build a Tool from a Python callable.

        - The tool name is ``name`` if given, otherwise the callable's __name__
        - The description is ``description`` if given, otherwise the docstring
        - The input schema is a model built from the callable's signature
        - The callable itself becomes the implementation

        Raises ValueError for a lambda when no ``name`` is provided.
        """
        if not name and function.__name__ == "<lambda>":
            raise ValueError(
                "Cannot create a Tool from a lambda function without providing name="
            )

        tool_name = name or function.__name__
        tool_description = description or function.__doc__ or None
        # The schema helper receives the caller-supplied name as-is (possibly None).
        arguments_model = _get_arguments_input_schema(function, name)
        return cls(
            name=tool_name,
            description=tool_description,
            input_schema=arguments_model,
            implementation=function,
        )


def _get_arguments_input_schema(function, name):
    """
    Build a Pydantic model describing the arguments of ``function``.

    Every parameter except ``self`` becomes a field. Its type comes from the
    function's type hints (str when unannotated). A parameter without a
    default is required; otherwise its default value is used for the field.
    The model is named "<name>InputSchema".
    """
    parameters = inspect.signature(function).parameters
    hints = get_type_hints(function)
    field_definitions = {}
    for argument_name, argument in parameters.items():
        if argument_name == "self":
            continue
        # "..." marks the field as required when the parameter has no default
        if argument.default is inspect.Parameter.empty:
            field_default = ...
        else:
            field_default = argument.default
        field_definitions[argument_name] = (
            hints.get(argument_name, str),
            field_default,
        )

    return create_model(f"{name}InputSchema", **field_definitions)


class Toolbox:
    """
    Base class for a group of tools implemented as methods on one object.

    Every public callable attribute of an instance (anything not starting
    with "_" and not listed in ``_blocked``) is exposed as a Tool named
    "<ClassName>_<attribute>". Extra tools can be registered per instance
    with add_tool().

    Subclasses have their __init__ wrapped so that the constructor arguments
    are captured in ``_config``.
    """

    name: Optional[str] = None
    instance_id: Optional[int] = None
    # Attribute names that must never be exposed as tools
    _blocked = (
        "tools",
        "add_tool",
        "method_tools",
        "__init_subclass__",
        "prepare",
        "prepare_async",
    )
    # Class-level defaults. The wrapped subclass __init__ and add_tool() both
    # replace these with per-instance objects before mutating them.
    _extra_tools: List[Tool] = []
    _config: Dict[str, Any] = {}
    _prepared: bool = False
    _async_prepared: bool = False

    def __init_subclass__(cls, **kwargs):
        """Wrap the subclass __init__ so constructor arguments are recorded."""
        super().__init_subclass__(**kwargs)

        original_init = cls.__init__

        def wrapped_init(self, *args, **kwargs):
            # Track args/kwargs passed to constructor in self._config
            # so we can serialize them to a database entry later on
            sig = inspect.signature(original_init)
            bound = sig.bind(self, *args, **kwargs)
            bound.apply_defaults()

            # *args / **kwargs catch-all parameters are not recorded
            self._config = {
                name: value
                for name, value in bound.arguments.items()
                if name != "self"
                and sig.parameters[name].kind
                not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
            }
            # Give each instance its own list before the real __init__ runs,
            # so that __init__ may already call add_tool()
            self._extra_tools = []

            original_init(self, *args, **kwargs)

        cls.__init__ = wrapped_init

    @classmethod
    def method_tools(cls) -> List[Tool]:
        """Return a Tool for each public callable attribute of the class (unbound)."""
        tools = []
        for method_name in dir(cls):
            if method_name.startswith("_") or method_name in cls._blocked:
                continue
            method = getattr(cls, method_name)
            if callable(method):
                tool = Tool.function(
                    method,
                    name=f"{cls.__name__}_{method_name}",
                )
                tools.append(tool)
        return tools

    def tools(self) -> Iterable[Tool]:
        "Returns an llm.Tool() for each class method, plus any extras registered with add_tool()"
        # method_tools() returns unbound methods, we need bound methods here:
        for name in dir(self):
            if name.startswith("_") or name in self._blocked:
                continue
            attr = getattr(self, name)
            if callable(attr):
                tool = Tool.function(attr, name=f"{self.__class__.__name__}_{name}")
                # Instances may carry a "plugin" attribute; propagate it if present
                tool.plugin = getattr(self, "plugin", None)
                yield tool
        yield from self._extra_tools

    def add_tool(
        self, tool_or_function: Union[Tool, Callable[..., Any]], pass_self: bool = False
    ):
        """
        Add a tool to this toolbox.

        ``tool_or_function`` is either a Tool, stored as-is, or a callable,
        wrapped with Tool.function(). With ``pass_self=True`` a callable is
        first bound to this toolbox so it receives the instance as its first
        argument.

        Raises ValueError if the argument is neither a Tool nor callable.
        """

        def _upgrade(fn):
            if pass_self:
                return MethodType(fn, self)
            return fn

        if isinstance(tool_or_function, Tool):
            new_tool = tool_or_function
        elif callable(tool_or_function):
            new_tool = Tool.function(_upgrade(tool_or_function))
        else:
            raise ValueError("Tool must be an instance of Tool or a callable function")

        # Instances whose __init__ was not wrapped (e.g. a bare Toolbox()) still
        # see the class-level list. Appending to that would leak the tool into
        # every other such instance, so give this instance its own copy first.
        if "_extra_tools" not in vars(self):
            self._extra_tools = list(self._extra_tools)
        self._extra_tools.append(new_tool)

    def prepare(self):
        """
        Over-ride this to perform setup (and .add_tool() calls) before the toolbox is used.
        Implement a similar prepare_async() method for async setup.
        """
        pass

    async def prepare_async(self):
        """
        Over-ride this to perform async setup (and .add_tool() calls) before the toolbox is used.
        """
        pass


@dataclass
class ToolCall:
    """A request to call the tool called ``name`` with the given ``arguments``."""

    name: str
    # Arguments for the call; stored as JSON text when logged to the database
    arguments: dict
    # Optional identifier for this call; ToolResult carries a matching field
    tool_call_id: Optional[str] = None


@dataclass
class ToolResult:
    """The outcome of a tool call: its output text plus optional extras."""

    name: str
    output: str
    attachments: List[Attachment] = field(default_factory=list)
    # Optional identifier; ToolCall carries a matching field
    tool_call_id: Optional[str] = None
    # Toolbox instance associated with this result, if any; log_to_db()
    # records it in the tool_instances table
    instance: Optional[Toolbox] = None
    # Exception associated with this result, if any; log_to_db() stores it
    # as "ClassName: message"
    exception: Optional[Exception] = None


@dataclass
class ToolOutput:
    "Tool functions can return output with extra attachments"

    # The output value itself; may be a string or a JSON-style value
    output: Optional[Union[str, dict, list, bool, int, float]] = None
    # Attachments to return alongside the output
    attachments: List[Attachment] = field(default_factory=list)


# Anything accepted where tools are passed in: a Tool, a Toolbox (expanded
# into its tools by _wrap_tools()) or a plain callable (wrapped with
# Tool.function()).
ToolDef = Union[Tool, Toolbox, Callable[..., Any]]
# Callback signatures for hooks run before and after a tool call. The
# before-call hooks take Optional[Tool] because the tool may be None when
# the called name is not among the prompt's tools. The async variants may
# return either None or an awaitable.
BeforeCallSync = Callable[[Optional[Tool], ToolCall], None]
AfterCallSync = Callable[[Tool, ToolCall, ToolResult], None]
BeforeCallAsync = Callable[[Optional[Tool], ToolCall], Union[None, Awaitable[None]]]
AfterCallAsync = Callable[[Tool, ToolCall, ToolResult], Union[None, Awaitable[None]]]


class CancelToolCall(Exception):
    # NOTE(review): presumably raised (e.g. from a before_call hook) to cancel
    # a pending tool call - confirm against the code that catches it.
    pass


@dataclass
class Prompt:
    """
    Everything that makes up a single prompt for a model: the prompt and
    system text, fragments, attachments, an optional schema, tools, tool
    results and options.
    """

    _prompt: Optional[str]
    model: "Model"
    fragments: Optional[List[Union[str, Fragment]]]
    attachments: Optional[List[Attachment]]
    _system: Optional[str]
    system_fragments: Optional[List[Union[str, Fragment]]]
    prompt_json: Optional[str]
    schema: Optional[Union[Dict, type[BaseModel]]]
    tools: List[Tool]
    tool_results: List[ToolResult]
    options: "Options"

    def __init__(
        self,
        prompt,
        model,
        *,
        fragments=None,
        attachments=None,
        system=None,
        system_fragments=None,
        prompt_json=None,
        options=None,
        schema=None,
        tools=None,
        tool_results=None,
    ):
        """
        Store the prompt components, replacing missing collections with
        empty ones.

        A ``schema`` given as a Pydantic model class is converted to its JSON
        schema dict. ``tools`` may mix Tool objects, Toolbox instances and
        plain callables; they are normalized to a list of Tool objects.
        """
        # Text components
        self._prompt = prompt
        self._system = system
        self.prompt_json = prompt_json
        self.model = model

        # Collections: attachments are always copied into a new list
        self.attachments = list(attachments or [])
        self.fragments = fragments or []
        self.system_fragments = system_fragments or []

        # Pydantic model classes become plain JSON schema dicts
        if schema and not isinstance(schema, dict):
            if issubclass(schema, BaseModel):
                schema = schema.model_json_schema()
        self.schema = schema

        self.tools = _wrap_tools(tools or [])
        self.tool_results = tool_results or []
        self.options = options or {}

    @property
    def prompt(self):
        """The fragments followed by the prompt text (if any), joined by newlines."""
        trailing = [self._prompt] if self._prompt else []
        return "\n".join(self.fragments + trailing)

    @property
    def system(self):
        """
        The system fragments followed by the system text, each stripped of
        surrounding whitespace, with blank entries dropped, joined by blank lines.
        """
        candidates = self.system_fragments + [self._system or ""]
        kept = []
        for candidate in candidates:
            stripped = candidate.strip()
            if stripped:
                kept.append(stripped)
        return "\n\n".join(kept)


def _wrap_tools(tools: List[ToolDef]) -> List[Tool]:
    """
    Normalize a mixed list of tool definitions into a flat list of Tool objects.

    Tool objects are kept as they are, a Toolbox contributes everything its
    tools() method yields, and any other callable is wrapped with
    Tool.function(). Anything else raises ValueError.
    """
    normalized = []
    for candidate in tools:
        if isinstance(candidate, Tool):
            normalized.append(candidate)
            continue
        if isinstance(candidate, Toolbox):
            normalized.extend(candidate.tools())
            continue
        if not callable(candidate):
            raise ValueError(f"Invalid tool: {candidate}")
        normalized.append(Tool.function(candidate))
    return normalized


@dataclass
class _BaseConversation:
    """Fields shared by the sync and async conversation classes."""

    model: "_BaseModel"
    # Defaults to a freshly generated monotonic ULID, lower-cased
    id: str = field(default_factory=lambda: str(monotonic_ulid()).lower())
    name: Optional[str] = None
    responses: List["_BaseResponse"] = field(default_factory=list)
    # Conversation-level defaults, used when a call does not supply its own
    tools: Optional[List[ToolDef]] = None
    chain_limit: Optional[int] = None

    @classmethod
    @abstractmethod
    def from_row(cls, row: Any) -> "_BaseConversation":
        """Build a conversation from a row; subclasses provide the implementation."""
        # NOTE(review): this class does not derive from ABC, so @abstractmethod
        # is not enforced at instantiation; the raise below is the only guard.
        raise NotImplementedError


@dataclass
class Conversation(_BaseConversation):
    """
    Synchronous conversation with a model.

    ``before_call`` and ``after_call`` are conversation-level defaults for
    the hooks passed to chain().
    """

    before_call: Optional[BeforeCallSync] = None
    after_call: Optional[AfterCallSync] = None

    def prompt(
        self,
        prompt: Optional[str] = None,
        *,
        fragments: Optional[List[Union[str, Fragment]]] = None,
        attachments: Optional[List[Attachment]] = None,
        system: Optional[str] = None,
        schema: Optional[Union[dict, type[BaseModel]]] = None,
        tools: Optional[List[ToolDef]] = None,
        tool_results: Optional[List[ToolResult]] = None,
        system_fragments: Optional[List[Union[str, Fragment]]] = None,
        stream: bool = True,
        key: Optional[str] = None,
        **options,
    ) -> "Response":
        """
        Build a Response for a single prompt within this conversation.

        ``tools`` falls back to the conversation's own tools when not given.
        Extra keyword arguments are passed to the model's Options class.
        """
        return Response(
            Prompt(
                prompt,
                model=self.model,
                fragments=fragments,
                attachments=attachments,
                system=system,
                schema=schema,
                tools=tools or self.tools,
                tool_results=tool_results,
                system_fragments=system_fragments,
                options=self.model.Options(**options),
            ),
            self.model,
            stream,
            conversation=self,
            key=key,
        )

    def chain(
        self,
        prompt: Optional[str] = None,
        *,
        fragments: Optional[List[str]] = None,
        attachments: Optional[List[Attachment]] = None,
        system: Optional[str] = None,
        system_fragments: Optional[List[str]] = None,
        stream: bool = True,
        schema: Optional[Union[dict, type[BaseModel]]] = None,
        tools: Optional[List[ToolDef]] = None,
        tool_results: Optional[List[ToolResult]] = None,
        chain_limit: Optional[int] = None,
        before_call: Optional[BeforeCallSync] = None,
        after_call: Optional[AfterCallSync] = None,
        key: Optional[str] = None,
        options: Optional[dict] = None,
    ) -> "ChainResponse":
        """
        Build a ChainResponse for this conversation.

        Attachments are validated by the model first. ``tools``,
        ``before_call``, ``after_call`` and ``chain_limit`` fall back to the
        conversation-level values when not given. Unlike prompt(), model
        options are passed as an ``options`` dict.
        """
        self.model._validate_attachments(attachments)
        return ChainResponse(
            Prompt(
                prompt,
                fragments=fragments,
                attachments=attachments,
                system=system,
                schema=schema,
                tools=tools or self.tools,
                tool_results=tool_results,
                system_fragments=system_fragments,
                model=self.model,
                options=self.model.Options(**(options or {})),
            ),
            model=self.model,
            stream=stream,
            conversation=self,
            key=key,
            before_call=before_call or self.before_call,
            after_call=after_call or self.after_call,
            # An explicit chain_limit of 0 must win over the conversation default
            chain_limit=chain_limit if chain_limit is not None else self.chain_limit,
        )

    @classmethod
    def from_row(cls, row):
        """Build a Conversation from a row with "model", "id" and "name" keys."""
        # Imported here rather than at module level
        from llm import get_model

        return cls(
            model=get_model(row["model"]),
            id=row["id"],
            name=row["name"],
        )

    def __repr__(self):
        # Pluralize for every count except exactly one, and close the bracket
        count = len(self.responses)
        plural = "" if count == 1 else "s"
        return f"<{self.__class__.__name__}: {self.id} - {count} response{plural}>"


@dataclass
class AsyncConversation(_BaseConversation):
    """
    Asynchronous conversation with a model.

    ``before_call`` and ``after_call`` are conversation-level defaults for
    the hooks passed to chain(); they may be plain or awaitable-returning
    callables.
    """

    before_call: Optional[BeforeCallAsync] = None
    after_call: Optional[AfterCallAsync] = None

    def chain(
        self,
        prompt: Optional[str] = None,
        *,
        fragments: Optional[List[str]] = None,
        attachments: Optional[List[Attachment]] = None,
        system: Optional[str] = None,
        system_fragments: Optional[List[str]] = None,
        stream: bool = True,
        schema: Optional[Union[dict, type[BaseModel]]] = None,
        tools: Optional[List[ToolDef]] = None,
        tool_results: Optional[List[ToolResult]] = None,
        chain_limit: Optional[int] = None,
        before_call: Optional[BeforeCallAsync] = None,
        after_call: Optional[AfterCallAsync] = None,
        key: Optional[str] = None,
        options: Optional[dict] = None,
    ) -> "AsyncChainResponse":
        """
        Build an AsyncChainResponse for this conversation.

        Attachments are validated by the model first. ``tools``,
        ``before_call``, ``after_call`` and ``chain_limit`` fall back to the
        conversation-level values when not given. Unlike prompt(), model
        options are passed as an ``options`` dict.
        """
        self.model._validate_attachments(attachments)
        return AsyncChainResponse(
            Prompt(
                prompt,
                fragments=fragments,
                attachments=attachments,
                system=system,
                schema=schema,
                tools=tools or self.tools,
                tool_results=tool_results,
                system_fragments=system_fragments,
                model=self.model,
                options=self.model.Options(**(options or {})),
            ),
            model=self.model,
            stream=stream,
            conversation=self,
            key=key,
            before_call=before_call or self.before_call,
            after_call=after_call or self.after_call,
            # An explicit chain_limit of 0 must win over the conversation default
            chain_limit=chain_limit if chain_limit is not None else self.chain_limit,
        )

    def prompt(
        self,
        prompt: Optional[str] = None,
        *,
        fragments: Optional[List[str]] = None,
        attachments: Optional[List[Attachment]] = None,
        system: Optional[str] = None,
        schema: Optional[Union[dict, type[BaseModel]]] = None,
        tools: Optional[List[ToolDef]] = None,
        tool_results: Optional[List[ToolResult]] = None,
        system_fragments: Optional[List[str]] = None,
        stream: bool = True,
        key: Optional[str] = None,
        **options,
    ) -> "AsyncResponse":
        """
        Build an AsyncResponse for a single prompt within this conversation.

        ``tools`` falls back to the conversation's own tools when not given,
        matching chain() and the synchronous Conversation.prompt(). Extra
        keyword arguments are passed to the model's Options class.
        """
        return AsyncResponse(
            Prompt(
                prompt,
                model=self.model,
                fragments=fragments,
                attachments=attachments,
                system=system,
                schema=schema,
                tools=tools or self.tools,
                tool_results=tool_results,
                system_fragments=system_fragments,
                options=self.model.Options(**options),
            ),
            self.model,
            stream,
            conversation=self,
            key=key,
        )

    def to_sync_conversation(self):
        """
        Return a Conversation with the same model, id, name, tools and
        chain_limit. Responses and the before/after call hooks are not
        carried over.
        """
        return Conversation(
            model=self.model,
            id=self.id,
            name=self.name,
            responses=[],  # Because we only use this in logging
            tools=self.tools,
            chain_limit=self.chain_limit,
        )

    @classmethod
    def from_row(cls, row):
        """Build an AsyncConversation from a row with "model", "id" and "name" keys."""
        # Imported here rather than at module level
        from llm import get_async_model

        return cls(
            model=get_async_model(row["model"]),
            id=row["id"],
            name=row["name"],
        )

    def __repr__(self):
        # Pluralize for every count except exactly one, and close the bracket
        count = len(self.responses)
        plural = "" if count == 1 else "s"
        return f"<{self.__class__.__name__}: {self.id} - {count} response{plural}>"


# Query returning every fragment attached to one response (bound as
# :response_id). Rows from prompt_fragments are tagged 'prompt' and rows from
# system_fragments are tagged 'system'; each carries the fragment content and
# its position. Sorting by fragment_type descending puts the 'system' rows
# first, and within each type rows follow their stored order.
FRAGMENT_SQL = """
select
    'prompt' as fragment_type,
    fragments.content,
    pf."order" as ord
from prompt_fragments pf
join fragments on pf.fragment_id = fragments.id
where pf.response_id = :response_id
union all
select
    'system' as fragment_type,
    fragments.content,
    sf."order" as ord
from system_fragments sf
join fragments on sf.fragment_id = fragments.id
where sf.response_id = :response_id
order by fragment_type desc, ord asc;
"""


class _BaseResponse:
    """Base response class shared between sync and async responses"""

    id: str
    prompt: "Prompt"
    stream: bool
    resolved_model: Optional[str] = None
    conversation: Optional["_BaseConversation"] = None
    _key: Optional[str] = None
    _tool_calls: List[ToolCall] = []

    def __init__(
        self,
        prompt: Prompt,
        model: "_BaseModel",
        stream: bool,
        conversation: Optional[_BaseConversation] = None,
        key: Optional[str] = None,
    ):
        """
        Set up an unfinished response for ``prompt`` against ``model``.

        The response gets a new lower-cased monotonic ULID as its id. Raises
        ValueError if the prompt has a schema or tools and the model does not
        support them.
        """
        # Identity and constructor arguments
        self.id = str(monotonic_ulid()).lower()
        self.prompt = prompt
        self.model = model
        self.stream = stream
        self.conversation = conversation
        self._key = key

        # Output state; nothing has been produced yet
        self._done = False
        self._chunks: List[str] = []
        self._tool_calls: List[ToolCall] = []
        self.attachments: List[Attachment] = []
        self._prompt_json = None
        self.response_json: Optional[Dict[str, Any]] = None
        self.done_callbacks: List[Callable] = []

        # Timing
        self._start: Optional[float] = None
        self._end: Optional[float] = None
        self._start_utcnow: Optional[datetime.datetime] = None

        # Token usage, filled in by set_usage()
        self.input_tokens: Optional[int] = None
        self.output_tokens: Optional[int] = None
        self.token_details: Optional[dict] = None

        # Refuse features the model cannot handle
        if self.prompt.schema and not self.model.supports_schema:
            raise ValueError(f"{self.model} does not support schemas")

        if self.prompt.tools and not self.model.supports_tools:
            raise ValueError(f"{self.model} does not support tools")

    def add_tool_call(self, tool_call: ToolCall):
        """Record ``tool_call`` on this response by appending it to its tool call list."""
        self._tool_calls.append(tool_call)

    def set_usage(
        self,
        *,
        input: Optional[int] = None,
        output: Optional[int] = None,
        details: Optional[dict] = None,
    ):
        """
        Store usage figures on the response. All three values are overwritten
        on every call; anything not passed is reset to None.
        """
        self.input_tokens, self.output_tokens, self.token_details = (
            input,
            output,
            details,
        )

    def set_resolved_model(self, model_id: str):
        """Store ``model_id`` as the resolved model; log_to_db() writes it to the resolved_model column."""
        self.resolved_model = model_id

    @classmethod
    def from_row(cls, db, row, _async=False):
        """
        Rebuild a completed response from a logged ``responses`` row.

        ``db`` is queried for the related schema, tools, tool results,
        fragments, attachments and tool calls. ``_async`` selects whether the
        model is looked up with get_async_model() or get_model().

        The returned response is marked done, with the stored response text
        as its only chunk and its id taken from the row.
        """
        from llm import get_model, get_async_model

        if _async:
            model = get_async_model(row["model"])
        else:
            model = get_model(row["model"])

        # Schema
        schema = None
        if row["schema_id"]:
            schema = json.loads(db["schemas"].get(row["schema_id"])["content"])

        # Tool definitions and results for prompt
        tools = [
            Tool(
                name=tool_row["name"],
                description=tool_row["description"],
                input_schema=json.loads(tool_row["input_schema"]),
                # In this case we don't have a reference to the actual Python code
                # but that's OK, we should not need it for prompts deserialized from DB
                implementation=None,
                plugin=tool_row["plugin"],
            )
            for tool_row in db.query(
                """
                select tools.* from tools
                join tool_responses on tools.id = tool_responses.tool_id
                where tool_responses.response_id = ?
            """,
                [row["id"]],
            )
        ]
        # Only name, output and tool_call_id are restored for tool results;
        # attachments, instance and exception are left at their defaults.
        tool_results = [
            ToolResult(
                name=tool_results_row["name"],
                output=tool_results_row["output"],
                tool_call_id=tool_results_row["tool_call_id"],
            )
            for tool_results_row in db.query(
                """
                select * from tool_results
                where response_id = ?
            """,
                [row["id"]],
            )
        ]

        # One query fetches both kinds of fragment; split them by type here.
        # The comprehension variable "row" is local to each comprehension and
        # does not replace the response row.
        all_fragments = list(db.query(FRAGMENT_SQL, {"response_id": row["id"]}))
        fragments = [
            row["content"] for row in all_fragments if row["fragment_type"] == "prompt"
        ]
        system_fragments = [
            row["content"] for row in all_fragments if row["fragment_type"] == "system"
        ]
        response = cls(
            model=model,
            prompt=Prompt(
                prompt=row["prompt"],
                model=model,
                fragments=fragments,
                attachments=[],
                system=row["system"],
                schema=schema,
                tools=tools,
                tool_results=tool_results,
                system_fragments=system_fragments,
                options=model.Options(**json.loads(row["options_json"])),
            ),
            stream=False,
        )
        # Overwrite the freshly generated state with what was stored
        prompt_json = json.loads(row["prompt_json"] or "null")
        response.id = row["id"]
        response._prompt_json = prompt_json
        response.response_json = json.loads(row["response_json"] or "null")
        response._done = True
        response._chunks = [row["response"]]
        # Attachments
        response.attachments = [
            Attachment.from_row(attachment_row)
            for attachment_row in db.query(
                """
                select attachments.* from attachments
                join prompt_attachments on attachments.id = prompt_attachments.attachment_id
                where prompt_attachments.response_id = ?
                order by prompt_attachments."order"
            """,
                [row["id"]],
            )
        ]
        # Tool calls
        response._tool_calls = [
            ToolCall(
                name=tool_row["name"],
                arguments=json.loads(tool_row["arguments"]),
                tool_call_id=tool_row["tool_call_id"],
            )
            for tool_row in db.query(
                """
                select * from tool_calls
                where response_id = ?
                order by tool_call_id
            """,
                [row["id"]],
            )
        ]

        return response

    def token_usage(self) -> str:
        """Return this response's input, output and detail usage values formatted by token_usage_string()."""
        return token_usage_string(
            self.input_tokens, self.output_tokens, self.token_details
        )

    def log_to_db(self, db):
        """
        Persist this response and everything attached to its prompt to ``db``.

        Rows are written for the conversation (inserted only if not already
        present), the schema, prompt and system fragments, the response
        itself, prompt attachments, tools, tool calls, tool results and the
        attachments of those tool results.

        ``db`` is used through ``db[table].insert(...)`` and the ``last_pk``
        of the returned object - presumably a sqlite-utils Database; confirm.
        """
        conversation = self.conversation
        if not conversation:
            # Responses created outside a conversation still need one to log against
            conversation = Conversation(model=self.model)
        db["conversations"].insert(
            {
                "id": conversation.id,
                "name": _conversation_name(
                    self.prompt.prompt or self.prompt.system or ""
                ),
                "model": conversation.model.model_id,
            },
            ignore=True,
        )
        schema_id = None
        if self.prompt.schema:
            schema_id, schema_json = make_schema_id(self.prompt.schema)
            db["schemas"].insert({"id": schema_id, "content": schema_json}, ignore=True)

        response_id = self.id
        # Maps a replacement key to the text it stands for; handed to
        # condense_json() below for the stored prompt and response JSON
        replacements = {}
        # Include replacements from previous responses
        for previous_response in conversation.responses[:-1]:
            for fragment in (previous_response.prompt.fragments or []) + (
                previous_response.prompt.system_fragments or []
            ):
                fragment_id = ensure_fragment(db, fragment)
                replacements[f"f:{fragment_id}"] = fragment
                # NOTE(review): this sits inside the fragment loop, so a previous
                # response without fragments contributes no "r:" replacement -
                # confirm that is intended.
                replacements[f"r:{previous_response.id}"] = (
                    previous_response.text_or_raise()
                )

        # NOTE(review): the keys below are "f{id}" while the loop above uses
        # "f:{id}" - confirm which form readers of the condensed JSON expect.
        for i, fragment in enumerate(self.prompt.fragments):
            fragment_id = ensure_fragment(db, fragment)
            replacements[f"f{fragment_id}"] = fragment
            db["prompt_fragments"].insert(
                {
                    "response_id": response_id,
                    "fragment_id": fragment_id,
                    "order": i,
                },
            )
        for i, fragment in enumerate(self.prompt.system_fragments):
            fragment_id = ensure_fragment(db, fragment)
            replacements[f"f{fragment_id}"] = fragment
            db["system_fragments"].insert(
                {
                    "response_id": response_id,
                    "fragment_id": fragment_id,
                    "order": i,
                },
            )

        response_text = self.text_or_raise()
        replacements[f"r:{response_id}"] = response_text
        json_data = self.json()

        response = {
            "id": response_id,
            "model": self.model.model_id,
            "prompt": self.prompt._prompt,
            "system": self.prompt._system,
            "prompt_json": condense_json(self._prompt_json, replacements),
            # Options left at None are not stored
            "options_json": {
                key: value
                for key, value in dict(self.prompt.options).items()
                if value is not None
            },
            "response": response_text,
            "response_json": condense_json(json_data, replacements),
            "conversation_id": conversation.id,
            "duration_ms": self.duration_ms(),
            "datetime_utc": self.datetime_utc(),
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "token_details": (
                json.dumps(self.token_details) if self.token_details else None
            ),
            "schema_id": schema_id,
            "resolved_model": self.resolved_model,
        }
        db["responses"].insert(response)

        # Persist any attachments - loop through with index
        for index, attachment in enumerate(self.prompt.attachments):
            attachment_id = attachment.id()
            db["attachments"].insert(
                {
                    "id": attachment_id,
                    "type": attachment.resolve_type(),
                    "path": attachment.path,
                    "url": attachment.url,
                    "content": attachment.content,
                },
                replace=True,
            )
            db["prompt_attachments"].insert(
                {
                    "response_id": response_id,
                    "attachment_id": attachment_id,
                    "order": index,
                },
            )

        # Persist any tools, tool calls and tool results
        tool_ids_by_name = {}
        tools_by_name = {}
        for tool in self.prompt.tools:
            tool_id = ensure_tool(db, tool)
            tool_ids_by_name[tool.name] = tool_id
            tools_by_name[tool.name] = tool
            db["tool_responses"].insert(
                {
                    "tool_id": tool_id,
                    "response_id": response_id,
                }
            )
        for tool_call in self.tool_calls():  # TODO Should  be _or_raise()
            db["tool_calls"].insert(
                {
                    "response_id": response_id,
                    "tool_id": tool_ids_by_name.get(tool_call.name) or None,
                    "name": tool_call.name,
                    "arguments": json.dumps(tool_call.arguments),
                    "tool_call_id": tool_call.tool_call_id,
                }
            )
        for tool_result in self.prompt.tool_results:
            instance_id = None
            if tool_result.instance:
                # Describe the instance using the tool that produced THIS result,
                # not whichever tool the loop over prompt tools happened to end
                # on (a name that is also undefined when the prompt has no tools).
                result_tool = tools_by_name.get(tool_result.name)
                if result_tool is not None:
                    instance_plugin = result_tool.plugin
                else:
                    instance_plugin = getattr(tool_result.instance, "plugin", None)
                try:
                    # Each instance is recorded once; later results reuse its id
                    if not tool_result.instance.instance_id:
                        tool_result.instance.instance_id = (
                            db["tool_instances"]
                            .insert(
                                {
                                    "plugin": instance_plugin,
                                    # Toolbox tools are named "<ClassName>_<method>"
                                    "name": tool_result.name.split("_")[0],
                                    "arguments": json.dumps(
                                        tool_result.instance._config
                                    ),
                                }
                            )
                            .last_pk
                        )
                    instance_id = tool_result.instance.instance_id
                except AttributeError:
                    # Best effort: an instance lacking instance_id or _config
                    # is simply not recorded
                    pass
            tool_result_id = (
                db["tool_results"]
                .insert(
                    {
                        "response_id": response_id,
                        "tool_id": tool_ids_by_name.get(tool_result.name) or None,
                        "name": tool_result.name,
                        "output": tool_result.output,
                        "tool_call_id": tool_result.tool_call_id,
                        "instance_id": instance_id,
                        # Stored as "ClassName: message"
                        "exception": (
                            (
                                "{}: {}".format(
                                    tool_result.exception.__class__.__name__,
                                    str(tool_result.exception),
                                )
                            )
                            if tool_result.exception
                            else None
                        ),
                    }
                )
                .last_pk
            )
            # Persist attachments for tool results
            for index, attachment in enumerate(tool_result.attachments):
                attachment_id = attachment.id()
                db["attachments"].insert(
                    {
                        "id": attachment_id,
                        "type": attachment.resolve_type(),
                        "path": attachment.path,
                        "url": attachment.url,
                        "content": attachment.content,
                    },
                    replace=True,
                )
                db["tool_results_attachments"].insert(
                    {
                        "tool_result_id": tool_result_id,
                        "attachment_id": attachment_id,
                        "order": index,
                    },
                )


class Response(_BaseResponse):
    """Response produced by a synchronous model.

    The prompt is executed lazily: ``model.execute()`` is only driven the
    first time the response is iterated (directly, or indirectly through
    ``_force()``). Yielded chunks are cached in ``self._chunks`` so later
    reads replay them instead of calling the model again.
    """

    model: "Model"
    conversation: Optional["Conversation"] = None

    def on_done(self, callback):
        """Arrange for ``callback(self)`` to run when the response completes.

        If the response is already done the callback is called immediately,
        otherwise it is appended to ``self.done_callbacks``.
        """
        if not self._done:
            self.done_callbacks.append(callback)
        else:
            callback(self)

    def _on_done(self):
        """Call every queued completion callback with this response."""
        for callback in self.done_callbacks:
            callback(self)

    def __str__(self) -> str:
        return self.text()

    def _force(self):
        """Exhaust the response iterator if it has not completed yet."""
        if not self._done:
            list(self)

    def text(self) -> str:
        """Return the complete response text, executing the prompt if needed."""
        self._force()
        return "".join(self._chunks)

    def text_or_raise(self) -> str:
        """Return the response text; this implementation simply calls ``text()``."""
        return self.text()

    def execute_tool_calls(
        self,
        *,
        before_call: Optional[BeforeCallSync] = None,
        after_call: Optional[AfterCallSync] = None,
    ) -> List[ToolResult]:
        """Run every tool call requested by this response and collect the results.

        Args:
            before_call: Optional callback invoked as ``before_call(tool, tool_call)``
                before each call. ``tool`` is ``None`` when the requested name is
                not among the prompt's tools. If it raises ``CancelToolCall`` a
                "Cancelled: ..." result is recorded and the tool is not run.
            after_call: Optional callback invoked as
                ``after_call(tool, tool_call, tool_result)`` after a tool has run.

        Returns:
            One ``ToolResult`` per tool call, in the order of ``self.tool_calls()``.

        Raises:
            TypeError: If either callback returns an awaitable.
            ValueError: If a matching tool has no implementation.
        """
        tool_results = []
        tools_by_name = {tool.name: tool for tool in self.prompt.tools}

        # Run prepare() on every Toolbox instance that still needs it. The
        # flag is set straight away so an instance shared by several tools is
        # prepared only once rather than once per tool.
        for tool_to_prep in tools_by_name.values():
            inst = _get_instance(tool_to_prep.implementation)
            if isinstance(inst, Toolbox) and not getattr(inst, "_prepared", False):
                inst.prepare()
                inst._prepared = True

        for tool_call in self.tool_calls():
            tool: Optional[Tool] = tools_by_name.get(tool_call.name)
            # Tool could be None if the tool was not found in the prompt tools,
            # but we still call the before_call method:
            if before_call:
                try:
                    cb_result = before_call(tool, tool_call)
                    if inspect.isawaitable(cb_result):
                        raise TypeError(
                            "Asynchronous 'before_call' callback provided to a synchronous tool execution context. "
                            "Please use an async chain/response or a synchronous callback."
                        )
                except CancelToolCall as ex:
                    tool_results.append(
                        ToolResult(
                            name=tool_call.name,
                            output="Cancelled: " + str(ex),
                            tool_call_id=tool_call.tool_call_id,
                            exception=ex,
                        )
                    )
                    continue

            if tool is None:
                # Unknown tool: report the problem as a result, do not raise
                msg = 'tool "{}" does not exist'.format(tool_call.name)
                tool_results.append(
                    ToolResult(
                        name=tool_call.name,
                        output="Error: " + msg,
                        tool_call_id=tool_call.tool_call_id,
                        exception=KeyError(msg),
                    )
                )
                continue

            if not tool.implementation:
                raise ValueError(
                    "No implementation available for tool: {}".format(tool_call.name)
                )

            attachments = []
            exception = None

            try:
                if inspect.iscoroutinefunction(tool.implementation):
                    # Coroutine tools are run to completion on a fresh event loop
                    result = asyncio.run(tool.implementation(**tool_call.arguments))
                else:
                    result = tool.implementation(**tool_call.arguments)

                if isinstance(result, ToolOutput):
                    attachments = result.attachments
                    result = result.output

                if not isinstance(result, str):
                    # Non-string results are serialized; repr() is the
                    # fallback for values json cannot encode
                    result = json.dumps(result, default=repr)
            except Exception as ex:
                # Tool failures become an "Error: ..." output plus the exception
                result = f"Error: {ex}"
                exception = ex

            tool_result_obj = ToolResult(
                name=tool_call.name,
                output=result,
                attachments=attachments,
                tool_call_id=tool_call.tool_call_id,
                instance=_get_instance(tool.implementation),
                exception=exception,
            )

            if after_call:
                cb_result = after_call(tool, tool_call, tool_result_obj)
                if inspect.isawaitable(cb_result):
                    raise TypeError(
                        "Asynchronous 'after_call' callback provided to a synchronous tool execution context. "
                        "Please use an async chain/response or a synchronous callback."
                    )
            tool_results.append(tool_result_obj)
        return tool_results

    def tool_calls(self) -> List[ToolCall]:
        """Return the tool calls recorded on this response, forcing it first."""
        self._force()
        return self._tool_calls

    def tool_calls_or_raise(self) -> List[ToolCall]:
        """Return the tool calls; this implementation simply calls ``tool_calls()``."""
        return self.tool_calls()

    def json(self) -> Optional[Dict[str, Any]]:
        """Return ``self.response_json`` after forcing the response."""
        self._force()
        return self.response_json

    def duration_ms(self) -> int:
        """Return end minus start in whole milliseconds (missing values count as 0)."""
        self._force()
        return int(((self._end or 0) - (self._start or 0)) * 1000)

    def datetime_utc(self) -> str:
        """Return the recorded UTC start time in ISO format, or "" if unset."""
        self._force()
        return self._start_utcnow.isoformat() if self._start_utcnow else ""

    def usage(self) -> Usage:
        """Return token usage (input, output, details) after forcing the response."""
        self._force()
        return Usage(
            input=self.input_tokens,
            output=self.output_tokens,
            details=self.token_details,
        )

    def __iter__(self) -> Iterator[str]:
        """Yield response chunks, executing the prompt on first iteration.

        A completed response replays its cached chunks and leaves the
        recorded start time untouched, so ``duration_ms()`` and
        ``datetime_utc()`` keep describing the original execution.

        Raises:
            Exception: If ``self.model`` is neither a ``Model`` nor a ``KeyModel``.
        """
        if self._done:
            yield from self._chunks
            return

        self._start = time.monotonic()
        self._start_utcnow = datetime.datetime.now(datetime.timezone.utc)

        if isinstance(self.model, Model):
            for chunk in self.model.execute(
                self.prompt,
                stream=self.stream,
                response=self,
                conversation=self.conversation,
            ):
                assert chunk is not None
                yield chunk
                self._chunks.append(chunk)
        elif isinstance(self.model, KeyModel):
            # Key-based models additionally receive the resolved API key
            for chunk in self.model.execute(
                self.prompt,
                stream=self.stream,
                response=self,
                conversation=self.conversation,
                key=self.model.get_key(self._key),
            ):
                assert chunk is not None
                yield chunk
                self._chunks.append(chunk)
        else:
            raise Exception("self.model must be a Model or KeyModel")

        if self.conversation:
            self.conversation.responses.append(self)
        self._end = time.monotonic()
        self._done = True
        self._on_done()

    def __repr__(self):
        text = "... not yet done ..."
        if self._done:
            text = "".join(self._chunks)
        return "<Response prompt='{}' text='{}'>".format(self.prompt.prompt, text)


class AsyncResponse(_BaseResponse):
    """Response produced by an asynchronous model.

    The prompt is executed lazily through the async iterator protocol
    (``__aiter__`` / ``__anext__``). Awaiting the response, or any of the
    async accessors, drives it to completion via ``_force()``. Chunks are
    cached in ``self._chunks`` so a completed response can be replayed.
    """

    model: "AsyncModel"
    conversation: Optional["AsyncConversation"] = None

    @classmethod
    def from_row(cls, db, row, _async=False):
        """Delegate to the base ``from_row``, always passing ``_async=True``."""
        return super().from_row(db, row, _async=True)

    async def on_done(self, callback):
        """Arrange for ``callback`` to run when the response completes.

        If the response is not done yet the callback is queued. Otherwise a
        callable is invoked with this response (its result is awaited when
        awaitable) and a bare awaitable is awaited directly.
        """
        if not self._done:
            self.done_callbacks.append(callback)
        else:
            if callable(callback):
                # Ensure we handle both sync and async callbacks correctly
                processed_callback = callback(self)
                if inspect.isawaitable(processed_callback):
                    await processed_callback
            elif inspect.isawaitable(callback):
                await callback

    async def _on_done(self):
        """Run every queued completion callback, awaiting awaitable ones."""
        for callback_func in self.done_callbacks:
            if callable(callback_func):
                processed_callback = callback_func(self)
                if inspect.isawaitable(processed_callback):
                    await processed_callback
            elif inspect.isawaitable(callback_func):
                await callback_func

    async def execute_tool_calls(
        self,
        *,
        before_call: Optional[BeforeCallAsync] = None,
        after_call: Optional[AfterCallAsync] = None,
    ) -> List[ToolResult]:
        """Run every tool call requested by this response and collect the results.

        Coroutine tool implementations are scheduled as tasks and awaited
        together; other implementations are called inline. Results are
        returned in the order of the original tool calls.

        Args:
            before_call: Optional callback invoked as ``before_call(tool, tool_call)``
                before a tool runs; an awaitable result is awaited. If it raises
                ``CancelToolCall`` a "Cancelled: ..." result is recorded and the
                tool is not run.
            after_call: Optional callback invoked as
                ``after_call(tool, tool_call, tool_result)`` after a tool has
                run; an awaitable result is awaited.

        Returns:
            One ``ToolResult`` per tool call. A call naming an unknown tool, or
            a tool without an implementation, yields an "Error: ..." result
            carrying a ``KeyError``; the callbacks are not invoked for it.
        """
        tool_calls_list = await self.tool_calls()
        tools_by_name = {tool.name: tool for tool in self.prompt.tools}

        # Run prepare_async() on every Toolbox instance that still needs it.
        # The flag is set straight away so an instance shared by several tools
        # is prepared only once rather than once per tool.
        for tool_to_prep in tools_by_name.values():
            inst = _get_instance(tool_to_prep.implementation)
            if isinstance(inst, Toolbox) and not getattr(
                inst, "_async_prepared", False
            ):
                await inst.prepare_async()
                inst._async_prepared = True

        indexed_results: List[tuple[int, ToolResult]] = []
        async_tasks: List[asyncio.Task] = []

        for idx, tc in enumerate(tool_calls_list):
            tool: Optional[Tool] = tools_by_name.get(tc.name)

            # Calls that cannot be dispatched still need a result, otherwise
            # the tool call would silently disappear from the returned list.
            error_output: Optional[str] = None
            if tool is None:
                error_output = f'Error: tool "{tc.name}" does not exist'
            elif not tool.implementation:
                error_output = f'Error: tool "{tc.name}" has no implementation'
            if tool is None or error_output is not None:
                indexed_results.append(
                    (
                        idx,
                        ToolResult(
                            name=tc.name,
                            output=error_output,
                            tool_call_id=tc.tool_call_id,
                            exception=KeyError(tc.name),
                        ),
                    )
                )
                continue

            if inspect.iscoroutinefunction(tool.implementation):

                # Defaults bind the current loop values to this task
                async def run_async(tc=tc, tool=tool, idx=idx):
                    # before_call inside the task
                    if before_call:
                        try:
                            cb = before_call(tool, tc)
                            if inspect.isawaitable(cb):
                                await cb
                        except CancelToolCall as ex:
                            return idx, ToolResult(
                                name=tc.name,
                                output="Cancelled: " + str(ex),
                                tool_call_id=tc.tool_call_id,
                                exception=ex,
                            )

                    exception = None
                    attachments = []

                    try:
                        result = await tool.implementation(**tc.arguments)
                        if isinstance(result, ToolOutput):
                            attachments.extend(result.attachments)
                            result = result.output
                        output = (
                            result
                            if isinstance(result, str)
                            else json.dumps(result, default=repr)
                        )
                    except Exception as ex:
                        # Tool failures become an "Error: ..." output
                        output = f"Error: {ex}"
                        exception = ex

                    tr = ToolResult(
                        name=tc.name,
                        output=output,
                        attachments=attachments,
                        tool_call_id=tc.tool_call_id,
                        instance=_get_instance(tool.implementation),
                        exception=exception,
                    )

                    # after_call inside the task
                    if after_call:
                        cb2 = after_call(tool, tc, tr)
                        if inspect.isawaitable(cb2):
                            await cb2

                    return idx, tr

                async_tasks.append(asyncio.create_task(run_async()))
                continue

            # Sync implementation: do hooks and call inline
            if before_call:
                try:
                    cb = before_call(tool, tc)
                    if inspect.isawaitable(cb):
                        await cb
                except CancelToolCall as ex:
                    indexed_results.append(
                        (
                            idx,
                            ToolResult(
                                name=tc.name,
                                output="Cancelled: " + str(ex),
                                tool_call_id=tc.tool_call_id,
                                exception=ex,
                            ),
                        )
                    )
                    continue

            exception: Optional[Exception] = None
            attachments = []

            try:
                res = tool.implementation(**tc.arguments)
                # A plain callable may still hand back an awaitable
                if inspect.isawaitable(res):
                    res = await res
                if isinstance(res, ToolOutput):
                    attachments.extend(res.attachments)
                    res = res.output
                output = res if isinstance(res, str) else json.dumps(res, default=repr)
            except Exception as ex:
                output = f"Error: {ex}"
                exception = ex

            tr = ToolResult(
                name=tc.name,
                output=output,
                attachments=attachments,
                tool_call_id=tc.tool_call_id,
                instance=_get_instance(tool.implementation),
                exception=exception,
            )

            if after_call:
                cb2 = after_call(tool, tc, tr)
                if inspect.isawaitable(cb2):
                    await cb2

            indexed_results.append((idx, tr))

        # Await all async tasks in parallel
        if async_tasks:
            indexed_results.extend(await asyncio.gather(*async_tasks))

        # Reorder by original index
        indexed_results.sort(key=lambda x: x[0])
        return [tr for _, tr in indexed_results]

    def __aiter__(self):
        """Start (or replay) iteration over the response chunks.

        A completed response gets a fresh copy of the cached chunks to
        replay and keeps its recorded start time; otherwise the start time
        is stamped now.
        """
        if self._done:
            self._iter_chunks = list(self._chunks)  # Make a copy for iteration
        else:
            self._start = time.monotonic()
            self._start_utcnow = datetime.datetime.now(datetime.timezone.utc)
        return self

    async def __anext__(self) -> str:
        """Return the next chunk, creating the model generator on first use.

        Raises:
            StopAsyncIteration: When the replay copy or the model generator
                is exhausted.
            ValueError: If ``self.model`` is neither an ``AsyncModel`` nor an
                ``AsyncKeyModel``.
        """
        if self._done:
            if hasattr(self, "_iter_chunks") and self._iter_chunks:
                return self._iter_chunks.pop(0)
            raise StopAsyncIteration

        if not hasattr(self, "_generator"):
            if isinstance(self.model, AsyncModel):
                self._generator = self.model.execute(
                    self.prompt,
                    stream=self.stream,
                    response=self,
                    conversation=self.conversation,
                )
            elif isinstance(self.model, AsyncKeyModel):
                # Key-based models additionally receive the resolved API key
                self._generator = self.model.execute(
                    self.prompt,
                    stream=self.stream,
                    response=self,
                    conversation=self.conversation,
                    key=self.model.get_key(self._key),
                )
            else:
                raise ValueError("self.model must be an AsyncModel or AsyncKeyModel")

        try:
            chunk = await self._generator.__anext__()
            assert chunk is not None
            self._chunks.append(chunk)
            return chunk
        except StopAsyncIteration:
            # Generator exhausted: finalize state, then propagate the stop
            if self.conversation:
                self.conversation.responses.append(self)
            self._end = time.monotonic()
            self._done = True
            if hasattr(self, "_generator"):
                del self._generator
            await self._on_done()
            raise

    async def _force(self):
        """Drive the response to completion if needed and return ``self``."""
        if not self._done:
            # Iterating populates self._chunks as a side effect of __anext__
            async for _ in self:
                pass
        return self

    def text_or_raise(self) -> str:
        """Return the response text without awaiting.

        Raises:
            ValueError: If the response has not completed yet.
        """
        if not self._done:
            raise ValueError("Response not yet awaited")
        return "".join(self._chunks)

    async def text(self) -> str:
        """Return the complete response text, executing the prompt if needed."""
        await self._force()
        return "".join(self._chunks)

    async def tool_calls(self) -> List[ToolCall]:
        """Return the tool calls recorded on this response, forcing it first."""
        await self._force()
        return self._tool_calls

    def tool_calls_or_raise(self) -> List[ToolCall]:
        """Return the recorded tool calls without awaiting.

        Raises:
            ValueError: If the response has not completed yet.
        """
        if not self._done:
            raise ValueError("Response not yet awaited")
        return self._tool_calls

    async def json(self) -> Optional[Dict[str, Any]]:
        """Return ``self.response_json`` after forcing the response."""
        await self._force()
        return self.response_json

    async def duration_ms(self) -> int:
        """Return end minus start in whole milliseconds (missing values count as 0)."""
        await self._force()
        return int(((self._end or 0) - (self._start or 0)) * 1000)

    async def datetime_utc(self) -> str:
        """Return the recorded UTC start time in ISO format, or "" if unset."""
        await self._force()
        return self._start_utcnow.isoformat() if self._start_utcnow else ""

    async def usage(self) -> Usage:
        """Return token usage (input, output, details) after forcing the response."""
        await self._force()
        return Usage(
            input=self.input_tokens,
            output=self.output_tokens,
            details=self.token_details,
        )

    def __await__(self):
        # ``await response`` completes the response and evaluates to it
        return self._force().__await__()

    async def to_sync_response(self) -> Response:
        """Return a completed ``Response`` carrying a copy of this response's data.

        The new object is given this response's model as-is, so when that
        model is an async model the copy is meant for reading data rather
        than for executing again.
        """
        await self._force()
        # NOTE: the async model is passed through unchanged; a proper
        # conversion or shared base would be needed for the sync response to
        # be fully functional with its model.
        sync_model = self.model

        response = Response(
            self.prompt,
            sync_model,
            self.stream,
            # conversation type needs to be compatible too.
            conversation=(
                self.conversation.to_sync_conversation() if self.conversation else None
            ),
        )
        response.id = self.id
        response._chunks = list(self._chunks)  # Copy chunks
        response._done = self._done
        response._end = self._end
        response._start = self._start
        response._start_utcnow = self._start_utcnow
        response.input_tokens = self.input_tokens
        response.output_tokens = self.output_tokens
        response.token_details = self.token_details
        response._prompt_json = self._prompt_json
        response.response_json = self.response_json
        response._tool_calls = list(self._tool_calls)
        response.attachments = list(self.attachments)
        response.resolved_model = self.resolved_model
        return response

    @classmethod
    def fake(
        cls,
        model: "AsyncModel",
        prompt: str,
        *attachments: List[Attachment],
        system: str,
        response: str,
    ):
        "Utility method to help with writing tests"
        response_obj = cls(
            model=model,
            prompt=Prompt(
                prompt,
                model=model,
                attachments=attachments,
                system=system,
            ),
            stream=False,
        )
        # Mark as already complete so the model is never executed
        response_obj._done = True
        response_obj._chunks = [response]
        return response_obj

    def __repr__(self):
        text = "... not yet awaited ..."
        if self._done:
            text = "".join(self._chunks)
        return "<AsyncResponse prompt='{}' text='{}'>".format(self.prompt.prompt, text)


class _BaseChainResponse:
    """Shared state for a chain of responses driven by tool calls.

    Subclasses produce the individual responses and append them to
    ``self._responses``; this base only stores configuration and knows how
    to log the collected responses to a database.
    """

    prompt: "Prompt"
    stream: bool
    conversation: Optional["_BaseConversation"] = None
    _key: Optional[str] = None

    def __init__(
        self,
        prompt: Prompt,
        model: "_BaseModel",
        stream: bool,
        conversation: _BaseConversation,
        key: Optional[str] = None,
        chain_limit: Optional[int] = 10,
        before_call: Optional[Union[BeforeCallSync, BeforeCallAsync]] = None,
        after_call: Optional[Union[AfterCallSync, AfterCallAsync]] = None,
    ):
        """Store the chain configuration and start with no responses."""
        # What to run and how
        self.model = model
        self.prompt = prompt
        self.stream = stream
        self.conversation = conversation
        self._key = key
        # Chain control and tool hooks
        self.chain_limit = chain_limit
        self.before_call = before_call
        self.after_call = after_call
        # Responses produced so far, filled in by subclasses
        self._responses: List[Any] = []

    def log_to_db(self, db):
        """Log every collected response to ``db``.

        Async responses are first converted with ``to_sync_response()``
        (run through ``asyncio.run``) and the converted copy is logged.
        """
        for item in self._responses:
            if isinstance(item, AsyncResponse):
                loggable = asyncio.run(item.to_sync_response())
            elif isinstance(item, Response):
                loggable = item
            else:
                assert False, "Should have been a Response or AsyncResponse"
            loggable.log_to_db(db)


class ChainResponse(_BaseChainResponse):
    """Synchronous chain: keeps prompting while responses request tool calls."""

    _responses: List["Response"]
    before_call: Optional[BeforeCallSync] = None
    after_call: Optional[AfterCallSync] = None

    def responses(self) -> Iterator[Response]:
        """Yield each response in the chain, executing tool calls in between.

        After a response is yielded its tool calls are executed; if that
        produced any results, a follow-up response is created with an empty
        prompt carrying those results and their attachments. The chain ends
        when a response yields no tool results.

        Raises:
            ValueError: If ``chain_limit`` responses have been produced and
                the latest one still requests tool calls.
        """
        prompt = self.prompt
        count = 0
        current_response: Optional[Response] = Response(
            prompt,
            self.model,
            self.stream,
            key=self._key,
            conversation=self.conversation,
        )
        while current_response:
            count += 1
            yield current_response
            self._responses.append(current_response)
            # Only a response that still wants tools would need another
            # round; a chain that finishes exactly at the limit has not
            # exceeded it.
            if (
                self.chain_limit
                and count >= self.chain_limit
                and current_response.tool_calls()
            ):
                raise ValueError(f"Chain limit of {self.chain_limit} exceeded.")

            # This could raise llm.CancelToolCall:
            tool_results = current_response.execute_tool_calls(
                before_call=self.before_call, after_call=self.after_call
            )
            attachments = []
            for tool_result in tool_results:
                attachments.extend(tool_result.attachments)
            if tool_results:
                current_response = Response(
                    Prompt(
                        "",  # Next prompt is empty, tools drive it
                        self.model,
                        tools=current_response.prompt.tools,
                        tool_results=tool_results,
                        options=self.prompt.options,
                        attachments=attachments,
                    ),
                    self.model,
                    stream=self.stream,
                    key=self._key,
                    conversation=self.conversation,
                )
            else:
                current_response = None
                break

    def __iter__(self) -> Iterator[str]:
        """Yield the text chunks of every response in the chain, in order."""
        for response_item in self.responses():
            yield from response_item

    def text(self) -> str:
        """Return the concatenated text of the whole chain."""
        return "".join(self)


class AsyncChainResponse(_BaseChainResponse):
    """Asynchronous chain: keeps prompting while responses request tool calls."""

    _responses: List["AsyncResponse"]
    before_call: Optional[BeforeCallAsync] = None
    after_call: Optional[AfterCallAsync] = None

    async def responses(self) -> AsyncIterator[AsyncResponse]:
        """Yield each response in the chain, executing tool calls in between.

        After a response is yielded its tool calls are executed; if that
        produced any results, a follow-up response is created with an empty
        prompt carrying those results and their attachments. The chain ends
        when a response yields no tool results.

        Raises:
            ValueError: If ``chain_limit`` responses have been produced and
                the latest one still requests tool calls.
        """
        prompt = self.prompt
        count = 0
        current_response: Optional[AsyncResponse] = AsyncResponse(
            prompt,
            self.model,
            self.stream,
            key=self._key,
            conversation=self.conversation,
        )
        while current_response:
            count += 1
            yield current_response
            self._responses.append(current_response)

            # Only a response that still wants tools would need another
            # round; a chain that finishes exactly at the limit has not
            # exceeded it.
            if (
                self.chain_limit
                and count >= self.chain_limit
                and await current_response.tool_calls()
            ):
                raise ValueError(f"Chain limit of {self.chain_limit} exceeded.")

            # This could raise llm.CancelToolCall:
            tool_results = await current_response.execute_tool_calls(
                before_call=self.before_call, after_call=self.after_call
            )
            if tool_results:
                attachments = []
                for tool_result in tool_results:
                    attachments.extend(tool_result.attachments)
                prompt = Prompt(
                    "",  # Next prompt is empty, tools drive it
                    self.model,
                    tools=current_response.prompt.tools,
                    tool_results=tool_results,
                    options=self.prompt.options,
                    attachments=attachments,
                )
                current_response = AsyncResponse(
                    prompt,
                    self.model,
                    stream=self.stream,
                    key=self._key,
                    conversation=self.conversation,
                )
            else:
                current_response = None
                break

    async def __aiter__(self) -> AsyncIterator[str]:
        """Yield the text chunks of every response in the chain, in order."""
        async for response_item in self.responses():
            async for chunk in response_item:
                yield chunk

    async def text(self) -> str:
        """Return the concatenated text of the whole chain."""
        all_chunks = []
        async for chunk in self:
            all_chunks.append(chunk)
        return "".join(all_chunks)


# Base pydantic model for model options. ``extra="forbid"`` makes validation
# fail when an option name that is not a declared field is supplied.
class Options(BaseModel):
    model_config = ConfigDict(extra="forbid")


# Module-level alias for the class above, so that a class which defines its
# own nested ``Options`` attribute can still name this one as the base.
_Options = Options


class _get_key_mixin:
    """Mixin that resolves the API key a model should use."""

    # Key alias handed to llm.get_key(); None means no key is used
    needs_key: Optional[str] = None
    # Explicit key set on the model; takes priority over any lookup
    key: Optional[str] = None
    # Environment variable name handed to llm.get_key()
    key_env_var: Optional[str] = None

    def get_key(self, explicit_key: Optional[str] = None) -> Optional[str]:
        """Return the API key for this model, or ``None`` if it needs none.

        Args:
            explicit_key: Passed through to ``llm.get_key()`` as its
                ``explicit_key`` argument when a lookup is required.

        Raises:
            NeedsKeyException: If a key is required but none could be found.
        """
        from llm import get_key as lookup_key

        alias = self.needs_key
        if alias is None:
            # This model doesn't use an API key
            return None

        preset = self.key
        if preset is not None:
            # Someone already set model.key='...'
            return preset

        # Attempt to load a key using llm.get_key()
        found = lookup_key(
            explicit_key=explicit_key,
            key_alias=alias,
            env_var=self.key_env_var,
        )
        if not found:
            # Show a useful error message
            message = "No key found - add one using 'llm keys set {}'".format(alias)
            env_name = self.key_env_var
            if env_name:
                message += " or set the {} environment variable".format(env_name)
            raise NeedsKeyException(message)
        return found


class _BaseModel(ABC, _get_key_mixin):
    """Common base for sync and async models: capability flags and helpers."""

    model_id: str
    can_stream: bool = False
    # Attachment types this model accepts; empty means none are supported
    attachment_types: Set = set()

    supports_schema = False
    supports_tools = False

    # Default options model, derived from the module-level base options class
    class Options(_Options):
        pass

    def _validate_attachments(
        self, attachments: Optional[List[Attachment]] = None
    ) -> None:
        """Check that every attachment has a type this model accepts.

        Raises:
            ValueError: If attachments are given but the model accepts none,
                or if an attachment resolves to an unsupported type.
        """
        if not attachments:
            # Nothing to validate
            return
        if not self.attachment_types:
            raise ValueError("This model does not support attachments")
        for candidate in attachments:
            attachment_type = candidate.resolve_type()
            if attachment_type in self.attachment_types:
                continue
            raise ValueError(
                f"This model does not support attachments of type '{attachment_type}', "
                f"only {', '.join(self.attachment_types)}"
            )

    def __str__(self) -> str:
        # "<ClassName>[ (async)]: <model_id>"
        if isinstance(self, (AsyncModel, AsyncKeyModel)):
            async_marker = " (async)"
        else:
            async_marker = ""
        return "{}{}: {}".format(
            self.__class__.__name__,
            async_marker,
            self.model_id,
        )

    def __repr__(self) -> str:
        return f"<{str(self)}>"


class _Model(_BaseModel):
    """Entry points shared by synchronous models: conversation, prompt, chain."""

    def conversation(
        self,
        tools: Optional[List[ToolDef]] = None,
        before_call: Optional[BeforeCallSync] = None,
        after_call: Optional[AfterCallSync] = None,
        chain_limit: Optional[int] = None,
    ) -> Conversation:
        """Create a new ``Conversation`` bound to this model."""
        settings = dict(
            model=self,
            tools=tools,
            before_call=before_call,
            after_call=after_call,
            chain_limit=chain_limit,
        )
        return Conversation(**settings)

    def prompt(
        self,
        prompt: Optional[str] = None,
        *,
        fragments: Optional[List[Union[str, Fragment]]] = None,
        attachments: Optional[List[Attachment]] = None,
        system: Optional[str] = None,
        system_fragments: Optional[List[Union[str, Fragment]]] = None,
        stream: bool = True,
        schema: Optional[Union[dict, type[BaseModel]]] = None,
        tools: Optional[List[ToolDef]] = None,
        tool_results: Optional[List[ToolResult]] = None,
        **options,
    ) -> Response:
        """Build a ``Response`` for a single prompt against this model.

        A ``key`` entry in ``options`` is removed and passed to the
        ``Response`` as its key; the remaining options are validated by
        ``self.Options``. Attachments are validated before the prompt is
        built.
        """
        # "key" is not a model option, so take it out before validation
        explicit_key = options.pop("key", None)
        self._validate_attachments(attachments)
        validated_options = self.Options(**options)
        request = Prompt(
            prompt,
            fragments=fragments,
            attachments=attachments,
            system=system,
            schema=schema,
            tools=tools,
            tool_results=tool_results,
            system_fragments=system_fragments,
            model=self,
            options=validated_options,
        )
        return Response(request, self, stream, key=explicit_key)

    def chain(
        self,
        prompt: Optional[str] = None,
        *,
        fragments: Optional[List[str]] = None,
        attachments: Optional[List[Attachment]] = None,
        system: Optional[str] = None,
        system_fragments: Optional[List[str]] = None,
        stream: bool = True,
        schema: Optional[Union[dict, type[BaseModel]]] = None,
        tools: Optional[List[ToolDef]] = None,
        tool_results: Optional[List[ToolResult]] = None,
        before_call: Optional[BeforeCallSync] = None,
        after_call: Optional[AfterCallSync] = None,
        key: Optional[str] = None,
        options: Optional[dict] = None,
    ) -> ChainResponse:
        """Start a chain in a fresh conversation, forwarding every argument."""
        fresh_conversation = self.conversation()
        return fresh_conversation.chain(
            prompt=prompt,
            fragments=fragments,
            attachments=attachments,
            system=system,
            system_fragments=system_fragments,
            stream=stream,
            schema=schema,
            tools=tools,
            tool_results=tool_results,
            before_call=before_call,
            after_call=after_call,
            key=key,
            options=options,
        )


class Model(_Model):
    """Synchronous model whose ``execute()`` takes no API key argument."""

    @abstractmethod
    def execute(
        self,
        prompt: Prompt,
        stream: bool,
        response: Response,
        conversation: Optional[Conversation],
    ) -> Iterator[str]:
        """Run ``prompt`` and return an iterator of response text chunks."""


class KeyModel(_Model):
    """Synchronous model whose ``execute()`` also receives an API key."""

    @abstractmethod
    def execute(
        self,
        prompt: Prompt,
        stream: bool,
        response: Response,
        conversation: Optional[Conversation],
        key: Optional[str],
    ) -> Iterator[str]:
        """Run ``prompt`` using ``key`` and return an iterator of text chunks."""


class _AsyncModel(_BaseModel):
    # Shared base for the async model classes: provides conversation(),
    # prompt() and chain(), leaving execute() to the concrete subclasses.
    def conversation(
        self,
        tools: Optional[List[ToolDef]] = None,
        before_call: Optional[BeforeCallAsync] = None,
        after_call: Optional[AfterCallAsync] = None,
        chain_limit: Optional[int] = None,
    ) -> AsyncConversation:
        """
        Create a new AsyncConversation bound to this model.

        All arguments are passed straight through to the AsyncConversation
        constructor.
        """
        return AsyncConversation(
            model=self,
            tools=tools,
            before_call=before_call,
            after_call=after_call,
            chain_limit=chain_limit,
        )

    def prompt(
        self,
        prompt: Optional[str] = None,
        *,
        fragments: Optional[List[Union[str, Fragment]]] = None,
        attachments: Optional[List[Attachment]] = None,
        system: Optional[str] = None,
        schema: Optional[Union[dict, type[BaseModel]]] = None,
        tools: Optional[List[ToolDef]] = None,
        tool_results: Optional[List[ToolResult]] = None,
        system_fragments: Optional[List[Union[str, Fragment]]] = None,
        stream: bool = True,
        **options,
    ) -> AsyncResponse:
        """
        Build a Prompt for this model and wrap it in an AsyncResponse.

        A ``key`` entry in ``**options`` is removed and handed to the
        AsyncResponse as its ``key``; the remaining options are passed to
        ``self.Options(**options)``. Attachments are checked with
        ``self._validate_attachments()`` before the Prompt is constructed.
        """
        # "key" travels in **options but is not a model option - pull it out
        # before the rest is handed to self.Options()
        key_value = options.pop("key", None)
        self._validate_attachments(attachments)
        return AsyncResponse(
            Prompt(
                prompt,
                fragments=fragments,
                attachments=attachments,
                system=system,
                schema=schema,
                tools=tools,
                tool_results=tool_results,
                system_fragments=system_fragments,
                model=self,
                options=self.Options(**options),
            ),
            self,
            stream,
            key=key_value,
        )

    def chain(
        self,
        prompt: Optional[str] = None,
        *,
        fragments: Optional[List[str]] = None,
        attachments: Optional[List[Attachment]] = None,
        system: Optional[str] = None,
        system_fragments: Optional[List[str]] = None,
        stream: bool = True,
        schema: Optional[Union[dict, type[BaseModel]]] = None,
        tools: Optional[List[ToolDef]] = None,
        tool_results: Optional[List[ToolResult]] = None,
        before_call: Optional[BeforeCallAsync] = None,
        after_call: Optional[AfterCallAsync] = None,
        key: Optional[str] = None,
        options: Optional[dict] = None,
    ) -> AsyncChainResponse:
        """
        Create a new conversation (with default arguments) and call its
        ``chain()`` method, forwarding every argument unchanged by keyword.
        """
        return self.conversation().chain(
            prompt=prompt,
            fragments=fragments,
            attachments=attachments,
            system=system,
            system_fragments=system_fragments,
            stream=stream,
            schema=schema,
            tools=tools,
            tool_results=tool_results,
            before_call=before_call,
            after_call=after_call,
            key=key,
            options=options,
        )


class AsyncModel(_AsyncModel):
    # Async model base class: subclasses implement execute() as an async
    # generator; no API key is passed to it (compare AsyncKeyModel).
    @abstractmethod
    async def execute(
        self,
        prompt: Prompt,
        stream: bool,
        response: AsyncResponse,
        conversation: Optional[AsyncConversation],
    ) -> AsyncGenerator[str, None]:
        # The unreachable ``yield`` below makes this stub an async generator
        # function, so it matches the declared return annotation.
        if False:  # Ensure it's a generator type
            yield ""
        pass


class AsyncKeyModel(_AsyncModel):
    # Async model base class whose execute() additionally receives the API
    # key as an explicit ``key`` argument (which may be None).
    @abstractmethod
    async def execute(
        self,
        prompt: Prompt,
        stream: bool,
        response: AsyncResponse,
        conversation: Optional[AsyncConversation],
        key: Optional[str],
    ) -> AsyncGenerator[str, None]:
        # The unreachable ``yield`` below makes this stub an async generator
        # function, so it matches the declared return annotation.
        if False:  # Ensure it's a generator type
            yield ""
        pass


class EmbeddingModel(ABC, _get_key_mixin):
    """
    Abstract base class for embedding models.

    Subclasses implement embed_batch(); embed() and embed_multi() are
    convenience wrappers built on top of it.
    """

    model_id: str
    key: Optional[str] = None
    needs_key: Optional[str] = None
    key_env_var: Optional[str] = None
    supports_text: bool = True
    supports_binary: bool = False
    batch_size: Optional[int] = None

    def _check(self, item: Union[str, bytes]):
        "Raise ValueError if item is a kind of input this model does not accept"
        if isinstance(item, bytes) and not self.supports_binary:
            raise ValueError(
                "This model does not support binary data, only text strings"
            )
        if isinstance(item, str) and not self.supports_text:
            raise ValueError(
                "This model does not support text strings, only binary data"
            )

    def embed(self, item: Union[str, bytes]) -> List[float]:
        "Validate and embed one item, returning the first vector from embed_batch()"
        self._check(item)
        vectors = iter(self.embed_batch([item]))
        return next(vectors)

    def embed_multi(
        self, items: Iterable[Union[str, bytes]], batch_size: Optional[int] = None
    ) -> Iterator[List[float]]:
        """
        Lazily embed many items, yielding one vector per item.

        An explicit batch_size overrides the model's own batch_size; when
        neither is set the whole stream is handed to embed_batch() at once.
        """
        stream = iter(items)
        chunk_size = batch_size if batch_size is not None else self.batch_size
        needs_validation = not (self.supports_binary and self.supports_text)
        if needs_validation:

            def validated(source):
                # Check each item just before it is handed on
                for candidate in source:
                    self._check(candidate)
                    yield candidate

            stream = validated(items)
        if chunk_size is None:
            yield from self.embed_batch(stream)
            return
        chunk = list(islice(stream, chunk_size))
        while chunk:
            yield from self.embed_batch(chunk)
            chunk = list(islice(stream, chunk_size))

    @abstractmethod
    def embed_batch(self, items: Iterable[Union[str, bytes]]) -> Iterator[List[float]]:
        """
        Embed a batch of strings or blobs, producing one list of floats per item.
        """
        pass

    def __str__(self) -> str:
        return f"{self.__class__.__name__}: {self.model_id}"

    def __repr__(self) -> str:
        return "<" + str(self) + ">"


@dataclass
class ModelWithAliases:
    """A sync model, its async counterpart, and the aliases they share."""

    model: Model
    async_model: AsyncModel
    aliases: Set[str]

    def matches(self, query: str) -> bool:
        "Case-insensitive substring search over the aliases and model names"
        needle = query.lower()
        candidates: List[str] = list(self.aliases)
        if self.model:
            candidates.append(str(self.model))
        if self.async_model:
            candidates.append(str(self.async_model.model_id))
        for candidate in candidates:
            if needle in candidate.lower():
                return True
        return False


@dataclass
class EmbeddingModelWithAliases:
    """An embedding model together with its set of aliases."""

    model: EmbeddingModel
    aliases: Set[str]

    def matches(self, query: str) -> bool:
        "Case-insensitive substring search over the aliases and str(model)"
        needle = query.lower()
        candidates: List[str] = [*self.aliases, str(self.model)]
        for candidate in candidates:
            if needle in candidate.lower():
                return True
        return False


def _conversation_name(text):
    """
    Turn text into a single-line conversation name.

    Runs of whitespace (newlines included) become one space; anything longer
    than CONVERSATION_NAME_LENGTH is cut and finished with an ellipsis so the
    result is exactly CONVERSATION_NAME_LENGTH characters.
    """
    flattened = re.sub(r"\s+", " ", text)
    if len(flattened) > CONVERSATION_NAME_LENGTH:
        return flattened[: CONVERSATION_NAME_LENGTH - 1] + "…"
    return flattened


def _ensure_dict_schema(schema):
    """Convert a Pydantic model to a JSON schema dict if needed."""
    if schema and not isinstance(schema, dict) and issubclass(schema, BaseModel):
        schema_dict = schema.model_json_schema()
        _remove_titles_recursively(schema_dict)
        return schema_dict
    return schema


def _remove_titles_recursively(obj):
    """Recursively remove all 'title' fields from a nested dictionary."""
    if isinstance(obj, dict):
        # Remove title if present
        obj.pop("title", None)

        # Recursively process all values
        for value in obj.values():
            _remove_titles_recursively(value)
    elif isinstance(obj, list):
        # Process each item in lists
        for item in obj:
            _remove_titles_recursively(item)


def _get_instance(implementation):
    if hasattr(implementation, "__self__"):
        return implementation.__self__
    return None


================================================
FILE: llm/plugins.py
================================================
import importlib
from importlib import metadata
import os
import pluggy
import sys
from . import hookspecs

# Modules bundled with llm that load_plugins() always registers, regardless
# of which external plugins are installed.
DEFAULT_PLUGINS = (
    "llm.default_plugins.openai_models",
    "llm.default_plugins.default_tools",
)

# The pluggy plugin manager for the "llm" project, with the hook
# specifications from hookspecs attached.
pm = pluggy.PluginManager("llm")
pm.add_hookspecs(hookspecs)

# Optional comma-separated list of plugin package names, read once at import
# time. When set (even to an empty string), load_plugins() loads only these
# packages instead of every installed entry point.
LLM_LOAD_PLUGINS = os.environ.get("LLM_LOAD_PLUGINS", None)

# Guard so that load_plugins() only does its work once per process.
_loaded = False


def load_plugins():
    """
    Register plugins with the plugin manager. Safe to call repeatedly - only
    the first call does any work.

    - If LLM_LOAD_PLUGINS is unset and we are not running under the test
      suite (``sys._called_from_test``), every installed "llm" entry point
      is loaded.
    - If LLM_LOAD_PLUGINS is set, only the comma-separated package names it
      lists are loaded. Whitespace around names is ignored; packages that
      cannot be found produce a warning on stderr rather than an error.
    - The modules in DEFAULT_PLUGINS are always registered last.
    """
    global _loaded
    if _loaded:
        return
    _loaded = True
    if not hasattr(sys, "_called_from_test") and LLM_LOAD_PLUGINS is None:
        # Only load plugins if not running tests
        pm.load_setuptools_entrypoints("llm")

    # Load any plugins specified in LLM_LOAD_PLUGINS
    if LLM_LOAD_PLUGINS is not None:
        # Strip each name so "plugin-a, plugin-b" resolves both packages;
        # looking up " plugin-b" with its leading space would never match
        package_names = [
            name.strip() for name in LLM_LOAD_PLUGINS.split(",") if name.strip()
        ]
        for package_name in package_names:
            try:
                distribution = metadata.distribution(package_name)
                llm_entry_points = [
                    ep for ep in distribution.entry_points if ep.group == "llm"
                ]
                for entry_point in llm_entry_points:
                    mod = entry_point.load()
                    pm.register(mod, name=entry_point.name)
                    # Ensure name can be found in plugin_to_distinfo later:
                    pm._plugin_distinfo.append((mod, distribution))  # type: ignore
            except metadata.PackageNotFoundError:
                sys.stderr.write(f"Plugin {package_name} could not be found\n")

    for plugin in DEFAULT_PLUGINS:
        mod = importlib.import_module(plugin)
        pm.register(mod, plugin)


================================================
FILE: llm/py.typed
================================================


================================================
FILE: llm/templates.py
================================================
from pydantic import BaseModel, ConfigDict
import string
from typing import Optional, Any, Dict, List, Tuple


class AttachmentType(BaseModel):
    # Pydantic model pairing two required strings; used as the element type
    # of Template.attachment_types.
    # NOTE(review): presumably ``type`` is a MIME type and ``value`` a path or
    # URL for an attachment whose type is given explicitly - confirm against
    # the code that loads templates.
    type: str
    value: str


class Template(BaseModel):
    # A reusable prompt template, validated by pydantic. ``prompt`` and
    # ``system`` are string.Template strings whose $name / ${name}
    # placeholders are filled in by evaluate(). Unknown fields are rejected
    # (extra="forbid").
    name: str
    prompt: Optional[str] = None
    system: Optional[str] = None
    attachments: Optional[List[str]] = None
    attachment_types: Optional[List[AttachmentType]] = None
    model: Optional[str] = None
    defaults: Optional[Dict[str, Any]] = None
    options: Optional[Dict[str, Any]] = None
    extract: Optional[bool] = None  # For extracting fenced code blocks
    extract_last: Optional[bool] = None
    schema_object: Optional[dict] = None
    fragments: Optional[List[str]] = None
    system_fragments: Optional[List[str]] = None
    tools: Optional[List[str]] = None
    functions: Optional[str] = None

    model_config = ConfigDict(extra="forbid")

    class MissingVariables(Exception):
        # Raised by interpolate() when the text references variables that
        # were not supplied
        pass

    def __init__(self, **data):
        super().__init__(**data)
        # Not a pydantic field to avoid YAML being able to set it
        # this controls if Python inline functions code is trusted
        self._functions_is_trusted = False

    def evaluate(
        self, input: str, params: Optional[Dict[str, Any]] = None
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Fill in the template and return a ``(prompt, system)`` tuple.

        ``input`` is made available as the ``$input`` variable. Values from
        ``self.defaults`` are used for any variable missing from ``params``.
        If the template has no prompt text, ``input`` itself is returned as
        the prompt.

        The ``params`` dict passed by the caller is not modified.

        Raises Template.MissingVariables if a referenced variable has no value.
        """
        # Copy so that adding "input" and the defaults does not leak back
        # into the caller's dictionary
        params = dict(params or {})
        params["input"] = input
        if self.defaults:
            for k, v in self.defaults.items():
                if k not in params:
                    params[k] = v
        prompt: Optional[str] = None
        system: Optional[str] = None
        if not self.prompt:
            system = self.interpolate(self.system, params)
            prompt = input
        else:
            prompt = self.interpolate(self.prompt, params)
            system = self.interpolate(self.system, params)
        return prompt, system

    def vars(self) -> set:
        "Return the set of variable names used by the prompt and system text"
        all_vars = set()
        for text in [self.prompt, self.system]:
            if not text:
                continue
            all_vars.update(self.extract_vars(string.Template(text)))
        return all_vars

    @classmethod
    def interpolate(cls, text: Optional[str], params: Dict[str, Any]) -> Optional[str]:
        """
        Substitute params into text using string.Template rules.

        Empty or None text is returned unchanged. Raises cls.MissingVariables
        listing every referenced variable that is absent from params.
        """
        if not text:
            return text
        # Confirm all variables in text are provided
        string_template = string.Template(text)
        vars = cls.extract_vars(string_template)
        missing = [p for p in vars if p not in params]
        if missing:
            raise cls.MissingVariables(
                "Missing variables: {}".format(", ".join(missing))
            )
        return string_template.substitute(**params)

    @staticmethod
    def extract_vars(string_template: string.Template) -> List[str]:
        """
        Return the variable names referenced by string_template, in order of
        appearance (duplicates included).

        Both placeholder spellings are recognised: ``$name`` (the pattern's
        "named" group) and ``${name}`` (its "braced" group). Escapes (``$$``)
        and invalid placeholders are skipped.
        """
        names = []
        for match in string_template.pattern.finditer(string_template.template):
            name = match.group("named") or match.group("braced")
            if name:
                names.append(name)
        return names


================================================
FILE: llm/tools.py
================================================
from datetime import datetime, timezone
from importlib.metadata import version
import time


def llm_version() -> str:
    "Return the installed version of llm"
    # Looked up from the installed distribution's metadata; raises
    # importlib.metadata.PackageNotFoundError if "llm" is not installed.
    return version("llm")


def llm_time() -> dict:
    "Returns the current time, as local time and UTC"
    # Take a single UTC reading and derive local time from it, so that both
    # values describe exactly the same instant
    utc_time = datetime.now(timezone.utc)
    local_time = utc_time.astimezone()

    # Timezone name and DST flag, from one localtime() snapshot so the two
    # cannot disagree across a DST transition
    local_struct = time.localtime()
    is_dst = bool(local_struct.tm_isdst)
    local_tz_name = time.tzname[local_struct.tm_isdst]

    # Format the UTC offset as sign + zero-padded HH:MM. The sign is handled
    # separately from the magnitude: floor division / modulo on a negative
    # number of seconds would turn UTC-03:30 into "-4:30".
    offset = local_time.utcoffset()
    total_minutes = int(offset.total_seconds() // 60) if offset is not None else 0
    sign = "-" if total_minutes < 0 else "+"
    offset_hours, offset_minutes = divmod(abs(total_minutes), 60)
    timezone_offset = f"UTC{sign}{offset_hours:02d}:{offset_minutes:02d}"

    return {
        "utc_time": utc_time.strftime("%Y-%m-%d %H:%M:%S UTC"),
        "utc_time_iso": utc_time.isoformat(),
        "local_timezone": local_tz_name,
        "local_time": local_time.strftime("%Y-%m-%d %H:%M:%S"),
        "timezone_offset": timezone_offset,
        "is_dst": is_dst,
    }


================================================
FILE: llm/utils.py
================================================
import click
import hashlib
import httpx
import itertools
import json
import pathlib
import puremagic
import re
import sqlite_utils
import textwrap
from typing import Any, List, Dict, Optional, Tuple, Type
import os
import threading
import time
from typing import Final

from ulid import ULID

# Corrections applied to detected MIME types by mimetype_from_string() and
# mimetype_from_path(): a detected type found as a key here is replaced by
# the corresponding value.
MIME_TYPE_FIXES = {
    "audio/wave": "audio/wav",
}


class Fragment(str):
    """
    A string that also remembers where it came from, via a ``source`` attribute.
    """

    def __new__(cls, content, *args, **kwargs):
        # str is immutable, so the text has to be fixed at creation time;
        # any extra arguments are left for __init__
        return str.__new__(cls, content)

    def __init__(self, content, source=""):
        # Record the extra attribute carried alongside the text
        self.source = source

    def id(self):
        "Hex SHA-256 digest of the UTF-8 encoded text"
        digest = hashlib.sha256()
        digest.update(self.encode("utf-8"))
        return digest.hexdigest()


def mimetype_from_string(content) -> Optional[str]:
    """
    Detect the MIME type of in-memory content using puremagic.

    Returns the detected type after applying MIME_TYPE_FIXES, or None when
    puremagic raises PureError.
    """
    try:
        detected = puremagic.from_string(content, mime=True)
    except puremagic.PureError:
        return None
    return MIME_TYPE_FIXES.get(detected, detected)


def mimetype_from_path(path) -> Optional[str]:
    """
    Detect the MIME type of the file at path using puremagic.

    Returns the detected type after applying MIME_TYPE_FIXES, or None when
    puremagic raises PureError.
    """
    try:
        detected = puremagic.from_file(path, mime=True)
    except puremagic.PureError:
        return None
    return MIME_TYPE_FIXES.get(detected, detected)


def dicts_to_table_string(
    headings: List[str], dicts: List[Dict[str, str]]
) -> List[str]:
    """
    Render dicts as the lines of a plain-text table.

    The first line holds the headings; each following line holds one dict.
    Columns are left-justified to the widest cell (or heading) and separated
    by four spaces. Keys missing from a dict render as empty cells.
    """
    gap = "    "
    widths = [len(heading) for heading in headings]

    # Widen each column to fit its longest value
    for record in dicts:
        for col, heading in enumerate(headings):
            if heading in record:
                widths[col] = max(widths[col], len(str(record[heading])))

    lines = [gap.join(h.ljust(w) for h, w in zip(headings, widths))]
    for record in dicts:
        cells = [
            str(record.get(h, "")).ljust(w) for h, w in zip(headings, widths)
        ]
        lines.append(gap.join(cells))
    return lines


def remove_dict_none_values(d):
    """
    Return a copy of d without None values.

    Nested dicts are cleaned the same way and dropped entirely if nothing is
    left in them; items of lists are cleaned individually. Anything that is
    not a dict is returned unchanged.
    """
    if not isinstance(d, dict):
        return d
    cleaned = {}
    for key, value in d.items():
        if value is None:
            continue
        if isinstance(value, dict):
            pruned = remove_dict_none_values(value)
            if pruned:
                cleaned[key] = pruned
            continue
        if isinstance(value, list):
            cleaned[key] = list(map(remove_dict_none_values, value))
            continue
        cleaned[key] = value
    return cleaned


class _LogResponse(httpx.Response):
    """
    httpx.Response that echoes the body to stderr while it is being streamed.
    """

    def iter_bytes(self, *args, **kwargs):
        """
        Yield the same chunks as httpx.Response.iter_bytes(), echoing the
        text of each one to stderr first.

        The bytes are decoded incrementally as UTF-8 with replacement, so a
        multi-byte character split across two chunks (or a body that is not
        UTF-8 at all) cannot raise UnicodeDecodeError and break the request
        being logged.
        """
        import codecs

        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
        for chunk in super().iter_bytes(*args, **kwargs):
            text = decoder.decode(chunk)
            if text:
                click.echo(text, err=True)
            yield chunk
        # Flush anything still buffered (e.g. a truncated trailing character)
        tail = decoder.decode(b"", final=True)
        if tail:
            click.echo(tail, err=True)


class _LogTransport(httpx.BaseTransport):
    # Transport wrapper: delegates each request to the wrapped transport and
    # re-wraps the response as a _LogResponse.
    def __init__(self, transport: httpx.BaseTransport):
        # The real transport that performs the request
        self.transport = transport

    def handle_request(self, request: httpx.Request) -> httpx.Response:
        # Send via the wrapped transport, then rebuild the response as a
        # _LogResponse sharing the same status, headers, stream and extensions
        response = self.transport.handle_request(request)
        return _LogResponse(
            status_code=response.status_code,
            headers=response.headers,
            stream=response.stream,
            extensions=response.extensions,
        )


def _no_accept_encoding(request: httpx.Request):
    # Request event hook (see logging_client): drop the Accept-Encoding
    # header if present. Presumably so the server replies uncompressed and
    # the logged body is readable - NOTE(review): confirm intent.
    request.headers.pop("accept-encoding", None)


def _log_response(response: httpx.Response):
    """
    Response event hook: write a summary of the request and the response
    status/headers to stderr.

    Credentials are masked: authorization-style and API-key headers are
    replaced with "[...]", and cookie / set-cookie values are reduced to the
    cookie name. A JSON request body is pretty-printed; any other body is
    printed as text, with undecodable bytes replaced so that binary payloads
    cannot crash the logger. The response body is not printed here - only
    its "Body:" heading.
    """
    # Header names (lower-case) whose values are secrets and must never be logged
    secret_headers = {"authorization", "proxy-authorization", "x-api-key", "api-key"}

    request = response.request
    click.echo(f"Request: {request.method} {request.url}", err=True)
    click.echo("  Headers:", err=True)
    for key, value in request.headers.items():
        if key.lower() in secret_headers:
            value = "[...]"
        if key.lower() == "cookie":
            value = value.split("=")[0] + "=..."
        click.echo(f"    {key}: {value}", err=True)
    click.echo("  Body:", err=True)
    try:
        request_body = json.loads(request.content)
        body_text = json.dumps(request_body, indent=2)
    except ValueError:
        # ValueError covers both json.JSONDecodeError (not JSON) and
        # UnicodeDecodeError (body is not valid UTF-8 text)
        body_text = request.content.decode("utf-8", errors="replace")
    click.echo(textwrap.indent(body_text, "    "), err=True)
    click.echo(f"Response: status_code={response.status_code}", err=True)
    click.echo("  Headers:", err=True)
    for key, value in response.headers.items():
        if key.lower() == "set-cookie":
            value = value.split("=")[0] + "=..."
        click.echo(f"    {key}: {value}", err=True)
    click.echo("  Body:", err=True)


def logging_client() -> httpx.Client:
    # Build an httpx.Client wired for logging: _LogTransport wraps the
    # standard HTTP transport, the request hook strips Accept-Encoding, and
    # the response hook prints request/response details via _log_response.
    return httpx.Client(
        transport=_LogTransport(httpx.HTTPTransport()),
        event_hooks={"request": [_no_accept_encoding], "response": [_log_response]},
    )


def simplify_usage_dict(d):
    """
    Strip noise from a usage dictionary.

    Entries equal to 0 or to an empty dict are removed at every nesting
    level, as are None values and nested dicts that end up empty. Always
    returns {} rather than any other falsy result.
    """

    def prune(node):
        if not isinstance(node, dict):
            return node
        kept = {}
        for key, value in node.items():
            if value != 0 and value != {}:
                reduced = prune(value)
                if reduced is not None and reduced != {}:
                    kept[key] = reduced
        return kept

    simplified = prune(d)
    return simplified if simplified else {}


def token_usage_string(input_tokens, output_tokens, token_details) -> str:
    """
    Build a one-line summary such as '1,000 input, 20 output, {...}'.

    Counts that are None are left out; token_details, when truthy, is
    appended as JSON.
    """
    parts = []
    for count, label in ((input_tokens, "input"), (output_tokens, "output")):
        if count is not None:
            parts.append("{:,} {}".format(count, label))
    if token_details:
        parts.append(json.dumps(token_details))
    return ", ".join(parts)


def extract_fenced_code_block(text: str, last: bool = False) -> Optional[str]:
    """
    Return the contents of a Markdown fenced code block found in text.

    A fence is a line starting with three or more backticks, optionally
    followed by a language tag. The block ends at a line consisting of the
    same number of backticks (trailing spaces allowed).

    Args:
        text (str): The text to search.
        last (bool): Return the last block instead of the first.

    Returns:
        Optional[str]: The code between the fences, or None if there is no
        fenced block.
    """
    # Pattern, piece by piece:
    # - (?m)^          the opening fence must start a line
    # - (?P<fence>`{3,})  three or more backticks
    # - (?P<lang>\w+)?    optional language tag, then a newline
    # - (?P<code>.*?)     the block contents, as short as possible
    # - ^(?P=fence)       closing fence: same backticks, at start of a line
    # - [ ]*(?=\n|$)      optional trailing spaces, then newline or end of text
    fence_re = re.compile(
        r"""(?m)^(?P<fence>`{3,})(?P<lang>\w+)?\n(?P<code>.*?)^(?P=fence)[ ]*(?=\n|$)""",
        re.DOTALL,
    )
    chosen = None
    for found in fence_re.finditer(text):
        chosen = found
        if not last:
            # The first block is all we need
            break
    if chosen is None:
        return None
    return chosen.group("code")


def make_schema_id(schema: dict) -> Tuple[str, str]:
    """
    Return ``(schema_id, schema_json)`` for schema.

    schema_json is the compact JSON serialization (no spaces after
    separators); schema_id is the hex 16-byte BLAKE2b digest of that JSON.
    """
    serialized = json.dumps(schema, separators=(",", ":"))
    hasher = hashlib.blake2b(digest_size=16)
    hasher.update(serialized.encode())
    return hasher.hexdigest(), serialized


def output_rows_as_json(rows, nl=False, compact=False, json_cols=()):
    """
    Stream rows as JSON text, one string per row.

    Parameters:
    - rows: Iterable of dictionaries to output
    - nl: if True, emit newline-delimited JSON (one object per string)
    - compact: if True, emit an array with one row per line:
      [{"...": "..."},\n {"...": "..."}]
    - json_cols: columns whose values are JSON strings; they are decoded
      (in place, on the row) before the row is serialized

    With neither nl nor compact set, a pretty-printed, indented array is
    produced. An empty input yields "[]" unless nl is set.

    Yields:
    - Strings to be written out, in order
    """
    # Pair every row with the one after it, so the last row can be recognised
    rows_now, rows_ahead = itertools.tee(rows, 2)
    next(rows_ahead, None)
    emitted_any = False

    for row, following in itertools.zip_longest(rows_now, rows_ahead):
        is_first = not emitted_any
        is_last = following is None
        for col in json_cols:
            row[col] = json.loads(row[col])

        if nl:
            yield json.dumps(row)
        else:
            if compact:
                opening = "[" if is_first else " "
                body = json.dumps(row)
                closing = "]" if is_last else ""
            else:
                opening = "[\n" if is_first else ""
                body = textwrap.indent(json.dumps(row, indent=2), "  ")
                closing = "\n]" if is_last else ""
            comma = "" if is_last else ","
            yield opening + body + comma + closing
        emitted_any = True

    if not emitted_any and not nl:
        # Nothing was output, so the array is empty
        yield "[]"


def resolve_schema_input(db, schema_input, load_template):
    """
    Turn a user-supplied schema reference into a schema dict.

    schema_input is tried, in order, as:
    1. ``t:name`` - the schema of the template returned by load_template(name)
    2. a JSON object literal (text starting with "{")
    3. the concise schema DSL (text containing a space or a comma)
    4. the path of an existing file containing JSON
    5. the ID of a row in the ``schemas`` table of db

    Returns None for empty input. Raises click.ClickException for a bad
    template or schema file, click.BadParameter if nothing matches.
    """
    if not schema_input:
        return
    stripped = schema_input.strip()

    if stripped.startswith("t:"):
        name = stripped[2:]
        try:
            template = load_template(name)
            template_schema = template.schema_object
        except ValueError:
            raise click.ClickException("Invalid template: {}".format(name))
        if not template_schema:
            raise click.ClickException("Template '{}' has no schema".format(name))
        return template.schema_object

    if stripped.startswith("{"):
        try:
            return json.loads(schema_input)
        except ValueError:
            # Not valid JSON after all - fall through to the other forms
            pass

    if " " in stripped or "," in schema_input:
        # Treat it as schema DSL
        return schema_dsl(schema_input)

    # Is it a file on disk?
    candidate = pathlib.Path(schema_input)
    if candidate.exists():
        try:
            return json.loads(candidate.read_text())
        except ValueError:
            raise click.ClickException("Schema file contained invalid JSON")

    # Last attempt: is it an ID in the DB?
    try:
        stored = db["schemas"].get(schema_input)
        return json.loads(stored["content"])
    except (sqlite_utils.db.NotFoundError, ValueError):
        raise click.BadParameter("Invalid schema")


def schema_summary(schema: dict) -> str:
    """
    Summarize a JSON schema as a compact string of its property names.

    Objects are written as ``{a, b}``; array properties as ``name: [...]``
    and object properties as ``name: {...}``, each summarized recursively.
    A top-level array is summarized by its items.

    Args:
        schema (dict): A JSON schema dictionary

    Returns:
        str: The summary, or "" for anything that is not an object or array
        schema
    """
    if not schema or not isinstance(schema, dict):
        return ""

    kind = schema.get("type", "")
    if kind == "array":
        return schema_summary(schema.get("items", {}))
    if kind != "object":
        return ""

    parts = []
    for name, sub_schema in schema.get("properties", {}).items():
        sub_kind = sub_schema.get("type", "")
        if sub_kind == "array":
            inner = schema_summary(sub_schema.get("items", {}))
            parts.append(f"{name}: [{inner}]")
        elif sub_kind == "object":
            inner = schema_summary(sub_schema)
            parts.append(f"{name}: {inner}")
        else:
            parts.append(name)
    return "{" + ", ".join(parts) + "}"


def schema_dsl(schema_dsl: str, multi: bool = False) -> Dict[str, Any]:
    """
    Expand the concise schema syntax into a JSON schema.

    Each field is written ``name [type][: description]``. Fields are
    separated by newlines if the string contains any, otherwise by commas.
    Recognised types are int, float, bool and str; a missing or unrecognised
    type means string. Every field is marked as required.

    Args:
        schema_dsl: The schema in concise form.
        multi: If True, wrap the result so it describes an "items" array of
            these objects.

    Returns:
        A dictionary representing the JSON schema.
    """
    json_types = {
        "int": "integer",
        "float": "number",
        "bool": "boolean",
        "str": "string",
    }

    # Newlines take priority over commas as the field separator
    separator = "\n" if "\n" in schema_dsl else ","
    fields = [piece.strip() for piece in schema_dsl.split(separator) if piece.strip()]

    properties: Dict[str, Any] = {}
    required = []
    for field in fields:
        # Everything after the first colon is the description
        field_info, _, description = field.partition(":")
        description = description.strip()

        tokens = field_info.strip().split()
        field_name = tokens[0].strip()

        # String unless a known type follows the name
        field_type = "string"
        if len(tokens) > 1:
            field_type = json_types.get(tokens[1].strip(), field_type)

        spec = {"type": field_type}
        if description:
            spec["description"] = description
        properties[field_name] = spec
        required.append(field_name)

    json_schema: Dict[str, Any] = {
        "type": "object",
        "properties": properties,
        "required": required,
    }
    return multi_schema(json_schema) if multi else json_schema


def multi_schema(schema: dict) -> dict:
    "Return an object schema whose single required 'items' property is an array of schema"
    items_property = {"type": "array", "items": schema}
    wrapper = {"type": "object"}
    wrapper["properties"] = {"items": items_property}
    wrapper["required"] = ["items"]
    return wrapper


def find_unused_key(item: dict, key: str) -> str:
    'Append underscores to key until it is absent from item: "id" -> "id_" if "id" is taken'
    candidate = key
    while True:
        if candidate not in item:
            return candidate
        candidate = candidate + "_"


def truncate_string(
    text: str,
    max_length: int = 100,
    normalize_whitespace: bool = False,
    keep_end: bool = False,
) -> str:
    """
    Truncate a string to a maximum length, with options to normalize whitespace and keep both start and end.

    The result is never longer than max_length. Text that already fits is
    returned unchanged; otherwise it is shortened and marked with "...".
    When max_length is too small to hold the ellipsis (less than 3), the
    text is simply cut to max_length characters.

    Args:
        text: The string to truncate
        max_length: Maximum length of the result string
        normalize_whitespace: If True, replace all whitespace with a single space
        keep_end: If True, keep both beginning and end of string

    Returns:
        Truncated string
    """
    if not text:
        return text

    if normalize_whitespace:
        text = re.sub(r"\s+", " ", text)

    if len(text) <= max_length:
        return text

    # keep_end output looks like "ab... yz": below 9 characters there is no
    # room for at least two characters on each side of the separator
    min_keep_end_length = 9

    if keep_end and max_length >= min_keep_end_length:
        # Calculate how much text to keep at each end
        # Subtract 5 for the "... " separator
        cutoff = (max_length - 5) // 2
        return text[:cutoff] + "... " + text[-cutoff:]

    if max_length < 3:
        # No room for the ellipsis. A negative slice index here would count
        # from the END of the text and return something longer than
        # max_length, so cut hard instead.
        return text[: max(max_length, 0)]

    # Simple truncation, also used when max_length is too small for keep_end
    return text[: max_length - 3] + "..."


def ensure_fragment(db, content):
    """
    Make sure content is stored in the fragments table and return its row id.

    Rows are keyed by the SHA-256 hash of the content, so existing content
    is not inserted again. The source is recorded only when content is a
    Fragment; for plain strings it is stored as NULL.
    """
    insert_sql = """
    insert into fragments (hash, content, datetime_utc, source)
    values (:hash, :content, datetime('now'), :source)
    on conflict(hash) do nothing
    """
    digest = hashlib.sha256(content.encode("utf-8")).hexdigest()
    origin = content.source if isinstance(content, Fragment) else None
    with db.conn:
        db.execute(insert_sql, {"hash": digest, "content": content, "source": origin})
        matches = db.query(
            "select id from fragments where hash = :hash", {"hash": digest}
        )
        return list(matches)[0]["id"]


def ensure_tool(db, tool):
    """
    Make sure tool is stored in the tools table and return its row id.

    Rows are keyed by tool.hash(), so a tool that is already present is not
    inserted again. The input schema is stored as JSON text.
    """
    insert_sql = """
    insert into tools (hash, name, description, input_schema, plugin)
    values (:hash, :name, :description, :input_schema, :plugin)
    on conflict(hash) do nothing
    """
    with db.conn:
        row = {
            "hash": tool.hash(),
            "name": tool.name,
            "description": tool.description,
            "input_schema": json.dumps(tool.input_schema),
            "plugin": tool.plugin,
        }
        db.execute(insert_sql, row)
        matches = db.query(
            "select id from tools where hash = :hash", {"hash": tool.hash()}
        )
        return list(matches)[0]["id"]


def maybe_fenced_code(content: str) -> str:
    """
    Wrap content in a Markdown code fence when it looks like code.

    Content counts as code if it has more than ten "<" characters, or if it
    has more than three lines and over 90% of them are shorter than 120
    characters. The fence is made of enough backticks (at least three) that
    it cannot clash with any backtick run inside the content. Content that
    does not look like code is returned unchanged.
    """
    looks_like_code = content.count("<") > 10
    if not looks_like_code:
        lines = content.splitlines()
        if len(lines) > 3:
            short_lines = [line for line in lines if len(line) < 120]
            looks_like_code = len(short_lines) / len(lines) > 0.9

    if not looks_like_code:
        return content

    # Lengthen the fence until that run of backticks is absent from the content
    fence = "```"
    while fence in content:
        fence += "`"
    return "\n" + fence + "\n" + content.strip() + "\n" + fence


# One or more letters, digits, underscores or hyphens followed by a colon,
# anchored at the start of the string
_plugin_prefix_re = re.compile(r"^[a-zA-Z0-9_-]+:")


def has_plugin_prefix(value: str) -> bool:
    "True if value begins with a 'prefix:' made of letters, digits, '_' or '-'"
    return _plugin_prefix_re.match(value) is not None


def _parse_kwargs(arg_str: str) -> Dict[str, Any]:
    """
    Parse comma-separated ``key=value`` pairs where each value is valid JSON.

    Commas only separate pairs when they appear outside of quoted strings and
    outside of any ``{}``, ``[]`` or ``()`` nesting, so values such as
    ``[1, 2]`` or ``"a,b"`` survive intact. Empty segments (for example from a
    trailing comma) are ignored.

    Raises ``ValueError`` if a segment has no ``=`` or if a value is not
    valid JSON.
    """
    segments = []
    current = []
    nesting = 0
    quote = None  # The quote character of the string we are inside, if any
    escaped = False

    for char in arg_str:
        if quote is not None:
            # Inside a string: copy everything, watching for the closing quote
            current.append(char)
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == quote:
                quote = None
            continue
        if char == "," and nesting == 0:
            # Top-level comma ends the current key=value segment
            segments.append("".join(current).strip())
            current = []
            continue
        if char in ('"', "'"):
            quote = char
        elif char in ("{", "[", "("):
            nesting += 1
        elif char in ("}", "]", ")"):
            nesting -= 1
        current.append(char)
    if current:
        segments.append("".join(current).strip())

    kwargs: Dict[str, Any] = {}
    for segment in segments:
        if not segment:
            continue
        key, separator, value_str = segment.partition("=")
        if not separator:
            raise ValueError(f"Invalid keyword spec segment: '{segment}'")
        key = key.strip()
        value_str = value_str.strip()
        try:
            value = json.loads(value_str)
        except json.JSONDecodeError as e:
            raise ValueError(f"Value for '{key}' is not valid JSON: {value_str}") from e
        kwargs[key] = value
    return kwargs


def instantiate_from_spec(class_map: Dict[str, Type], spec: str):
    """
    Instantiate a class from a specification string with flexible argument formats.

    This function parses a specification string that defines a class name and its
    constructor arguments, then instantiates the class using the provided class
    mapping. The specification supports multiple argument formats for flexibility.

    Parameters
    ----------
    class_map : Dict[str, Type]
        A mapping from class names (strings) to their corresponding class objects.
        Only classes present in this mapping can be instantiated.
    spec : str
        A specification string defining the class to instantiate and its arguments.

        Format: "ClassName" or "ClassName(arguments)"

        Supported argument formats:
        - Empty: ClassName() - calls constructor with no arguments
        - JSON object: ClassName({"key": "value", "other": 42}) - unpacked as **kwargs
        - Single JSON value: ClassName("hello") or ClassName([1,2,3]) - passed as single positional argument
        - Key-value pairs: ClassName(name="test", count=5, items=[1,2]) - parsed as individual kwargs
          where values must be valid JSON. Keys may begin with the letters of a
          JSON literal, e.g. ClassName(nullable=true) or ClassName(true_color=1).

    Returns
    -------
    object
        An instance of the specified class, constructed with the parsed arguments.

    Raises
    ------
    ValueError
        If the spec string format is invalid, if the class name is not found in
        class_map, if JSON parsing fails, or if argument parsing encounters errors.
    """
    m = re.fullmatch(r"\s*([A-Za-z_][A-Za-z0-9_]*)\s*(?:\((.*)\))?\s*$", spec)
    if not m:
        raise ValueError(f"Invalid spec string: '{spec}'")
    class_name, arg_body = m.group(1), (m.group(2) or "").strip()
    if class_name not in class_map:
        raise ValueError(f"Unknown class '{class_name}'")

    cls = class_map[class_name]

    # No arguments at all
    if arg_body == "":
        return cls()

    # Starts with { -> JSON object to kwargs
    if arg_body.startswith("{"):
        try:
            kw = json.loads(arg_body)
        except json.JSONDecodeError as e:
            raise ValueError("Argument JSON object is not valid JSON") from e
        if not isinstance(kw, dict):
            raise ValueError("Top-level JSON must be an object when using {} form")
        return cls(**kw)

    # Starts with quote / number / [ / true / false / null -> single positional
    # JSON value. The literals only count when they are not the beginning of a
    # keyword name: "nullable=true" or "true = 1" must fall through to the
    # key=value parser instead of being rejected as invalid positional JSON.
    if re.match(
        r'\s*(?:["\[\d\-]|(?:true|false|null)(?![A-Za-z0-9_]|\s*=))',
        arg_body,
        re.I,
    ):
        try:
            positional_value = json.loads(arg_body)
        except json.JSONDecodeError as e:
            raise ValueError("Positional argument must be valid JSON") from e
        return cls(positional_value)

    # Otherwise treat as key=value pairs
    kwargs = _parse_kwargs(arg_body)
    return cls(**kwargs)


# Divisor used to turn time.time_ns() nanoseconds into whole milliseconds
NANOSECS_IN_MILLISECS = 1000000
# Byte widths of the two parts of the 16-byte ULID: timestamp, then randomness
TIMESTAMP_LEN = 6
RANDOMNESS_LEN = 10

# _lock is held by monotonic_ulid() while it reads and replaces _last
_lock: Final = threading.Lock()
_last: Optional[bytes] = None  # 16-byte last produced ULID


def monotonic_ulid() -> ULID:
    """
    Return a ULID instance that is guaranteed to be *strictly larger* than every
    other ULID returned by this function inside the same process.

    It works the same way the reference JavaScript `monotonicFactory` does:
    * If the current call happens in the same millisecond as the previous
        one - or the clock reading is *older* than the previous one, because
        the system clock stepped backwards - the previous timestamp is kept
        and the 80-bit randomness part is incremented by exactly one.
    * As soon as the system clock moves forward past the last timestamp, a
        brand-new ULID with cryptographically secure randomness is generated.
    * If the randomness part would overflow 2**80 an `OverflowError` is
        raised (practically impossible).
    """
    global _last

    with _lock:
        # Read the clock while holding the lock so that a thread cannot carry
        # a stale timestamp past another thread that has already advanced _last
        now_ms = time.time_ns() // NANOSECS_IN_MILLISECS

        if _last is not None:
            # Decode timestamp from the last ULID we handed out
            last_ms = int.from_bytes(_last[:TIMESTAMP_LEN], "big")

            # Same millisecond, or the clock went backwards: keep the previous
            # timestamp and increment the randomness. Starting fresh here with
            # a smaller timestamp would produce a ULID that sorts *before* the
            # previous one.
            if now_ms <= last_ms:
                rand_int = int.from_bytes(_last[TIMESTAMP_LEN:], "big") + 1
                if rand_int >= 1 << (RANDOMNESS_LEN * 8):
                    raise OverflowError(
                        "Randomness overflow: > 2**80 ULIDs requested "
                        "in one millisecond!"
                    )
                randomness = rand_int.to_bytes(RANDOMNESS_LEN, "big")
                _last = _last[:TIMESTAMP_LEN] + randomness
                return ULID(_last)

        # First call, or a new (later) millisecond: start fresh
        _last = _fresh(now_ms)
        return ULID(_last)


def _fresh(ms: int) -> bytes:
    """
    Return 16 new ULID bytes: ``ms`` encoded big-endian in TIMESTAMP_LEN bytes
    followed by RANDOMNESS_LEN bytes from ``os.urandom``.
    """
    return int.to_bytes(ms, TIMESTAMP_LEN, "big") + os.urandom(RANDOMNESS_LEN)


================================================
FILE: mypy.ini
================================================
[mypy]

# The per-module sections below tell mypy not to report an error when it
# cannot find type information for these third-party packages.
[mypy-pluggy.*]
ignore_missing_imports = True

[mypy-click_default_group.*]
ignore_missing_imports = True

[mypy-sqlite_migrate.*]
ignore_missing_imports = True


================================================
FILE: pyproject.toml
================================================
[project]
name = "llm"
version = "0.29"
description = "CLI utility and Python library for interacting with Large Language Models from organizations like OpenAI, Anthropic and Gemini plus local models installed on your own machine."
readme = { file = "README.md", content-type = "text/markdown" }
authors = [
    { name = "Simon Willison" },
]
license = "Apache-2.0"
requires-python = ">=3.10"
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: End Users/Desktop",
    "Intended Audience :: Science/Research",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Text Processing :: Linguistic",
    "Topic :: Utilities",
]

# Runtime requirements, installed whenever the package itself is installed.
dependencies = [
    "click",
    "condense-json>=0.1.3",
    "openai>=1.55.3",
    "click-default-group>=1.2.3",
    "sqlite-utils>=3.37",
    "sqlite-migrate>=0.1a2",
    "pydantic>=2.0.0",
    "PyYAML",
    "pluggy",
    "python-ulid",
    "setuptools",
    "pip",
    # Only required on Windows (see the environment marker)
    "pyreadline3; sys_platform == 'win32'",
    "puremagic",
]

# Development-only requirements: build, test, lint, type-check and docs tooling.
# These are not part of the runtime dependencies declared under [project].
[dependency-groups]
dev = [
    "build",
    "click<8.2.0", # https://github.com/simonw/llm/issues/1024
    "pytest",
    "numpy",
    "pytest-httpx>=0.33.0",
    "pytest-asyncio",
    "cogapp",
    "mypy>=1.10.0",
    "black>=25.1.0",
    "pytest-recording",
    "ruff",
    "syrupy",
    "types-click",
    "types-PyYAML",
    "types-setuptools",
    "llm-echo==0.3a3",
    # docs
    "sphinx==7.2.6",
    "furo==2023.9.10",
    "sphinx-autobuild",
    "sphinx-copybutton",
    "sphinx-markdown-builder==0.6.8",
    "myst-parser",
]

[project.urls]
Homepage = "https://github.com/simonw/llm"
Documentation = "https://llm.datasette.io/"
Issues = "https://github.com/simonw/llm/issues"
CI = "https://github.com/simonw/llm/actions"
Changelog = "https://github.com/simonw/llm/releases"

[project.scripts]
llm = "llm.cli:cli"

[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"


================================================
FILE: pytest.ini
================================================
[pytest]
asyncio_default_fixture_loop_scope = function

================================================
FILE: ruff.toml
================================================
line-length = 160


================================================
FILE: tests/cassettes/test_tools/test_tool_use_basic.yaml
================================================
interactions:
- request:
    body: '{"messages":[{"role":"user","content":"What is 1231 * 2331?"}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true},"tools":[{"type":"function","function":{"name":"multiply","description":"Multiply
      two numbers.","parameters":{"properties":{"a":{"type":"integer"},"b":{"type":"integer"}},"required":["a","b"],"type":"object"}}}]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '351'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.78.0
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.78.0
      x-stainless-read-timeout:
      - '600'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: 'data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_1EYWDzueHEp8OsB8jJSEp7WB","type":"function","function":{"name":"multiply","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"a"}}]},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":"}}]},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"123"}}]},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"1"}}]},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":",\""}}]},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"b"}}]},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":"}}]},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"233"}}]},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"1"}}]},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null}


        data: {"id":"chatcmpl-BWlJBDk2xe66hjff60joVYpXi1hh4","object":"chat.completion.chunk","created":1747148049,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_dbaca60df0","choices":[],"usage":{"prompt_tokens":54,"completion_tokens":20,"total_tokens":74,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}}


        data: [DONE]


        '
    headers:
      CF-RAY:
      - 93f2fd4c4ce5238d-SJC
      Connection:
      - keep-alive
      Content-Type:
      - text/event-stream; charset=utf-8
      Date:
      - Tue, 13 May 2025 14:54:09 GMT
      Server:
      - cloudflare
      Set-Cookie:
      - __cf_bm=ys1VX4Q4znOtsubzjx.nHCe9_hPEK_9fLmKeIYZd9LE-1747148049-1.0.1.1-c_hjWtmrr1A3GwMaBXzfhWwaX3EZ2E5Iz_5j.KkgJ3qqPA8vdd2tTpYVL1KRgkOWgWKSUHvYx9I62zt4yf.e9GO3PWHu60ji3ZEjh81.uNc;
        path=/; expires=Tue, 13-May-25 15:24:09 GMT; domain=.api.openai.com; HttpOnly;
        Secure; SameSite=None
      - _cfuvid=hnWsKFQeNuxEVe5VU69rl9nk7g.ahx0f.wEzyB.f7Kk-1747148049996-0.0.1.1-604800000;
        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - user-r3e61fpak04cbaokp5buoae4
      openai-processing-ms:
      - '547'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-envoy-upstream-service-time:
      - '552'
      x-ratelimit-limit-requests:
      - '30000'
      x-ratelimit-limit-tokens:
      - '150000000'
      x-ratelimit-remaining-requests:
      - '29999'
      x-ratelimit-remaining-tokens:
      - '149999993'
      x-ratelimit-reset-requests:
      - 2ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_c3e995e7a86953713a6dc1b17e399fd5
    status:
      code: 200
      message: OK
- request:
    body: '{"messages":[{"role":"user","content":"What is 1231 * 2331?"},{"role":"assistant","content":""},{"role":"assistant","tool_calls":[{"type":"function","id":"call_1EYWDzueHEp8OsB8jJSEp7WB","function":{"name":"multiply","arguments":"{\"a\":
      1231, \"b\": 2331}"}}]},{"role":"tool","tool_call_id":"call_1EYWDzueHEp8OsB8jJSEp7WB","content":"2869461"}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true},"tools":[{"type":"function","function":{"name":"multiply","description":"Multiply
      two numbers.","parameters":{"properties":{"a":{"type":"integer"},"b":{"type":"integer"}},"required":["a","b"],"type":"object"}}}]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '633'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.78.0
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.78.0
      x-stainless-read-timeout:
      - '600'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: 'data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"The"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"
        result"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"
        of"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"
        \\("},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"
        "},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"123"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"
        \\"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"times"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"
        "},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"233"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"
        \\"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":")"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"
        is"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"
        \\("},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"
        "},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"869"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"461"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":"
        \\"},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{"content":")."},"logprobs":null,"finish_reason":null}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null}


        data: {"id":"chatcmpl-BWlJCN7VZTtSHROczp0AbrjFGhRMA","object":"chat.completion.chunk","created":1747148050,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_0392822090","choices":[],"usage":{"prompt_tokens":87,"completion_tokens":26,"total_tokens":113,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}}


        data: [DONE]


        '
    headers:
      CF-RAY:
      - 93f2fd522938eb20-SJC
      Connection:
      - keep-alive
      Content-Type:
      - text/event-stream; charset=utf-8
      Date:
      - Tue, 13 May 2025 14:54:10 GMT
      Server:
      - cloudflare
      Set-Cookie:
      - __cf_bm=fdLfMrUkpH5wVR0YRkrP2U10JK7jFW._HUYimqZukgg-1747148050-1.0.1.1-aQIjrwFbyIcTr_HW09RscO7okcLFLbvptmCQBFweX4SskJ3FKciprVe5ffCuvcWn03rURb.wLkcTAzQzoZIdOv6OBYcJu5vMutdjPs9t0EI;
        path=/; expires=Tue, 13-May-25 15:24:10 GMT; domain=.api.openai.com; HttpOnly;
        Secure; SameSite=None
      - _cfuvid=X620Mz_MZZuz8JBE23JWZUpAD7vTnI.UtEcQjGZZPBA-1747148050599-0.0.1.1-604800000;
        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - user-r3e61fpak04cbaokp5buoae4
      openai-processing-ms:
      - '221'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-envoy-upstream-service-time:
      - '225'
      x-ratelimit-limit-requests:
      - '30000'
      x-ratelimit-limit-tokens:
      - '150000000'
      x-ratelimit-remaining-requests:
      - '29999'
      x-ratelimit-remaining-tokens:
      - '149999987'
      x-ratelimit-reset-requests:
      - 2ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_51f3397f64a0302e34a4d78ea85e0585
    status:
      code: 200
      message: OK
version: 1


================================================
FILE: tests/cassettes/test_tools/test_tool_use_chain_of_two_calls.yaml
================================================
interactions:
- request:
    body: '{"messages":[{"role":"user","content":"Can the country of Crumpet have
      dragons? Answer with only YES or NO"}],"model":"gpt-4o-mini","stream":false,"tools":[{"type":"function","function":{"name":"lookup_population","description":"Returns
      the current population of the specified fictional country","parameters":{"properties":{"country":{"type":"string"}},"required":["country"],"type":"object"}}},{"type":"function","function":{"name":"can_have_dragons","description":"Returns
      True if the specified population can have dragons, False otherwise","parameters":{"properties":{"population":{"type":"integer"}},"required":["population"],"type":"object"}}}]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '650'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.78.0
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.78.0
      x-stainless-read-timeout:
      - '600'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: !!binary |
        H4sIAAAAAAAAAwAAAP//jFPBjtowEL3nK6w5kyrJ0gI5slXppWzbZbdqyyoyziS4OLZrO1sQ4t+r
        GEjCLpWaQ2TNm/fmzYy9DwgBnkNKgK2pY5UW4fSbns1n+ee7+eLP9uPmx+N29Zu5L/fT99VCwqBh
        qNUvZO7MesNUpQU6rk4wM0gdNqrxaDiK390kb2MPVCpH0dBK7cKhCisueZhEyTCMRmE8PrHXijO0
        kJKfASGE7P2/8Slz3EJKosE5UqG1tERI2yRCwCjRRIBay62j0sGgA5mSDmVjXdZC9ACnlMgYFaIr
        fPz2vXM3LCpEtlh8Hz98mKuRmd/Su+nD3Imv98+fZr16R+md9oaKWrJ2SD28jacvihECklaeK5Ta
        1DrTSteCXhEhBKgp6wqlaxqA/RKYqqUzuyWkS7g1daXRLeEAF7RDcO381JuLwaK2VLweGJVSOW/F
        T+zphBza5QhVaqNW9gUVCi65XWcGqfU990cfnI14C1BfbBe0UZV2mVMb9EUnyVEUugvYgfHoBDrl
        qOjFo8ngilyWo6Pcb7+9cIyyNeYdtbt4tM656gFBr/XXbq5pH9vnsvwf+Q5gDLXDPNMGc84uO+7S
        DDbv819p7ZC9YbBonjnDzHE0zTpyLGgtjq8G7M46rLKCyxKNNtw/HSh0Ft1MknGSRJMIgkPwFwAA
        //8DALof6VxIBAAA
    headers:
      CF-RAY:
      - 93f47072dde6f88d-IAD
      Connection:
      - keep-alive
      Content-Encoding:
      - gzip
      Content-Type:
      - application/json
      Date:
      - Tue, 13 May 2025 19:07:32 GMT
      Server:
      - cloudflare
      Set-Cookie:
      - __cf_bm=vfHkbLfwVTTGPkFT0I4U0xn5CHQZYIpOutDV4z7NRlA-1747163252-1.0.1.1-kj_JiiyNxn9AWCWisV6.pYNShKVqqT0Foicji2.ZLNaAkHm5VEwac0QjxVhCiWQs9Xp_wvkeTzrgVxmD8bkzDwTPn96U.81YERXZda3_m18;
        path=/; expires=Tue, 13-May-25 19:37:32 GMT; domain=.api.openai.com; HttpOnly;
        Secure; SameSite=None
      - _cfuvid=SQgXKMy2qkeOsbwwTl62blvuirTS_TkZSvEOztbYIlI-1747163252293-0.0.1.1-604800000;
        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - user-r3e61fpak04cbaokp5buoae4
      openai-processing-ms:
      - '574'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-envoy-upstream-service-time:
      - '591'
      x-ratelimit-limit-requests:
      - '30000'
      x-ratelimit-limit-tokens:
      - '150000000'
      x-ratelimit-remaining-requests:
      - '29999'
      x-ratelimit-remaining-tokens:
      - '149999981'
      x-ratelimit-reset-requests:
      - 2ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_1e7dabaf1f0dba1ec89a134d3bde8476
    status:
      code: 200
      message: OK
- request:
    body: '{"messages":[{"role":"user","content":"Can the country of Crumpet have
      dragons? Answer with only YES or NO"},{"role":"assistant","tool_calls":[{"type":"function","id":"call_TTY8UFNo7rNCaOBUNtlRSvMG","function":{"name":"lookup_population","arguments":"{\"country\":
      \"Crumpet\"}"}}]},{"role":"tool","tool_call_id":"call_TTY8UFNo7rNCaOBUNtlRSvMG","content":"123124"}],"model":"gpt-4o-mini","stream":false,"tools":[{"type":"function","function":{"name":"lookup_population","description":"Returns
      the current population of the specified fictional country","parameters":{"properties":{"country":{"type":"string"}},"required":["country"],"type":"object"}}},{"type":"function","function":{"name":"can_have_dragons","description":"Returns
      True if the specified population can have dragons, False otherwise","parameters":{"properties":{"population":{"type":"integer"}},"required":["population"],"type":"object"}}}]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '906'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.78.0
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.78.0
      x-stainless-read-timeout:
      - '600'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: !!binary |
        H4sIAAAAAAAAA4xTTYvbMBC9+1eIOcfFH2k+fNyWlEIPLaWkm+5itNLY0UaWVEkOzYb892J7YzvZ
        FOqDEfPmvXkzIx0DQkBwyAiwLfWsMjK8W5tP39a7es+i1YZvNs9fXxb4Jf7A/R1bwaRh6KdnZP7M
        esd0ZSR6oVUHM4vUY6Maz6fzeJYm76ctUGmOsqGVxodTHVZCiTCJkmkYzcN48creasHQQUZ+BYQQ
        cmz/jU/F8Q9kJJqcIxU6R0uErE8iBKyWTQSoc8J5qjxMBpBp5VE11lUt5QjwWsucUSmHwt13HJ2H
        YVEpc/p7+eMgvq92Lz9n68U9Z2n6UX9e3o/qddIH0xoqasX6IY3wPp5dFSMEFK2wK6jyLd1jzi0t
        tXJXGoQAtWVdofKNfzg+gNGmlrTRfYAsTtI4mZ7ggnQKbp0fR0OxWNSOyrfTokpp34q343p8RU79
        ZqQujdVP7ooKhVDCbXOL1LUNj+cenI20FqC+WC0Yqyvjc6932BaN40WnCsP1G6Fn0GtP5SieziY3
        9HKOnop29/11Y5RtkQ/U4drRmgs9AoJR72/d3NLu+heq/B/5AWAMjUeeG4tcsMuOhzSLzev8V1o/
        5dYwOLR7wTD3Am2zD44FrWX3ZsAdnMcqL4Qq0Ror2ocDhcmjdJkskiRaRhCcgr8AAAD//wMAmw02
        QkYEAAA=
    headers:
      CF-RAY:
      - 93f47082ba71d640-IAD
      Connection:
      - keep-alive
      Content-Encoding:
      - gzip
      Content-Type:
      - application/json
      Date:
      - Tue, 13 May 2025 19:07:35 GMT
      Server:
      - cloudflare
      Set-Cookie:
      - __cf_bm=LL6YtOWVW4fA687_GIMcuJC7CM2I.uKx1vGaNkjFTgo-1747163255-1.0.1.1-qML6IsLM49e2bg7zp0uGqn3.JTJP5KlFYfb8o3v9LzyLb.cYoFBXn5te83Wxl5kVjDiXU2vH.QTFQu953KNx87LwsMkI2ZxTvH58oZWAawg;
        path=/; expires=Tue, 13-May-25 19:37:35 GMT; domain=.api.openai.com; HttpOnly;
        Secure; SameSite=None
      - _cfuvid=QOa3sx0F4_nAYKtjmx9ux7qfIsyipGZq94AL_SWd2ac-1747163255176-0.0.1.1-604800000;
        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - user-r3e61fpak04cbaokp5buoae4
      openai-processing-ms:
      - '575'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-envoy-upstream-service-time:
      - '587'
      x-ratelimit-limit-requests:
      - '30000'
      x-ratelimit-limit-tokens:
      - '150000000'
      x-ratelimit-remaining-requests:
      - '29999'
      x-ratelimit-remaining-tokens:
      - '149999976'
      x-ratelimit-reset-requests:
      - 2ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_66cc3b2bbe3be82a37d29fba7672d82b
    status:
      code: 200
      message: OK
- request:
    body: '{"messages":[{"role":"user","content":"Can the country of Crumpet have
      dragons? Answer with only YES or NO"},{"role":"assistant","tool_calls":[{"type":"function","id":"call_TTY8UFNo7rNCaOBUNtlRSvMG","function":{"name":"lookup_population","arguments":"{\"country\":
      \"Crumpet\"}"}}]},{"role":"tool","tool_call_id":"call_TTY8UFNo7rNCaOBUNtlRSvMG","content":"123124"},{"role":"assistant","tool_calls":[{"type":"function","id":"call_aq9UyiSFkzX6W8Ydc33DoI9Y","function":{"name":"can_have_dragons","arguments":"{\"population\":
      123124}"}}]},{"role":"tool","tool_call_id":"call_aq9UyiSFkzX6W8Ydc33DoI9Y","content":"true"}],"model":"gpt-4o-mini","stream":false,"tools":[{"type":"function","function":{"name":"lookup_population","description":"Returns
      the current population of the specified fictional country","parameters":{"properties":{"country":{"type":"string"}},"required":["country"],"type":"object"}}},{"type":"function","function":{"name":"can_have_dragons","description":"Returns
      True if the specified population can have dragons, False otherwise","parameters":{"properties":{"population":{"type":"integer"}},"required":["population"],"type":"object"}}}]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '1157'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.78.0
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.78.0
      x-stainless-read-timeout:
      - '600'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.3
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: !!binary |
        H4sIAAAAAAAAAwAAAP//jJJBb9swDIXv/hUCz/HgOGmd5NYW2447bNjQDIWhSLSjThYFiS42FPnv
        g+w0drcO2EUHfXzUexSfMyHAaNgJUEfJqvM2v/3mP37Z31ebq69xb/zdp+Jw8/Sh2lf8qG9gkRR0
        eETFL6p3ijpvkQ25EauAkjF1XVbranm9Kq+qAXSk0SZZ6zlfU94ZZ/KyKNd5UeXLzVl9JKMwwk58
        z4QQ4nk4k0+n8SfsRLF4uekwRtki7C5FQkAgm25AxmgiS8ewmKAix+gG6/fvP89JwKaPMrlzvbUz
        IJ0jlind4OnhTE4XF5ZaH+gQ/5BCY5yJxzqgjOTSi5HJw0BPmRAPQ9r+VQDwgTrPNdMPHJ5brq/H
        fjANeaKrM2Niaeei7eKNdrVGlsbG2bhASXVEPUmn2cpeG5qBbBb6bzNv9R6DG9f+T/sJKIWeUdc+
        oDbqdeCpLGBawX+VXYY8GIaI4ckorNlgSB+hsZG9HRcD4q/I2NWNcS0GH8y4HY2vi9W23JRlsS0g
        O2W/AQAA//8DAFbEZUIrAwAA
    headers:
      CF-RAY:
      - 93f47096cf15d6e9-IAD
      Connection:
      - keep-alive
      Content-Encoding:
      - gzip
      Content-Type:
      - application/json
      Date:
      - Tue, 13 May 2025 19:07:37 GMT
      Server:
      - cloudflare
      Set-Cookie:
      - __cf_bm=EDR.bZeRmrWVNTWef5aAJ2C5NT7yIBHq_6NzNGXNlX0-1747163257-1.0.1.1-YuS4Hj.Ncp4eOrYNT5L7AncdqT5Xn8a2DTxCka1HKKBGKdT8k70yvNTA3wMlQyVPxGD3HSCysY0a1n1zCkNs._TQe9hWOuoIDG9LtD9MBr4;
        path=/; expires=Tue, 13-May-25 19:37:37 GMT; domain=.api.openai.com; HttpOnly;
        Secure; SameSite=None
      - _cfuvid=3Xqq8l5nvU4mfyEz4.llgkHC3jY.IBLFTJrD76P7UsY-1747163257692-0.0.1.1-604800000;
        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - user-r3e61fpak04cbaokp5buoae4
      openai-processing-ms:
      - '222'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-envoy-upstream-service-time:
      - '227'
      x-ratelimit-limit-requests:
      - '30000'
      x-ratelimit-limit-tokens:
      - '150000000'
      x-ratelimit-remaining-requests:
      - '29999'
      x-ratelimit-remaining-tokens:
      - '149999974'
      x-ratelimit-reset-requests:
      - 2ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_d157a5a0f4b64776bc387ccab624e664
    status:
      code: 200
      message: OK
version: 1


================================================
FILE: tests/cassettes/test_tools_streaming/test_tools_streaming_variant_a.yaml
================================================
interactions:
- request:
    body: '{"messages":[{"role":"user","content":"What is the current llm version?"}],"model":"gpt-4.1-mini","stream":true,"stream_options":{"include_usage":true},"tools":[{"type":"function","function":{"name":"llm_version","description":"Return
      the installed version of llm","parameters":{"properties":{},"type":"object"}}}]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '315'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.78.0
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: 'data: {"id":"gen-1753242299-QZRAt5HJHd1ptY8sdS0s","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242299,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":""}


        data: {"id":"gen-1753242299-QZRAt5HJHd1ptY8sdS0s","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242299,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":""}


        data: {"id":"gen-1753242299-QZRAt5HJHd1ptY8sdS0s","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242299,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"0","type":"function","function":{"name":"llm_version","arguments":""}}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":""}


        data: {"id":"gen-1753242299-QZRAt5HJHd1ptY8sdS0s","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242299,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"0","type":"function","function":{"name":"llm_version","arguments":"{}"}}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":""}


        data: {"id":"gen-1753242299-QZRAt5HJHd1ptY8sdS0s","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242299,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"usage":{"prompt_tokens":57,"completion_tokens":17,"total_tokens":74,"cost":0.00007159,"is_byok":false,"prompt_tokens_details":{"cached_tokens":0},"cost_details":{"upstream_inference_cost":null},"completion_tokens_details":{"reasoning_tokens":0}}}


        data: [DONE]


        '
    headers:
      Connection:
      - keep-alive
      Content-Type:
      - text/event-stream; charset=utf-8
      Date:
      - Tue, 23 Jul 2025 14:54:09 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
    status:
      code: 200
      message: OK
- request:
    body: '{"messages":[{"role":"user","content":"What is the current llm version?"},{"role":"assistant","content":""},{"role":"assistant","tool_calls":[{"type":"function","id":"0","function":{"name":"llm_version","arguments":"{}"}}]},{"role":"tool","tool_call_id":"0","content":"0.fixed-version"}],"model":"gpt-4.1-mini","stream":true,"stream_options":{"include_usage":true},"tools":[{"type":"function","function":{"name":"llm_version","description":"Return
      the installed version of llm","parameters":{"properties":{},"type":"object"}}}]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '517'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.78.0
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: 'data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"The"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"
        current"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"
        version"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"
        of"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"
        *"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"ll"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"m"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"*"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"
        is"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"
        **"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"0"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"."},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"fixed-version"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"**."},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop","native_finish_reason":"stop","logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"usage":{"prompt_tokens":107,"completion_tokens":15,"total_tokens":122,"cost":0.0001017,"is_byok":false,"prompt_tokens_details":{"cached_tokens":0},"cost_details":{"upstream_inference_cost":null},"completion_tokens_details":{"reasoning_tokens":0}}}


        data: [DONE]


        '
    headers:
      Connection:
      - keep-alive
      Content-Type:
      - text/event-stream; charset=utf-8
      Date:
      - Tue, 23 Jul 2025 14:54:10 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
    status:
      code: 200
      message: OK
version: 1


================================================
FILE: tests/cassettes/test_tools_streaming/test_tools_streaming_variant_b.yaml
================================================
interactions:
- request:
    body: '{"messages":[{"role":"user","content":"What is the current llm version?"}],"model":"gpt-4.1-mini","stream":true,"stream_options":{"include_usage":true},"tools":[{"type":"function","function":{"name":"llm_version","description":"Return
      the installed version of llm","parameters":{"properties":{},"type":"object"}}}]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '315'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.78.0
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: 'data: {"id":"gen-1753242299-QZRAt5HJHd1ptY8sdS0s","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242299,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":""}


        data: {"id":"gen-1753242299-QZRAt5HJHd1ptY8sdS0s","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242299,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":""}


        data: {"id":"gen-1753242299-QZRAt5HJHd1ptY8sdS0s","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242299,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"0","type":"function","function":{"name":"llm_version","arguments":"{}"}}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":""}


        data: {"id":"gen-1753242299-QZRAt5HJHd1ptY8sdS0s","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242299,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"usage":{"prompt_tokens":57,"completion_tokens":17,"total_tokens":74,"cost":0.00007159,"is_byok":false,"prompt_tokens_details":{"cached_tokens":0},"cost_details":{"upstream_inference_cost":null},"completion_tokens_details":{"reasoning_tokens":0}}}


        data: [DONE]


        '
    headers:
      Connection:
      - keep-alive
      Content-Type:
      - text/event-stream; charset=utf-8
      Date:
      - Tue, 23 Jul 2025 14:54:09 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
    status:
      code: 200
      message: OK
- request:
    body: '{"messages":[{"role":"user","content":"What is the current llm version?"},{"role":"assistant","content":""},{"role":"assistant","tool_calls":[{"type":"function","id":"0","function":{"name":"llm_version","arguments":"{}"}}]},{"role":"tool","tool_call_id":"0","content":"0.fixed-version"}],"model":"gpt-4.1-mini","stream":true,"stream_options":{"include_usage":true},"tools":[{"type":"function","function":{"name":"llm_version","description":"Return
      the installed version of llm","parameters":{"properties":{},"type":"object"}}}]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '517'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.78.0
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: 'data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"The"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"
        current"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"
        version"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"
        of"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"
        *"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"ll"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"m"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"*"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"
        is"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"
        **"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"0"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"."},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"fixed-version"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":"**."},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop","native_finish_reason":"stop","logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753242300-j60LWi6MpN4lMZw1zTHK","provider":"Moonshot AI","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753242300,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"usage":{"prompt_tokens":107,"completion_tokens":15,"total_tokens":122,"cost":0.0001017,"is_byok":false,"prompt_tokens_details":{"cached_tokens":0},"cost_details":{"upstream_inference_cost":null},"completion_tokens_details":{"reasoning_tokens":0}}}


        data: [DONE]


        '
    headers:
      Connection:
      - keep-alive
      Content-Type:
      - text/event-stream; charset=utf-8
      Date:
      - Tue, 23 Jul 2025 14:54:10 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
    status:
      code: 200
      message: OK
version: 1


================================================
FILE: tests/cassettes/test_tools_streaming/test_tools_streaming_variant_c.yaml
================================================
interactions:
- request:
    body: '{"messages":[{"role":"user","content":"What is the current llm version?"}],"model":"gpt-4.1-mini","stream":true,"stream_options":{"include_usage":true},"tools":[{"type":"function","function":{"name":"llm_version","description":"Return
      the installed version of llm","parameters":{"properties":{},"type":"object"}}}]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '315'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.78.0
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: '
        data: {"id":"gen-1753248108-FGOxpkEzFEwhNKSPpI4a","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248108,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753248108-FGOxpkEzFEwhNKSPpI4a","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248108,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"llm_version:0","type":"function","function":{"name":"llm_version"}}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753248108-FGOxpkEzFEwhNKSPpI4a","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248108,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"function":{"arguments":"{}"},"type":"function"}]},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753248108-FGOxpkEzFEwhNKSPpI4a","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248108,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"tool_calls","native_finish_reason":"tool_calls","logprobs":null}],"system_fingerprint":"fpv0_170758dd"}


        data: {"id":"gen-1753248108-FGOxpkEzFEwhNKSPpI4a","provider":"Novita","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248108,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"usage":{"prompt_tokens":56,"completion_tokens":12,"total_tokens":68,"cost":0.00005952,"is_byok":false,"prompt_tokens_details":{"cached_tokens":0},"cost_details":{"upstream_inference_cost":null},"completion_tokens_details":{"reasoning_tokens":0}}}


        data: [DONE]


        '
    headers:
      Connection:
      - keep-alive
      Content-Type:
      - text/event-stream; charset=utf-8
      Date:
      - Tue, 23 Jul 2025 14:54:09 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
    status:
      code: 200
      message: OK
- request:
    body: '{"messages":[{"role":"user","content":"What is the current llm version?"},{"role":"assistant","content":""},{"role":"assistant","tool_calls":[{"type":"function","id":"0","function":{"name":"llm_version","arguments":"{}"}}]},{"role":"tool","tool_call_id":"0","content":"0.fixed-version"}],"model":"gpt-4.1-mini","stream":true,"stream_options":{"include_usage":true},"tools":[{"type":"function","function":{"name":"llm_version","description":"Return
      the installed version of llm","parameters":{"properties":{},"type":"object"}}}]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '517'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.78.0
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: 'data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":"The installed"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":" version"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":" of"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":" LL"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":"M"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":" on"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":" this"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":" system"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":" is"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":" "},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":"0"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":"."},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":"fixed-version"},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":"."},"finish_reason":null,"native_finish_reason":null,"logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop","native_finish_reason":"stop","logprobs":null}]}


        data: {"id":"gen-1753248104-uf1xqJDBrAUCJ4g8apK8","provider":"Fireworks","model":"moonshotai/kimi-k2","object":"chat.completion.chunk","created":1753248104,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null,"native_finish_reason":null,"logprobs":null}],"usage":{"prompt_tokens":105,"completion_tokens":16,"total_tokens":121,"cost":0.000103,"is_byok":false,"prompt_tokens_details":{"cached_tokens":0},"cost_details":{"upstream_inference_cost":null},"completion_tokens_details":{"reasoning_tokens":0}}}


        data: [DONE]


        '
    headers:
      Connection:
      - keep-alive
      Content-Type:
      - text/event-stream; charset=utf-8
      Date:
      - Tue, 23 Jul 2025 14:54:10 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
    status:
      code: 200
      message: OK
version: 1


================================================
FILE: tests/conftest.py
================================================
import pytest
import sqlite_utils
import json
import llm
import llm_echo
from llm.plugins import pm
from pydantic import Field
from pytest_httpx import IteratorStream
from typing import Optional


def pytest_configure(config):
    """Pytest hook: flag the running interpreter as being under test.

    Sets ``sys._called_from_test`` to ``True``. ``config`` is not used.
    """
    import sys as _sys

    setattr(_sys, "_called_from_test", True)


@pytest.fixture
def user_path(tmpdir):
    """Create an ``llm.datasette.io`` directory inside ``tmpdir`` and return it."""
    llm_dir = tmpdir / "llm.datasette.io"
    llm_dir.mkdir()
    return llm_dir


@pytest.fixture
def logs_db(user_path):
    """Return a ``sqlite_utils.Database`` for ``logs.db`` under ``user_path``."""
    db_path = str(user_path / "logs.db")
    return sqlite_utils.Database(db_path)


@pytest.fixture
def user_path_with_embeddings(user_path):
    """Store two items in a "demo" collection in ``embeddings.db`` under ``user_path``.

    The fixture is used for its side effect; it does not return a value.
    """
    embeddings_db = sqlite_utils.Database(str(user_path / "embeddings.db"))
    demo = llm.Collection("demo", embeddings_db, model_id="embed-demo")
    for item_id, text in (("1", "hello world"), ("2", "goodbye world")):
        demo.embed(item_id, text, store=True)


@pytest.fixture
def templates_path(user_path):
    """Create a ``templates`` directory inside ``user_path`` and return it."""
    templates_dir = user_path / "templates"
    templates_dir.mkdir()
    return templates_dir


@pytest.fixture(autouse=True)
def env_setup(monkeypatch, user_path):
    """Set ``LLM_USER_PATH`` to the per-test ``user_path`` directory for every test."""
    user_dir = str(user_path)
    monkeypatch.setenv("LLM_USER_PATH", user_dir)


class MockModel(llm.Model):
    """Synchronous test double that replays chunks queued through ``enqueue``."""

    model_id = "mock"
    attachment_types = {"image/png", "audio/wav"}
    supports_schema = True
    supports_tools = True

    class Options(llm.Options):
        max_tokens: Optional[int] = Field(
            description="Maximum number of tokens to generate.", default=None
        )

    def __init__(self):
        # One (prompt, stream, response, conversation) tuple per execute() call.
        self.history = []
        # FIFO of chunk lists; each execute() call consumes at most one entry.
        self._queue = []
        # When not None, passed to response.set_resolved_model() by execute().
        self.resolved_model_name = None

    def enqueue(self, messages):
        """Queue a list of chunks to be yielded by one later ``execute`` call."""
        assert isinstance(messages, list)
        self._queue.append(messages)

    def execute(self, prompt, stream, response, conversation):
        """Yield the next queued chunks (none if the queue is empty), then set usage.

        Usage input is the number of whitespace-separated words in
        ``prompt.prompt``; usage output is the number of chunks yielded.
        """
        self.history.append((prompt, stream, response, conversation))
        try:
            queued = self._queue.pop(0)
        except IndexError:
            # Nothing was enqueued for this call: produce no output.
            queued = []
        emitted = 0
        for chunk in queued:
            emitted += 1
            yield chunk
        prompt_text = prompt.prompt or ""
        response.set_usage(input=len(prompt_text.split()), output=emitted)
        if self.resolved_model_name is not None:
            response.set_resolved_model(self.resolved_model_name)


class MockKeyModel(llm.KeyModel):
    """Key-requiring test model whose single output chunk echoes the key it received."""

    model_id = "mock_key"
    needs_key = "mock"

    def execute(self, prompt, stream, response, conversation, key):
        reply = f"key: {key}"
        return [reply]


class MockAsyncKeyModel(llm.AsyncKeyModel):
    """Async key-requiring test model; yields one chunk echoing the key it received."""

    model_id = "mock_key"
    needs_key = "mock"

    async def execute(self, prompt, stream, response, conversation, key):
        reply = f"async, key: {key}"
        yield reply


class AsyncMockModel(llm.AsyncModel):
    """Async test double that replays chunks queued through ``enqueue``."""

    model_id = "mock"
    supports_schema = True

    def __init__(self):
        # One (prompt, stream, response, conversation) tuple per execute() call.
        self.history = []
        # FIFO of chunk lists; each execute() call consumes at most one entry.
        self._queue = []
        # When not None, passed to response.set_resolved_model() by execute().
        self.resolved_model_name = None

    def enqueue(self, messages):
        """Queue a list of chunks to be yielded by one later ``execute`` call."""
        assert isinstance(messages, list)
        self._queue.append(messages)

    async def execute(self, prompt, stream, response, conversation):
        """Yield the next queued chunks (none if the queue is empty), then set usage.

        Usage input is the number of whitespace-separated words in
        ``prompt.prompt``; usage output is the number of chunks yielded.
        """
        self.history.append((prompt, stream, response, conversation))
        try:
            queued = self._queue.pop(0)
        except IndexError:
            # Nothing was enqueued for this call: produce no output.
            queued = []
        emitted = 0
        for chunk in queued:
            emitted += 1
            yield chunk
        prompt_text = prompt.prompt or ""
        response.set_usage(input=len(prompt_text.split()), output=emitted)
        if self.resolved_model_name is not None:
            response.set_resolved_model(self.resolved_model_name)


class EmbedDemo(llm.EmbeddingModel):
    """Deterministic embedding model: each vector holds the lengths of the first 16 words."""

    model_id = "embed-demo"
    batch_size = 10
    supports_binary = True

    def __init__(self):
        # Every item handed to embed_batch(), in the order it was processed.
        self.embedded_content = []

    def embed_batch(self, texts):
        """Yield one 16-element vector per item in ``texts``, zero-padded on the right."""
        # batch_count is created lazily the first time a batch is consumed.
        self.batch_count = getattr(self, "batch_count", 0) + 1
        for item in texts:
            self.embedded_content.append(item)
            lengths = [len(word) for word in item.split()[:16]]
            # Pad with 0 up to 16 entries
            yield lengths + [0] * (16 - len(lengths))


class EmbedBinaryOnly(EmbedDemo):
    """``EmbedDemo`` variant that declares binary support but no text support."""

    model_id = "embed-binary-only"
    supports_binary = True
    supports_text = False


class EmbedTextOnly(EmbedDemo):
    """``EmbedDemo`` variant that declares text support but no binary support."""

    model_id = "embed-text-only"
    supports_binary = False
    supports_text = True


@pytest.fixture
def embed_demo():
    """Return a fresh ``EmbedDemo`` instance."""
    model = EmbedDemo()
    return model


@pytest.fixture
def mock_model():
    """Return a fresh ``MockModel`` instance."""
    model = MockModel()
    return model


@pytest.fixture
def async_mock_model():
    """Return a fresh ``AsyncMockModel`` instance."""
    model = AsyncMockModel()
    return model


@pytest.fixture
def mock_key_model():
    """Return a fresh ``MockKeyModel`` instance."""
    model = MockKeyModel()
    return model


@pytest.fixture
def mock_async_key_model():
    """Return a fresh ``MockAsyncKeyModel`` instance."""
    model = MockAsyncKeyModel()
    return model


@pytest.fixture(autouse=True)
def register_embed_demo_model(embed_demo, mock_model, async_mock_model):
    """Register the mock chat and embedding models with ``pm`` around every test."""
    plugin_name = "undo-mock-models-plugin"

    class MockModelsPlugin:
        __name__ = "MockModelsPlugin"

        @llm.hookimpl
        def register_embedding_models(self, register):
            for model in (embed_demo, EmbedBinaryOnly(), EmbedTextOnly()):
                register(model)

        @llm.hookimpl
        def register_models(self, register):
            register(mock_model, async_model=async_mock_model)

    pm.register(MockModelsPlugin(), name=plugin_name)
    try:
        yield
    finally:
        # Unregister even if the test failed, so the plugin does not outlive it.
        pm.unregister(name=plugin_name)


@pytest.fixture(autouse=True)
def register_echo_model():
    """Register the ``llm_echo`` sync and async models with ``pm`` around every test."""
    plugin_name = "undo-EchoModelPlugin"

    class EchoModelPlugin:
        __name__ = "EchoModelPlugin"

        @llm.hookimpl
        def register_models(self, register):
            sync_model = llm_echo.Echo()
            register(sync_model, llm_echo.EchoAsync())

    pm.register(EchoModelPlugin(), name=plugin_name)
    try:
        yield
    finally:
        # Unregister even if the test failed, so the plugin does not outlive it.
        pm.unregister(name=plugin_name)


@pytest.fixture
def mocked_openai_chat(httpx_mock):
    """Mock a JSON chat-completions response and return ``httpx_mock``."""
    payload = {
        "model": "gpt-4o-mini",
        "usage": {},
        "choices": [{"message": {"content": "Bob, Alice, Eve"}}],
    }
    httpx_mock.add_response(
        method="POST",
        url="https://api.openai.com/v1/chat/completions",
        json=payload,
        headers={"Content-Type": "application/json"},
    )
    return httpx_mock


@pytest.fixture
def mocked_openai_chat_returning_fenced_code(httpx_mock):
    """Mock a chat-completions response whose content wraps code in a four-backtick fence."""
    content = (
        "Code:\n\n````javascript\nfunction foo() {\n  return 'bar';\n}\n````\nDone."
    )
    payload = {
        "model": "gpt-4o-mini",
        "usage": {},
        "choices": [{"message": {"content": content}}],
    }
    httpx_mock.add_response(
        method="POST",
        url="https://api.openai.com/v1/chat/completions",
        json=payload,
        headers={"Content-Type": "application/json"},
    )
    return httpx_mock


def stream_events():
    """Yield UTF-8 encoded ``data:`` event lines for a short streamed chat completion.

    Four JSON chunk events are produced, followed by the ``[DONE]`` sentinel.
    """
    steps = [
        ({"role": "assistant", "content": ""}, None),
        ({"content": "Hi"}, None),
        ({"content": "."}, None),
        ({}, "stop"),
    ]
    for delta, finish_reason in steps:
        chunk = {
            "id": "chat-1",
            "object": "chat.completion.chunk",
            "created": 1695096940,
            "model": "gpt-3.5-turbo-0613",
            "choices": [
                {"index": 0, "delta": delta, "finish_reason": finish_reason}
            ],
        }
        yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
    yield b"data: [DONE]\n\n"


@pytest.fixture
def mocked_openai_chat_stream(httpx_mock):
    """Mock a streamed chat-completions response built from ``stream_events()``.

    Returns ``httpx_mock``, like the other ``mocked_*`` fixtures, so tests can
    inspect the requests that were made.
    """
    httpx_mock.add_response(
        method="POST",
        url="https://api.openai.com/v1/chat/completions",
        stream=IteratorStream(stream_events()),
        headers={"Content-Type": "text/event-stream"},
    )
    return httpx_mock


@pytest.fixture
def mocked_openai_completion(httpx_mock):
    """Mock a JSON text-completions response and return ``httpx_mock``."""
    choice = {
        "text": "\n\nThis is indeed a test",
        "index": 0,
        "logprobs": None,
        "finish_reason": "length",
    }
    payload = {
        "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
        "object": "text_completion",
        "created": 1589478378,
        "model": "gpt-3.5-turbo-instruct",
        "choices": [choice],
        "usage": {"prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12},
    }
    httpx_mock.add_response(
        method="POST",
        url="https://api.openai.com/v1/completions",
        json=payload,
        headers={"Content-Type": "application/json"},
    )
    return httpx_mock


def stream_completion_events():
    """Yield UTF-8 encoded ``data:`` event lines for a streamed text completion with logprobs.

    Four JSON chunk events are produced (the last one has empty logprobs and
    ``finish_reason`` "stop"), followed by the ``[DONE]`` sentinel.
    """

    def build_choice(
        text, tokens, token_logprobs, top_logprobs, text_offset, finish_reason
    ):
        # Key order matters: json.dumps serialises dicts in insertion order.
        return {
            "text": text,
            "index": 0,
            "logprobs": {
                "tokens": tokens,
                "token_logprobs": token_logprobs,
                "top_logprobs": top_logprobs,
                "text_offset": text_offset,
            },
            "finish_reason": finish_reason,
        }

    # (text, tokens, token_logprobs, top_logprobs, text_offset, finish_reason)
    rows = (
        ("\n\n", ["\n\n"], [-0.6], [{"\n\n": -0.6, "\n": -1.9}], [16], None),
        ("Hi", ["Hi"], [-1.1], [{"Hi": -1.1, "Hello": -0.7}], [18], None),
        (".", ["."], [-1.1], [{".": -1.1, "!": -0.9}], [20], None),
        ("", [], [], [], [], "stop"),
    )

    for row in rows:
        event = {
            "id": "cmpl-80MdSaou7NnPuff5ZyRMysWBmgSPS",
            "object": "text_completion",
            "created": 1695097702,
            "choices": [build_choice(*row)],
            "model": "gpt-3.5-turbo-instruct",
        }
        yield f"data: {json.dumps(event)}\n\n".encode("utf-8")
    yield b"data: [DONE]\n\n"


@pytest.fixture
def mocked_openai_completion_logprobs_stream(httpx_mock):
    """Mock a streamed completions response built from ``stream_completion_events()``."""
    event_stream = IteratorStream(stream_completion_events())
    httpx_mock.add_response(
        method="POST",
        url="https://api.openai.com/v1/completions",
        stream=event_stream,
        headers={"Content-Type": "text/event-stream"},
    )
    return httpx_mock


@pytest.fixture
def mocked_openai_completion_logprobs(httpx_mock):
    """Mock a JSON completions response that includes logprobs; return ``httpx_mock``."""
    # NOTE(review): the third token is "1" although the text ends in "." and
    # top_logprobs lists "." - kept as-is; confirm whether tests compare it.
    logprobs = {
        "tokens": ["\n\n", "Hi", "1"],
        "token_logprobs": [-0.6, -1.1, -0.9],
        "top_logprobs": [
            {"\n\n": -0.6, "\n": -1.9},
            {"Hi": -1.1, "Hello": -0.7},
            {".": -0.9, "!": -1.1},
        ],
        "text_offset": [16, 18, 20],
    }
    payload = {
        "id": "cmpl-80MeBfKJutM0uMNJkRrebJLeP3bxL",
        "object": "text_completion",
        "created": 1695097747,
        "model": "gpt-3.5-turbo-instruct",
        "choices": [
            {
                "text": "\n\nHi.",
                "index": 0,
                "logprobs": logprobs,
                "finish_reason": "stop",
            }
        ],
        "usage": {"prompt_tokens": 5, "completion_tokens": 3, "total_tokens": 8},
    }
    httpx_mock.add_response(
        method="POST",
        url="https://api.openai.com/v1/completions",
        json=payload,
        headers={"Content-Type": "application/json"},
    )
    return httpx_mock


@pytest.fixture
def mocked_localai(httpx_mock):
    """Mock the LocalAI chat and completions endpoints; return ``httpx_mock``."""
    responses = (
        (
            "http://localai.localhost/chat/completions",
            {
                "model": "orca",
                "usage": {},
                "choices": [{"message": {"content": "Bob, Alice, Eve"}}],
            },
        ),
        (
            "http://localai.localhost/completions",
            {
                "model": "completion-babbage",
                "usage": {},
                "choices": [{"text": "Hello"}],
            },
        ),
    )
    for url, payload in responses:
        httpx_mock.add_response(
            method="POST",
            url=url,
            json=payload,
            headers={"Content-Type": "application/json"},
        )
    return httpx_mock


@pytest.fixture
def collection():
    """Return a "test" collection using ``embed-demo`` with two items embedded."""
    test_collection = llm.Collection("test", model_id="embed-demo")
    for item_id, text in ((1, "hello world"), (2, "goodbye world")):
        test_collection.embed(item_id, text)
    return test_collection


@pytest.fixture(scope="module")
def vcr_config():
    """Return VCR settings that filter the ``Authorization`` header."""
    return dict(filter_headers=["Authorization"])


def extract_braces(s):
    """Return the text from the first ``{`` to the last ``}`` inclusive.

    Returns ``None`` when either brace is missing or the last ``}`` comes
    before the first ``{``.
    """
    start, end = s.find("{"), s.rfind("}")
    if start == -1 or end == -1 or start >= end:
        return None
    return s[start : end + 1]


================================================
FILE: tests/test-llm-load-plugins.sh
================================================
#!/bin/bash
# This should only run in environments where both
# llm-cluster and llm-mistral are installed.
#
# Checks that the LLM_LOAD_PLUGINS environment variable controls which
# plugins `llm plugins` reports. Exits with status 1 at the first failed check.

# Print the given message and abort the script with a non-zero status.
# This is a function rather than a ( ... ) group on purpose: `exit` inside a
# subshell only ends the subshell, so the script would carry on regardless
# and an early failure could be masked by a later passing check.
fail() {
  echo "$1"
  exit 1
}

PLUGINS=$(llm plugins)
echo "$PLUGINS" | jq 'any(.[]; .name == "llm-mistral")' | \
  grep -q true || fail "Test failed: llm-mistral not found"

# With the LLM_LOAD_PLUGINS we should not see that
PLUGINS2=$(LLM_LOAD_PLUGINS=llm-cluster llm plugins)
echo "$PLUGINS2" | jq 'any(.[]; .name == "llm-mistral")' | \
  grep -q false || fail "Test failed: llm-mistral should not have been loaded"
echo "$PLUGINS2" | jq 'any(.[]; .name == "llm-cluster")' | \
  grep -q true || fail "Test failed: llm-cluster should have been loaded"

# With LLM_LOAD_PLUGINS='' we should see no plugins
PLUGINS3=$(LLM_LOAD_PLUGINS='' llm plugins)
echo "$PLUGINS3" | \
  grep -q '\[\]' || fail "Test failed: plugins should have returned []"


================================================
FILE: tests/test_aliases.py
================================================
from click.testing import CliRunner
from llm.cli import cli
import llm
import json
import pytest
import re


@pytest.mark.parametrize("model_id_or_alias", ("gpt-3.5-turbo", "chatgpt"))
def test_set_alias(model_id_or_alias):
    """A new alias pointing at a model ID or an existing alias resolves to that model."""
    new_alias = "this-is-a-new-alias"
    with pytest.raises(llm.UnknownModelError):
        llm.get_model(new_alias)
    llm.set_alias(new_alias, model_id_or_alias)
    resolved = llm.get_model(new_alias)
    assert resolved.model_id == "gpt-3.5-turbo"


def test_remove_alias():
    """Removing an unknown alias raises KeyError; a removed alias no longer resolves."""
    alias = "some-other-alias"
    with pytest.raises(KeyError):
        llm.remove_alias(alias)
    llm.set_alias(alias, "gpt-3.5-turbo")
    resolved = llm.get_model(alias)
    assert resolved.model_id == "gpt-3.5-turbo"
    llm.remove_alias(alias)
    with pytest.raises(llm.UnknownModelError):
        llm.get_model(alias)


@pytest.mark.parametrize("args", (["aliases", "list"], ["aliases"]))
def test_cli_aliases_list(args):
    """The aliases listing contains every expected ``alias : model`` line."""
    llm.set_alias("e-demo", "embed-demo")
    result = CliRunner().invoke(cli, args)
    assert result.exit_code == 0
    expected_lines = (
        "3.5         : gpt-3.5-turbo",
        "chatgpt     : gpt-3.5-turbo",
        "chatgpt-16k : gpt-3.5-turbo-16k",
        "3.5-16k     : gpt-3.5-turbo-16k",
        "4           : gpt-4",
        "gpt4        : gpt-4",
        "4-32k       : gpt-4-32k",
        "e-demo      : embed-demo (embedding)",
        "ada         : text-embedding-ada-002 (embedding)",
    )
    for expected in expected_lines:
        # Match any run of whitespace between the parts of the line
        pattern = r"\s+".join(re.escape(part) for part in expected.split())
        assert re.search(pattern, result.output)


@pytest.mark.parametrize("args", (["aliases", "list"], ["aliases"]))
def test_cli_aliases_list_json(args):
    """With ``--json`` the output includes at least the expected alias mappings."""
    llm.set_alias("e-demo", "embed-demo")
    result = CliRunner().invoke(cli, args + ["--json"])
    assert result.exit_code == 0
    expected_subset = {
        "3.5": "gpt-3.5-turbo",
        "chatgpt": "gpt-3.5-turbo",
        "chatgpt-16k": "gpt-3.5-turbo-16k",
        "3.5-16k": "gpt-3.5-turbo-16k",
        "4": "gpt-4",
        "gpt4": "gpt-4",
        "4-32k": "gpt-4-32k",
        "ada": "text-embedding-ada-002",
        "e-demo": "embed-demo",
    }
    listed = json.loads(result.output)
    # Superset comparison: other aliases may be present as well
    assert listed.items() >= expected_subset.items()


@pytest.mark.parametrize(
    "args,expected,expected_error",
    (
        (["foo", "bar"], {"foo": "bar"}, None),
        (["foo", "-q", "mo"], {"foo": "mock"}, None),
        (["foo", "-q", "mog"], None, "No model found matching query: mog"),
    ),
)
def test_cli_aliases_set(user_path, args, expected, expected_error):
    """``aliases set`` writes aliases.json on success, or exits 1 with an error message."""
    aliases_file = user_path / "aliases.json"
    # There should be no aliases.json at the start
    assert not aliases_file.exists()
    result = CliRunner().invoke(cli, ["aliases", "set"] + args)
    if expected_error:
        assert result.exit_code == 1
        assert result.output.strip() == f"Error: {expected_error}"
        return
    assert result.exit_code == 0
    assert aliases_file.exists()
    assert json.loads(aliases_file.read_text("utf-8")) == expected


def test_cli_aliases_path(user_path):
    """``aliases path`` prints the location of aliases.json under ``user_path``."""
    expected_path = str(user_path / "aliases.json")
    result = CliRunner().invoke(cli, ["aliases", "path"])
    assert result.exit_code == 0
    assert result.output.strip() == expected_path


def test_cli_aliases_remove(user_path):
    """``aliases remove`` deletes the named alias from aliases.json."""
    aliases_file = user_path / "aliases.json"
    aliases_file.write_text(json.dumps({"foo": "bar"}), "utf-8")
    result = CliRunner().invoke(cli, ["aliases", "remove", "foo"])
    assert result.exit_code == 0
    remaining = json.loads(aliases_file.read_text("utf-8"))
    assert remaining == {}


def test_cli_aliases_remove_invalid(user_path):
    """Removing an alias that does not exist exits 1 with a "No such alias" error."""
    aliases_file = user_path / "aliases.json"
    aliases_file.write_text(json.dumps({"foo": "bar"}), "utf-8")
    result = CliRunner().invoke(cli, ["aliases", "remove", "invalid"])
    assert result.exit_code == 1
    assert result.output == "Error: No such alias: invalid\n"


@pytest.mark.parametrize("args", (["models"], ["models", "list"]))
def test_cli_aliases_are_registered(user_path, args):
    """Aliases from aliases.json appear next to their model in the models listing."""
    aliases = {"foo": "bar", "turbo": "gpt-3.5-turbo"}
    (user_path / "aliases.json").write_text(json.dumps(aliases), "utf-8")
    result = CliRunner().invoke(cli, args)
    assert result.exit_code == 0
    # Check for model line only, without keys, as --options is not used
    expected_line = "gpt-3.5-turbo (aliases: 3.5, chatgpt, turbo)"
    assert expected_line in result.output


================================================
FILE: tests/test_async.py
================================================
import llm
import pytest


@pytest.mark.asyncio
async def test_async_model(async_mock_model):
    """An async prompt can be iterated for chunks or awaited for a full response."""
    # Consumed as an async iterator
    async_mock_model.enqueue(["hello world"])
    chunks = [chunk async for chunk in async_mock_model.prompt("hello")]
    assert chunks == ["hello world"]
    # Not as an iterator
    async_mock_model.enqueue(["hello world"])
    response = await async_mock_model.prompt("hello")
    assert await response.text() == "hello world"
    assert isinstance(response, llm.AsyncResponse)
    usage = await response.usage()
    assert (usage.input, usage.output) == (1, 1)
    assert usage.details is None


@pytest.mark.asyncio
async def test_async_model_conversation(async_mock_model):
    """Two awaited prompts in one conversation each return the chunk queued for them."""
    async_mock_model.enqueue(["joke 1"])
    conversation = async_mock_model.conversation()
    first = await conversation.prompt("joke")
    assert await first.text() == "joke 1"
    async_mock_model.enqueue(["joke 2"])
    second = await conversation.prompt("again")
    assert await second.text() == "joke 2"


@pytest.mark.asyncio
async def test_async_on_done(async_mock_model):
    """A callback registered with ``on_done`` has been called once after ``text()``."""
    async_mock_model.enqueue(["hello world"])
    response = await async_mock_model.prompt(prompt="hello")
    completed = []

    def record(finished_response):
        completed.append(finished_response)

    assert not completed
    await response.on_done(record)
    await response.text()
    assert response._done
    assert len(completed) == 1


@pytest.mark.asyncio
async def test_async_conversation(async_mock_model):
    """``conversation.prompt(...).text()`` can be awaited directly, turn after turn."""
    async_mock_model.enqueue(["one"])
    conversation = async_mock_model.conversation()
    first_text = await conversation.prompt("hi").text()
    async_mock_model.enqueue(["two"])
    second_text = await conversation.prompt("hi").text()
    assert first_text == "one"
    assert second_text == "two"


================================================
FILE: tests/test_attachments.py
================================================
from click.testing import CliRunner
import os
import sys
from unittest.mock import ANY
import llm
from llm import cli
import pytest

# Small binary payload used as the "image/png" attachment body in the tests
# below. It begins with the PNG signature and carries IHDR, PLTE, IDAT and
# IEND chunk markers.
TINY_PNG = (
    b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\xa6\x00\x00\x01\x1a"
    b"\x02\x03\x00\x00\x00\xe6\x99\xc4^\x00\x00\x00\tPLTE\xff\xff\xff"
    b"\x00\xff\x00\xfe\x01\x00\x12t\x01J\x00\x00\x00GIDATx\xda\xed\xd81\x11"
    b"\x000\x08\xc0\xc0.]\xea\xaf&Q\x89\x04V\xe0>\xf3+\xc8\x91Z\xf4\xa2\x08EQ\x14E"
    b"Q\x14EQ\x14EQ\xd4B\x91$I3\xbb\xbf\x08EQ\x14EQ\x14EQ\x14E\xd1\xa5"
    b"\xd4\x17\x91\xc6\x95\x05\x15\x0f\x9f\xc5\t\x9f\xa4\x00\x00\x00\x00IEND\xaeB`"
    b"\x82"
)

# Small binary payload used as the "audio/wav" attachment body in the tests
# below: a RIFF/WAVE header with a "fmt " chunk and no sample data.
TINY_WAV = b"RIFF$\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00D\xac\x00\x00"


@pytest.mark.parametrize(
    "attachment_type,attachment_content",
    [
        ("image/png", TINY_PNG),
        ("audio/wav", TINY_WAV),
    ],
)
def test_prompt_attachment(mock_model, logs_db, attachment_type, attachment_content):
    """An attachment piped on stdin (``-a -``) reaches the model and is logged."""
    mock_model.enqueue(["two boxes"])
    result = CliRunner().invoke(
        cli.cli,
        ["prompt", "-m", "mock", "describe file", "-a", "-"],
        input=attachment_content,
        catch_exceptions=False,
    )
    assert result.exit_code == 0, result.output
    assert result.output == "two boxes\n"

    # The model saw a single attachment with the detected type and raw bytes
    recorded_prompt = mock_model.history[0][0]
    expected_attachment = llm.Attachment(
        type=attachment_type, path=None, url=None, content=attachment_content, _id=ANY
    )
    assert recorded_prompt.attachments[0] == expected_attachment

    # Check it was logged correctly
    conversations = list(logs_db["conversations"].rows)
    assert len(conversations) == 1
    logged_conversation = conversations[0]
    assert logged_conversation["model"] == "mock"
    assert logged_conversation["name"] == "describe file"

    response_row = list(logs_db["responses"].rows)[0]
    attachment_row = list(logs_db["attachments"].rows)[0]
    assert attachment_row == {
        "id": ANY,
        "type": attachment_type,
        "path": None,
        "url": None,
        "content": attachment_content,
    }

    # The join row links the logged attachment to the logged response
    link_row = list(logs_db["prompt_attachments"].rows)[0]
    assert link_row["attachment_id"] == attachment_row["id"]
    assert link_row["response_id"] == response_row["id"]


def _count_open_fds():
    """Return the number of entries in this process's file descriptor directory.

    Only macOS (``/dev/fd``) and Linux (``/proc/self/fd``) are handled; on any
    other platform ``None`` is returned.
    """
    fd_dirs = {"darwin": "/dev/fd", "linux": "/proc/self/fd"}
    fd_dir = fd_dirs.get(sys.platform)
    if fd_dir is None:
        return None
    return len(os.listdir(fd_dir))


@pytest.mark.skipif(
    sys.platform not in ("darwin", "linux"),
    reason="File descriptor counting only supported on macOS and Linux",
)
def test_attachment_no_file_descriptor_leak(tmp_path):
    """Reading path-based attachments repeatedly must not pile up open fds."""
    sample_file = tmp_path / "test.bin"
    sample_file.write_bytes(b"x" * 1000)
    sample_path = str(sample_file)

    # Prime once before measuring, since the first read may open other resources
    warmup = llm.Attachment(path=sample_path)
    warmup.id()
    warmup.content_bytes()

    fds_before = _count_open_fds()

    # Build and fully read a fresh attachment many times over
    for _ in range(100):
        current = llm.Attachment(path=sample_path)
        current.id()
        current.content_bytes()

    # A small amount of slack is tolerated
    fds_after = _count_open_fds()
    assert fds_after <= fds_before + 5


================================================
FILE: tests/test_chat.py
================================================
from click.testing import CliRunner
from unittest.mock import ANY
import json
import llm.cli
import pytest
import sqlite_utils
import sys
import textwrap


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_basic(mock_model, logs_db):
    """Chat for two turns, check what was logged, then resume with ``-c``."""
    banner = (
        "Chatting with mock"
        "\nType 'exit' or 'quit' to exit"
        "\nType '!multi' to enter multiple lines, then '!end' to finish"
        "\nType '!edit' to open your default editor and modify the prompt"
        "\nType '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments"
    )

    def logged_row(prompt, response, input_tokens, conversation_id):
        # Expected responses-table row for a single turn with the mock model
        return {
            "id": ANY,
            "model": "mock",
            "resolved_model": None,
            "prompt": prompt,
            "system": None,
            "prompt_json": None,
            "options_json": "{}",
            "response": response,
            "response_json": None,
            "conversation_id": conversation_id,
            "duration_ms": ANY,
            "datetime_utc": ANY,
            "input_tokens": input_tokens,
            "output_tokens": 1,
            "token_details": None,
            "schema_id": None,
        }

    runner = CliRunner()
    mock_model.enqueue(["one world"])
    mock_model.enqueue(["one again"])
    first_run = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock"],
        input="Hi\nHi two\nquit\n",
        catch_exceptions=False,
    )
    assert first_run.exit_code == 0
    assert first_run.output == (
        banner + "\n> Hi\none world\n> Hi two\none again\n> quit\n"
    )

    # Both turns should be logged under one conversation named after turn one
    conversation_rows = list(logs_db["conversations"].rows)
    assert conversation_rows[0] == {"id": ANY, "name": "Hi", "model": "mock"}
    conversation_id = conversation_rows[0]["id"]
    logged = list(logs_db["responses"].rows)
    assert logged == [
        logged_row("Hi", "one world", 1, conversation_id),
        logged_row("Hi two", "one again", 2, conversation_id),
    ]

    # Resume that conversation using -c
    mock_model.enqueue(["continued"])
    second_run = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "-c"],
        input="Continue\nquit\n",
        catch_exceptions=False,
    )
    assert second_run.exit_code == 0
    assert second_run.output == banner + "\n> Continue\ncontinued\n> quit\n"

    # Select only the rows that did not exist after the first run
    placeholders = ", ".join("?" for _ in logged)
    sql = "select * from responses where id not in ({})".format(placeholders)
    fresh_rows = list(logs_db.query(sql, [row["id"] for row in logged]))
    assert fresh_rows == [logged_row("Continue", "continued", 1, conversation_id)]


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_system(mock_model, logs_db):
    """A ``--system`` prompt passed to ``llm chat`` is stored on the response."""
    mock_model.enqueue(["I am mean"])
    chat_args = ["chat", "-m", "mock", "--system", "You are mean"]
    result = CliRunner().invoke(llm.cli.cli, chat_args, input="Hi\nquit\n")
    assert result.exit_code == 0

    # The trailing empty entry yields the final newline after "> quit"
    expected_lines = [
        "Chatting with mock",
        "Type 'exit' or 'quit' to exit",
        "Type '!multi' to enter multiple lines, then '!end' to finish",
        "Type '!edit' to open your default editor and modify the prompt",
        "Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments",
        "> Hi",
        "I am mean",
        "> quit",
        "",
    ]
    assert result.output == "\n".join(expected_lines)

    expected_row = {
        "id": ANY,
        "model": "mock",
        "resolved_model": None,
        "prompt": "Hi",
        "system": "You are mean",
        "prompt_json": None,
        "options_json": "{}",
        "response": "I am mean",
        "response_json": None,
        "conversation_id": ANY,
        "duration_ms": ANY,
        "datetime_utc": ANY,
        "input_tokens": 1,
        "output_tokens": 1,
        "token_details": None,
        "schema_id": None,
    }
    logged = list(logs_db["responses"].rows)
    assert logged == [expected_row]


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_options(mock_model, logs_db, user_path):
    """Saved default options apply to chat, and ``--option`` overrides them."""
    saved_defaults = {"mock": {"max_tokens": "5"}}
    (user_path / "model_options.json").write_text(json.dumps(saved_defaults), "utf-8")

    def logged_row(prompt, options_json, response, input_tokens):
        # Expected responses-table row; only these four fields vary per run
        return {
            "id": ANY,
            "model": "mock",
            "resolved_model": None,
            "prompt": prompt,
            "system": None,
            "prompt_json": None,
            "options_json": options_json,
            "response": response,
            "response_json": None,
            "conversation_id": ANY,
            "duration_ms": ANY,
            "datetime_utc": ANY,
            "input_tokens": input_tokens,
            "output_tokens": 1,
            "token_details": None,
            "schema_id": None,
        }

    runner = CliRunner()

    # First run relies on the saved default
    mock_model.enqueue(["Default options response"])
    default_run = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock"],
        input="Hi\nquit\n",
    )
    assert default_run.exit_code == 0

    # Second run supplies an explicit value on the command line
    mock_model.enqueue(["Override options response"])
    override_run = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "--option", "max_tokens", "10"],
        input="Hi with override\nquit\n",
    )
    assert override_run.exit_code == 0

    logged = list(logs_db["responses"].rows)
    assert logged == [
        logged_row("Hi", '{"max_tokens": 5}', "Default options response", 1),
        logged_row(
            "Hi with override", '{"max_tokens": 10}', "Override options response", 3
        ),
    ]


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
@pytest.mark.parametrize(
    "input,expected",
    (
        (
            "Hi\n!multi\nthis is multiple lines\nuntil the !end\n!end\nquit\n",
            [
                {"prompt": "Hi", "response": "One\n"},
                {
                    "prompt": "this is multiple lines\nuntil the !end",
                    "response": "Two\n",
                },
            ],
        ),
        # quit should not work within !multi
        (
            "!multi\nthis is multiple lines\nquit\nuntil the !end\n!end\nquit\n",
            [
                {
                    "prompt": "this is multiple lines\nquit\nuntil the !end",
                    "response": "One\n",
                }
            ],
        ),
        # Try custom delimiter
        (
            "!multi abc\nCustom delimiter\n!end\n!end 123\n!end abc\nquit\n",
            [{"prompt": "Custom delimiter\n!end\n!end 123", "response": "One\n"}],
        ),
    ),
)
def test_chat_multi(mock_model, logs_db, input, expected):
    """``!multi`` gathers lines into one prompt until the matching ``!end``."""
    # Queue more replies than any case consumes
    for reply in ("One\n", "Two\n", "Three\n"):
        mock_model.enqueue([reply])
    chat_args = ["chat", "-m", "mock", "--option", "max_tokens", "10"]
    result = CliRunner().invoke(llm.cli.cli, chat_args, input=input)
    assert result.exit_code == 0
    logged = list(logs_db["responses"].rows_where(select="prompt, response"))
    assert logged == expected


@pytest.mark.parametrize("custom_database_path", (False, True))
def test_llm_chat_creates_log_database(tmpdir, monkeypatch, custom_database_path):
    """``llm chat`` creates its log database, at ``--database`` when given."""
    user_path = tmpdir / "user"
    custom_db_path = tmpdir / "custom_log.db"
    monkeypatch.setenv("LLM_USER_PATH", str(user_path))

    database_args = ["--database", str(custom_db_path)] if custom_database_path else []
    result = CliRunner().invoke(
        llm.cli.cli,
        ["chat", "-m", "mock"] + database_args,
        catch_exceptions=False,
        input="Hi\nHi two\nquit\n",
    )
    assert result.exit_code == 0

    # Without --database the log should land in logs.db under the user path
    expected_db = custom_db_path if custom_database_path else user_path / "logs.db"
    assert expected_db.exists()
    assert sqlite_utils.Database(str(expected_db))["responses"].count == 2


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_tools(logs_db):
    """A function passed with ``--functions`` is callable as a tool during chat.

    The ``echo`` model is sent a JSON prompt requesting a call to ``upper``; the
    expected output shows the echoed first prompt followed by a second echo
    whose ``tool_results`` contain the function's return value.
    """
    runner = CliRunner()
    # Source for the tool function (no trailing whitespace inside the literal)
    functions = textwrap.dedent("""
    def upper(text: str) -> str:
        "Convert text to upper case"
        return text.upper()
    """)
    result = runner.invoke(
        llm.cli.cli,
        ["chat", "-m", "echo", "--functions", functions],
        input="\n".join(
            [
                json.dumps(
                    {
                        "prompt": "Convert hello to uppercase",
                        "tool_calls": [
                            {"name": "upper", "arguments": {"text": "hello"}}
                        ],
                    }
                ),
                "quit",
            ]
        ),
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert result.output == (
        "Chatting with echo\n"
        "Type 'exit' or 'quit' to exit\n"
        "Type '!multi' to enter multiple lines, then '!end' to finish\n"
        "Type '!edit' to open your default editor and modify the prompt\n"
        "Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments\n"
        '> {"prompt": "Convert hello to uppercase", "tool_calls": [{"name": "upper", '
        '"arguments": {"text": "hello"}}]}\n'
        "{\n"
        '  "prompt": "Convert hello to uppercase",\n'
        '  "system": "",\n'
        '  "attachments": [],\n'
        '  "stream": true,\n'
        '  "previous": []\n'
        "}{\n"
        '  "prompt": "",\n'
        '  "system": "",\n'
        '  "attachments": [],\n'
        '  "stream": true,\n'
        '  "previous": [\n'
        "    {\n"
        '      "prompt": "{\\"prompt\\": \\"Convert hello to uppercase\\", '
        '\\"tool_calls\\": [{\\"name\\": \\"upper\\", \\"arguments\\": {\\"text\\": '
        '\\"hello\\"}}]}"\n'
        "    }\n"
        "  ],\n"
        '  "tool_results": [\n'
        "    {\n"
        '      "name": "upper",\n'
        '      "output": "HELLO",\n'
        '      "tool_call_id": null\n'
        "    }\n"
        "  ]\n"
        "}\n"
        "> quit\n"
    )


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_fragments(tmpdir):
    """Fragments given with ``-f`` and with ``!fragment`` both reach the prompt."""
    path1 = str(tmpdir / "frag1.txt")
    path2 = str(tmpdir / "frag2.txt")
    for fragment_path, text in ((path1, "one"), (path2, "two")):
        with open(fragment_path, "w") as fp:
            fp.write(text)

    result = CliRunner().invoke(
        llm.cli.cli,
        ["chat", "-m", "echo", "-f", path1],
        input="hi\n!fragment {}\nquit\n".format(path2),
    )
    echoed = result.output
    # The first check has no closing quote, so only the start of the prompt is matched
    assert '"prompt": "one' in echoed
    assert '"prompt": "two"' in echoed


================================================
FILE: tests/test_chat_templates.py
================================================
from click.testing import CliRunner
import sys
import llm.cli
import pytest


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_template_system_only_no_duplicate_prompt(
    mock_model, logs_db, templates_path
):
    """A system-only template must not cause the user's prompt to be doubled."""
    # The template defines a system prompt and nothing else
    template_file = templates_path / "wild-french.yaml"
    template_file.write_text("system: Speak in French\n", "utf-8")

    mock_model.enqueue(["Bonjour !"])
    result = CliRunner().invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "-t", "wild-french"],
        input="hi\nquit\n",
        catch_exceptions=False,
    )
    assert result.exit_code == 0

    # Exactly one logged turn whose prompt is "hi", not "hi\nhi"
    logged = list(logs_db["responses"].rows)
    assert len(logged) == 1
    only_row = logged[0]
    assert only_row["prompt"] == "hi"
    assert only_row["system"] == "Speak in French"


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_system_fragments_only_first_turn(tmpdir, mock_model, logs_db):
    """A ``--system-fragment`` is recorded against the first chat turn only."""
    fragment_path = str(tmpdir / "sys.txt")
    with open(fragment_path, "w", encoding="utf-8") as fp:
        fp.write("System fragment content")

    # One queued reply per turn
    for reply in ("first", "second"):
        mock_model.enqueue([reply])
    result = CliRunner().invoke(
        llm.cli.cli,
        ["chat", "-m", "mock", "--system-fragment", fragment_path],
        input="Hi\nHi two\nquit\n",
        catch_exceptions=False,
    )
    assert result.exit_code == 0

    logged = list(logs_db["responses"].rows)
    assert len(logged) == 2
    first_id = logged[0]["id"]
    second_id = logged[1]["id"]

    # A single system_fragments row, pointing at the first response
    fragment_rows = list(logs_db["system_fragments"].rows)
    assert len(fragment_rows) == 1
    linked_response_id = fragment_rows[0]["response_id"]
    assert linked_response_id == first_id
    assert linked_response_id != second_id


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_chat_template_loads_tools_into_logs(logs_db, templates_path):
    """Tools named in a template are logged against the chat response."""
    # The template selects the echo model and lists two tools
    template_yaml = "".join(
        ["model: echo\n", "tools:\n", "- llm_version\n", "- llm_time\n"]
    )
    (templates_path / "mytools.yaml").write_text(template_yaml, "utf-8")

    result = CliRunner().invoke(
        llm.cli.cli,
        ["chat", "-t", "mytools"],
        input="hi\nquit\n",
        catch_exceptions=False,
    )
    assert result.exit_code == 0

    # The conversation should contain exactly one logged response
    logged = list(logs_db["responses"].rows)
    assert len(logged) == 1
    assert logged[0]["prompt"] == "hi"
    response_id = logged[0]["id"]

    # Names of the tools linked to that response, in alphabetical order
    tool_names = [
        row["name"]
        for row in logs_db.query(
            """
            select tools.name from tools
            join tool_responses tr on tr.tool_id = tools.id
            where tr.response_id = ?
            order by tools.name
            """,
            [response_id],
        )
    ]
    assert tool_names == ["llm_time", "llm_version"]


================================================
FILE: tests/test_cli_openai_models.py
================================================
from click.testing import CliRunner
from llm.cli import cli
import pytest
import sqlite_utils


@pytest.fixture
def mocked_models(httpx_mock):
    """Register a canned reply for the OpenAI models listing and return the mock."""
    model_entries = [
        {
            "id": model_id,
            "object": "model",
            "created": 1588537600,
            "owned_by": "openai",
        }
        for model_id in ("ada:2020-05-03", "babbage:2020-05-03")
    ]
    httpx_mock.add_response(
        method="GET",
        url="https://api.openai.com/v1/models",
        json={"data": model_entries},
        headers={"Content-Type": "application/json"},
    )
    return httpx_mock


def test_openai_models(mocked_models):
    """``llm openai models`` prints the mocked models as an aligned table."""
    expected_table = (
        "id                    owned_by    created                  \n"
        "ada:2020-05-03        openai      2020-05-03T20:26:40+00:00\n"
        "babbage:2020-05-03    openai      2020-05-03T20:26:40+00:00\n"
    )
    result = CliRunner().invoke(cli, ["openai", "models", "--key", "x"])
    assert result.exit_code == 0
    assert result.output == expected_table


def test_openai_options_min_max():
    """Out-of-range values for bounded OpenAI options are rejected with the limit."""
    # (option name, minimum allowed, maximum allowed)
    bounds = (
        ("temperature", 0, 2),
        ("top_p", 0, 1),
        ("frequency_penalty", -2, 2),
        ("presence_penalty", -2, 2),
    )
    runner = CliRunner()

    for option, lowest, highest in bounds:
        too_low = runner.invoke(cli, ["-m", "chatgpt", "-o", option, "-10"])
        assert too_low.exit_code == 1
        assert f"greater than or equal to {lowest}" in too_low.output
        too_high = runner.invoke(cli, ["-m", "chatgpt", "-o", option, "10"])
        assert too_high.exit_code == 1
        assert f"less than or equal to {highest}" in too_high.output


@pytest.mark.parametrize("model", ("gpt-4o-mini", "gpt-4o-audio-preview"))
@pytest.mark.parametrize("filetype", ("mp3", "wav"))
def test_only_gpt4_audio_preview_allows_mp3_or_wav(httpx_mock, model, filetype):
    """Audio attachments work with gpt-4o-audio-preview but not gpt-4o-mini."""
    attachment_url = f"https://www.example.com/example.{filetype}"
    content_type = "audio/mpeg" if filetype == "mp3" else "audio/wav"
    accepts_audio = model == "gpt-4o-audio-preview"

    # HEAD request for the attachment URL, mocked for both models
    httpx_mock.add_response(
        method="HEAD",
        url=attachment_url,
        content=b"binary-data",
        headers={"Content-Type": content_type},
    )
    if accepts_audio:
        # Chat completion request
        completion = {
            "id": "chatcmpl-AQT9a30kxEaM1bqxRPepQsPlCyGJh",
            "object": "chat.completion",
            "created": 1730871958,
            "model": "gpt-4o-audio-preview-2024-10-01",
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": "Why did the pelican get kicked out of the restaurant?\n\nBecause he had a big bill and no way to pay it!",
                        "refusal": None,
                    },
                    "finish_reason": "stop",
                }
            ],
            "usage": {
                "prompt_tokens": 55,
                "completion_tokens": 25,
                "total_tokens": 80,
                "prompt_tokens_details": {
                    "cached_tokens": 0,
                    "audio_tokens": 44,
                    "text_tokens": 11,
                    "image_tokens": 0,
                },
                "completion_tokens_details": {
                    "reasoning_tokens": 0,
                    "audio_tokens": 0,
                    "text_tokens": 25,
                    "accepted_prediction_tokens": 0,
                    "rejected_prediction_tokens": 0,
                },
            },
            "system_fingerprint": "fp_49254d0e9b",
        }
        httpx_mock.add_response(
            method="POST",
            url="https://api.openai.com/v1/chat/completions",
            json=completion,
            headers={"Content-Type": "application/json"},
        )
        # GET request for the attachment body
        httpx_mock.add_response(
            method="GET",
            url=attachment_url,
            content=b"binary-data",
            headers={"Content-Type": content_type},
        )

    cli_args = ["-m", model, "-a", attachment_url, "--no-stream", "--key", "x"]
    result = CliRunner().invoke(cli, cli_args)

    if not accepts_audio:
        assert result.exit_code == 1
        assert (
            f"This model does not support attachments of type '{content_type}'"
            in result.output
        )
    else:
        assert result.exit_code == 0
        assert result.output == (
            "Why did the pelican get kicked out of the restaurant?\n\n"
            "Because he had a big bill and no way to pay it!\n"
        )


@pytest.mark.parametrize("async_", (False, True))
@pytest.mark.parametrize("usage", (None, "-u", "--usage"))
def test_gpt4o_mini_sync_and_async(monkeypatch, tmpdir, httpx_mock, async_, usage):
    """gpt-4o-mini prompts succeed and are logged, with or without ``--async``."""
    user_path = tmpdir / "user_dir"
    log_db = user_path / "logs.db"
    monkeypatch.setenv("LLM_USER_PATH", str(user_path))
    assert not log_db.exists()

    # Chat completion request
    completion = {
        "id": "chatcmpl-AQT9a30kxEaM1bqxRPepQsPlCyGJh",
        "object": "chat.completion",
        "created": 1730871958,
        "model": "gpt-4o-mini",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": "Ho ho ho",
                    "refusal": None,
                },
                "finish_reason": "stop",
            }
        ],
        "usage": {
            "prompt_tokens": 1000,
            "completion_tokens": 2000,
            "total_tokens": 12,
        },
        "system_fingerprint": "fp_49254d0e9b",
    }
    httpx_mock.add_response(
        method="POST",
        url="https://api.openai.com/v1/chat/completions",
        json=completion,
        headers={"Content-Type": "application/json"},
    )

    args = ["-m", "gpt-4o-mini", "--key", "x", "--no-stream"]
    args += [usage] if usage else []
    args += ["--async"] if async_ else []
    # stderr is kept separate so the usage line can be checked on its own
    result = CliRunner(mix_stderr=False).invoke(cli, args, catch_exceptions=False)
    assert result.exit_code == 0
    assert result.output == "Ho ho ho\n"
    if usage:
        assert result.stderr == "Token usage: 1,000 input, 2,000 output\n"

    # The response should have been written to a newly created logs.db
    assert log_db.exists()
    responses_table = sqlite_utils.Database(str(log_db))["responses"]
    assert responses_table.count == 1
    logged_row = next(responses_table.rows)
    assert logged_row["response"] == "Ho ho ho"


================================================
FILE: tests/test_cli_options.py
================================================
from click.testing import CliRunner
from llm.cli import cli
import pytest
import json


@pytest.mark.parametrize(
    "args,expected_options,expected_error",
    (
        (
            ["gpt-4o-mini", "temperature", "0.5"],
            {"gpt-4o-mini": {"temperature": "0.5"}},
            None,
        ),
        (
            ["gpt-4o-mini", "temperature", "invalid"],
            {},
            "Error: temperature\n  Input should be a valid number",
        ),
        (
            ["gpt-4o-mini", "not-an-option", "invalid"],
            {},
            "Extra inputs are not permitted",
        ),
    ),
)
def test_set_model_default_options(user_path, args, expected_options, expected_error):
    """``models options set`` saves valid options and rejects invalid ones."""
    options_file = user_path / "model_options.json"
    assert not options_file.exists()
    result = CliRunner().invoke(cli, ["models", "options", "set"] + args)

    if expected_error:
        # Failure case: exit code 1 with the error text in the output
        assert result.exit_code == 1
        assert expected_error in result.output
        return

    assert result.exit_code == 0
    assert options_file.exists()
    saved = json.loads(options_file.read_text("utf-8"))
    assert saved == expected_options


def test_model_options_list_and_show(user_path):
    """``models options list`` and ``show`` print the saved defaults."""
    saved_defaults = {
        "gpt-4o-mini": {"temperature": 0.5},
        "gpt-4o": {"temperature": 0.7},
    }
    (user_path / "model_options.json").write_text(json.dumps(saved_defaults), "utf-8")
    runner = CliRunner()

    # list covers every model that has saved options
    listed = runner.invoke(cli, ["models", "options", "list"])
    assert listed.exit_code == 0
    expected_listing = "gpt-4o-mini:\n  temperature: 0.5\ngpt-4o:\n  temperature: 0.7\n"
    assert listed.output == expected_listing

    # show is limited to the named model
    shown = runner.invoke(cli, ["models", "options", "show", "gpt-4o-mini"])
    assert shown.exit_code == 0
    assert shown.output == "temperature: 0.5\n"


def test_model_options_clear(user_path):
    """``models options clear`` removes a whole model or one named option."""
    options_file = user_path / "model_options.json"
    initial_options = {
        "gpt-4o-mini": {"temperature": 0.5},
        "gpt-4o": {"temperature": 0.7, "top_p": 0.9},
    }
    options_file.write_text(json.dumps(initial_options), "utf-8")
    assert options_file.exists()
    runner = CliRunner()

    # Drop everything saved for gpt-4o-mini
    clear_model = runner.invoke(cli, ["models", "options", "clear", "gpt-4o-mini"])
    assert clear_model.exit_code == 0

    # Drop only top_p for gpt-4o
    clear_option = runner.invoke(
        cli, ["models", "options", "clear", "gpt-4o", "top_p"]
    )
    assert clear_option.exit_code == 0

    remaining = json.loads(options_file.read_text("utf-8"))
    assert remaining == {"gpt-4o": {"temperature": 0.7}}


def test_prompt_uses_model_options(user_path):
    """Saved model options are applied to prompts, including through an alias."""

    def echoed(**extra):
        # JSON the echo model is expected to print, plus any extra keys
        payload = {
            "prompt": "prompt",
            "system": "",
            "attachments": [],
            "stream": True,
            "previous": [],
        }
        payload.update(extra)
        return payload

    options_file = user_path / "model_options.json"
    options_file.write_text("{}", "utf-8")
    runner = CliRunner()

    # With nothing saved, no options appear in the echoed prompt
    plain = runner.invoke(cli, ["-m", "echo", "prompt"])
    assert plain.exit_code == 0
    assert json.loads(plain.output) == echoed()

    # Once an option is saved it is picked up automatically
    options_file.write_text(json.dumps({"echo": {"example_bool": True}}), "utf-8")
    with_default = runner.invoke(cli, ["-m", "echo", "prompt"])
    assert with_default.exit_code == 0
    assert json.loads(with_default.output) == echoed(options={"example_bool": True})

    # An explicit -o value takes precedence over the saved one
    overridden = runner.invoke(
        cli, ["-m", "echo", "prompt", "-o", "example_bool", "false"]
    )
    assert overridden.exit_code == 0
    assert json.loads(overridden.output) == echoed(options={"example_bool": False})

    # Addressing the model through an alias uses the same saved option
    (user_path / "aliases.json").write_text('{"e": "echo"}', "utf-8")
    via_alias = runner.invoke(cli, ["-m", "e", "prompt"])
    assert via_alias.exit_code == 0
    assert json.loads(via_alias.output) == echoed(options={"example_bool": True})


================================================
FILE: tests/test_embed.py
================================================
import json
import llm
from llm.embeddings import Entry
import pytest
import sqlite_utils
from unittest.mock import ANY


def test_demo_plugin():
    """The embed-demo model returns the expected 16-value vector."""
    expected_vector = [5, 5, *([0] * 14)]
    demo_model = llm.get_embedding_model("embed-demo")
    assert demo_model.embed("hello world") == expected_vector


@pytest.mark.parametrize(
    "batch_size,expected_batches",
    (
        (None, 100),
        (10, 100),
    ),
)
def test_embed_huge_list(batch_size, expected_batches):
    """``embed_multi`` yields results lazily and processes input in batches."""
    model = llm.get_embedding_model("embed-demo")
    texts = ("hello {}".format(n) for n in range(1000))
    kwargs = {"batch_size": batch_size} if batch_size else {}
    results = model.embed_multi(texts, **kwargs)
    assert repr(type(results)) == "<class 'generator'>"

    # Tally results by the first two values of each vector
    tally = {}
    for vector in results:
        pair = (vector[0], vector[1])
        tally.setdefault(pair, 0)
        tally[pair] += 1
    assert tally == {(5, 1): 10, (5, 2): 90, (5, 3): 900}
    assert model.batch_count == expected_batches


def test_embed_store(collection):
    """Embedding with ``store=True`` saves the original text in ``content``."""
    collection.embed("3", "hello world again", store=True)
    assert collection.db["embeddings"].count == 3
    stored_row = next(collection.db["embeddings"].rows_where("id = ?", ["3"]))
    assert stored_row["content"] == "hello world again"


def test_embed_metadata(collection):
    """Metadata is stored as JSON and returned on entries from ``similar()``."""
    expected_metadata = {"foo": "bar"}
    collection.embed("3", "hello yet again", metadata={"foo": "bar"}, store=True)
    assert collection.db["embeddings"].count == 3

    stored_row = next(collection.db["embeddings"].rows_where("id = ?", ["3"]))
    assert json.loads(stored_row["metadata"]) == expected_metadata

    # The closest match to the same text should be the new entry
    best_match = collection.similar("hello yet again")[0]
    assert best_match.id == "3"
    assert best_match.metadata == expected_metadata
    assert best_match.content == "hello yet again"


def test_collection(collection):
    """The fixture collection holds two embeddings with hashes and timestamps."""

    def stored_row(entry_id, leading_values, text):
        # Expected embeddings row: two leading values followed by 14 zeros
        return {
            "collection_id": 1,
            "id": entry_id,
            "embedding": llm.encode(leading_values + [0] * 14),
            "content": None,
            "content_blob": None,
            "content_hash": collection.content_hash(text),
            "metadata": None,
            "updated": ANY,
        }

    assert collection.id == 1
    assert collection.count() == 2

    # Check the rows in the embeddings table
    rows = list(collection.db["embeddings"].rows)
    assert rows == [
        stored_row("1", [5, 5], "hello world"),
        stored_row("2", [7, 5], "goodbye world"),
    ]
    first_updated = rows[0]["updated"]
    assert isinstance(first_updated, int) and first_updated > 0


def test_similar(collection):
    """``similar()`` returns both entries, highest score first."""
    expected_entries = [
        Entry(id="1", score=pytest.approx(0.9999999999999999)),
        Entry(id="2", score=pytest.approx(0.9863939238321437)),
    ]
    matches = list(collection.similar("hello world"))
    assert matches == expected_entries


def test_similar_prefixed(collection):
    """With prefix="2", similar() only returns the entry whose ID is "2"."""
    expected_entries = [Entry(id="2", score=pytest.approx(0.9863939238321437))]
    matches = list(collection.similar("hello world", prefix="2"))
    assert matches == expected_entries


def test_similar_by_id(collection):
    """similar_by_id("1") returns the other stored entry, not entry "1" itself."""
    expected_entries = [Entry(id="2", score=pytest.approx(0.9863939238321437))]
    matches = list(collection.similar_by_id("1"))
    assert matches == expected_entries


@pytest.mark.parametrize(
    "batch_size,expected_batches",
    (
        (None, 100),
        (5, 200),
    ),
)
@pytest.mark.parametrize("with_metadata", (False, True))
def test_embed_multi(with_metadata, batch_size, expected_batches):
    """Bulk-embed 1000 items and check stored columns and the batch count.

    Runs with and without metadata, and with the default or an explicit
    batch size.
    """
    db = sqlite_utils.Database(memory=True)
    collection = llm.Collection("test", db, model_id="embed-demo")
    assert getattr(collection.model(), "batch_count", 0) == 0

    # Only pass batch_size through when the test case specifies one
    options = {} if batch_size is None else {"batch_size": batch_size}
    pairs = ((str(n), "hello {}".format(n)) for n in range(1000))
    if with_metadata:
        triples = ((item_id, text, {"meta": item_id}) for item_id, text in pairs)
        collection.embed_multi_with_metadata(triples, **options)
    else:
        # Exercise store=True here too
        collection.embed_multi(pairs, store=True, **options)

    rows = list(db["embeddings"].rows)
    assert len(rows) == 1000
    metadata_total = sum(1 for row in rows if row["metadata"] is not None)
    content_total = sum(1 for row in rows if row["content"] is not None)
    if with_metadata:
        assert metadata_total == 1000
        assert content_total == 0
    else:
        assert metadata_total == 0
        assert content_total == 1000
    # Every row should have content_hash set
    for row in rows:
        assert row["content_hash"] is not None
    # Check batch count
    assert collection.model().batch_count == expected_batches


def test_collection_delete(collection):
    """delete() removes both the collection row and all of its embeddings."""
    database = collection.db
    embeddings, collections = database["embeddings"], database["collections"]
    assert embeddings.count == 2
    assert collections.count == 1
    collection.delete()
    assert embeddings.count == 0
    assert collections.count == 0


def test_binary_only_and_text_only_embedding_models():
    """Models restricted to one input kind raise ValueError for the other kind."""
    binary_only = llm.get_embedding_model("embed-binary-only")
    text_only = llm.get_embedding_model("embed-text-only")

    assert binary_only.supports_binary
    assert not binary_only.supports_text
    assert not text_only.supports_binary
    assert text_only.supports_text

    # (model, input it must reject, input it must accept)
    cases = (
        (binary_only, "hello world", b"hello world"),
        (text_only, b"hello world", "hello world"),
    )

    for model, rejected, accepted in cases:
        with pytest.raises(ValueError):
            model.embed(rejected)
        model.embed(accepted)

    # Try the multi versions too
    # Have to call list() on this or the generator is not evaluated
    for model, rejected, accepted in cases:
        with pytest.raises(ValueError):
            list(model.embed_multi([rejected]))
        list(model.embed_multi([accepted]))


================================================
FILE: tests/test_embed_cli.py
================================================
from click.testing import CliRunner
from llm.cli import cli
from llm import Collection
import json
import pathlib
import pytest
import sqlite_utils
import sys
from unittest.mock import ANY


@pytest.mark.parametrize(
    "format_,expected",
    (
        ("json", "[5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n"),
        (
            "base64",
            (
                "AACgQAAAoEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
                "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==\n"
            ),
        ),
        (
            "hex",
            (
                "0000a0400000a04000000000000000000000000000000000000000000"
                "000000000000000000000000000000000000000000000000000000000"
                "00000000000000\n"
            ),
        ),
        (
            "blob",
            # \xef\xbf\xbd is the UTF-8 encoding of U+FFFD (replacement character).
            # NOTE(review): presumably the raw 0xa0 bytes of the blob are not valid
            # UTF-8 and get replaced when the runner decodes the output - confirm.
            (
                b"\x00\x00\xef\xbf\xbd@\x00\x00\xef\xbf\xbd@\x00\x00\x00"
                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n"
            ).decode("utf-8"),
        ),
    ),
)
@pytest.mark.parametrize("scenario", ("argument", "file", "stdin"))
def test_embed_output_format(tmpdir, format_, expected, scenario):
    """Check `llm embed --format` output for each supported format.

    The text "hello world" is supplied three ways: via -c, via -i with a file
    path, and via -i - reading standard input. Every combination must exit 0
    and print exactly ``expected``.
    """
    runner = CliRunner()
    args = ["embed", "--format", format_, "-m", "embed-demo"]
    input = None
    if scenario == "argument":
        args.extend(["-c", "hello world"])
    elif scenario == "file":
        path = tmpdir / "input.txt"
        path.write_text("hello world", "utf-8")
        args.extend(["-i", str(path)])
    elif scenario == "stdin":
        input = "hello world"
        # "-" as the input path means read from standard input
        args.extend(["-i", "-"])
    result = runner.invoke(cli, args, input=input)
    assert result.exit_code == 0
    assert result.output == expected


@pytest.mark.parametrize(
    "args,expected_error",
    ((["-c", "Content", "stories"], "Must provide both collection and id"),),
)
def test_embed_errors(args, expected_error):
    """Invalid `llm embed` argument combinations exit 1 with an error message."""
    outcome = CliRunner().invoke(cli, ["embed", *args])
    assert outcome.exit_code == 1
    assert expected_error in outcome.output


@pytest.mark.parametrize(
    "metadata,metadata_error",
    (
        (None, None),
        ('{"foo": "bar"}', None),
        ('{"foo": [1, 2, 3]}', None),
        ("[1, 2, 3]", "metadata must be a JSON object"),  # Must be a dictionary
        ('{"foo": "incomplete}', "metadata must be valid JSON"),
    ),
)
def test_embed_store(user_path, metadata, metadata_error):
    """End-to-end check of `llm embed` storing into a collection.

    Stages, in order:
    1. Embedding without a collection/id must not create embeddings.db.
    2. Embedding with collection "items" and id "1" stores one row; invalid
       --metadata values instead exit with code 2 and an error message.
    3. The collection is listed by `llm collections` (plain and --json).
    4. `llm collections delete` removes the collection and its embeddings.
    """
    embeddings_db = user_path / "embeddings.db"
    assert not embeddings_db.exists()
    runner = CliRunner()
    result = runner.invoke(cli, ["embed", "-c", "hello", "-m", "embed-demo"])
    assert result.exit_code == 0
    # Should not have created the table
    assert not embeddings_db.exists()
    # Now run it to store
    args = ["embed", "-c", "hello", "-m", "embed-demo", "items", "1"]
    if metadata is not None:
        args.extend(("--metadata", metadata))
    result = runner.invoke(cli, args)
    if metadata_error:
        # Should have returned an error message about invalid metadata
        assert result.exit_code == 2
        assert metadata_error in result.output
        return
    # No error, should have succeeded and stored the data
    assert result.exit_code == 0
    assert embeddings_db.exists()
    # Check the contents
    db = sqlite_utils.Database(str(embeddings_db))
    rows = list(db["collections"].rows)
    assert rows == [{"id": 1, "name": "items", "model": "embed-demo"}]
    # The metadata column is compared against the exact string passed on the CLI
    expected_metadata = None
    if metadata and not metadata_error:
        expected_metadata = metadata
    rows = list(db["embeddings"].rows)
    assert rows == [
        {
            "collection_id": 1,
            "id": "1",
            "embedding": (
                b"\x00\x00\xa0@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
                b"\x00\x00\x00\x00\x00\x00\x00"
            ),
            # No --store flag was passed, so only the hash of the content is kept
            "content": None,
            "content_blob": None,
            "content_hash": Collection.content_hash("hello"),
            "metadata": expected_metadata,
            "updated": ANY,
        }
    ]
    # Should show up in 'llm collections list'
    for is_json in (False, True):
        # Bare `llm collections` is used for the plain-text case,
        # `llm collections list --json` for the JSON case
        args = ["collections"]
        if is_json:
            args.extend(["list", "--json"])
        result2 = runner.invoke(cli, args)
        assert result2.exit_code == 0
        if is_json:
            assert json.loads(result2.output) == [
                {"name": "items", "model": "embed-demo", "num_embeddings": 1}
            ]
        else:
            assert result2.output == "items: embed-demo\n  1 embedding\n"

    # And test deleting it too
    result = runner.invoke(cli, ["collections", "delete", "items"])
    assert result.exit_code == 0
    assert db["collections"].count == 0
    assert db["embeddings"].count == 0


def test_embed_store_binary(user_path):
    """`llm embed --binary --store` reads bytes from stdin and stores them.

    The raw input ends up in content_blob (content stays None) alongside the
    embedding and a content hash.
    """
    runner = CliRunner()
    args = ["embed", "-m", "embed-demo", "items", "2", "--binary", "--store"]
    # No -c / -i option is given; the binary content is supplied on stdin
    result = runner.invoke(cli, args, input=b"\x00\x01\x02")
    assert result.exit_code == 0
    db = sqlite_utils.Database(str(user_path / "embeddings.db"))
    rows = list(db["embeddings"].rows)
    assert rows == [
        {
            "collection_id": 1,
            "id": "2",
            "embedding": (
                b"\x00\x00@@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
            ),
            "content": None,
            "content_blob": b"\x00\x01\x02",
            "content_hash": b'\xb9_g\xf6\x1e\xbb\x03a\x96"\xd7\x98\xf4_\xc2\xd3',
            "metadata": None,
            "updated": ANY,
        }
    ]


def test_collection_delete_errors(user_path):
    """Deleting an unknown collection exits 1 and leaves existing ones intact."""
    database = sqlite_utils.Database(str(user_path / "embeddings.db"))
    Collection("items", database, model_id="embed-demo").embed("1", "hello")
    assert database["collections"].count == 1
    assert database["embeddings"].count == 1

    outcome = CliRunner().invoke(
        cli, ["collections", "delete", "does-not-exist"], catch_exceptions=False
    )
    assert outcome.exit_code == 1
    assert "Collection does not exist" in outcome.output
    # The real collection must still be there
    assert database["collections"].count == 1


@pytest.mark.parametrize(
    "args,expected_error",
    (
        ([], "Missing argument 'COLLECTION'"),
        (["badcollection", "-c", "content"], "Collection does not exist"),
        (["demo", "bad-id"], "ID not found in collection"),
    ),
)
def test_similar_errors(args, expected_error, user_path_with_embeddings):
    """Bad `llm similar` invocations fail with a non-zero exit and a message."""
    outcome = CliRunner().invoke(cli, ["similar", *args], catch_exceptions=False)
    assert outcome.exit_code != 0
    assert expected_error in outcome.output


def test_similar_by_id_cli(user_path_with_embeddings):
    """`llm similar demo 1` prints the other stored item as a JSON object."""
    outcome = CliRunner().invoke(
        cli, ["similar", "demo", "1"], catch_exceptions=False
    )
    assert outcome.exit_code == 0
    expected_match = {
        "id": "2",
        "score": pytest.approx(0.9863939238321437),
        "content": "goodbye world",
        "metadata": None,
    }
    assert json.loads(outcome.output) == expected_match


@pytest.mark.parametrize("option", ("-p", "--plain"))
def test_similar_by_id_cli_output_plain(user_path_with_embeddings, option):
    """Both spellings of the plain-output flag give the human-readable layout."""
    outcome = CliRunner().invoke(
        cli, ["similar", "demo", "1", option], catch_exceptions=False
    )
    assert outcome.exit_code == 0
    # Replace score with a placeholder
    text = outcome.output
    before_score = text.split("(")[0]
    after_score = text.split(")")[1]
    normalized = before_score + "(score)" + after_score
    assert normalized == "2 (score)\n\n  goodbye world\n\n"


@pytest.mark.parametrize("scenario", ("argument", "file", "stdin"))
def test_similar_by_content_cli(tmpdir, user_path_with_embeddings, scenario):
    """`llm similar demo` accepts query text via -c, a file, or standard input.

    Each route must print two JSON lines: the exact match, then the nearer miss.
    """
    cli_args = ["similar", "demo"]
    stdin_text = None
    if scenario == "argument":
        cli_args += ["-c", "hello world"]
    elif scenario == "file":
        content_file = tmpdir / "content.txt"
        content_file.write_text("hello world", "utf-8")
        cli_args += ["-i", str(content_file)]
    elif scenario == "stdin":
        stdin_text = "hello world"
        cli_args += ["-i", "-"]

    outcome = CliRunner().invoke(
        cli, cli_args, input=stdin_text, catch_exceptions=False
    )
    assert outcome.exit_code == 0
    # Ignore blank lines; every remaining line is one JSON document
    json_lines = [line for line in outcome.output.splitlines() if line.strip()]
    assert len(json_lines) == 2
    best, runner_up = json_lines
    assert json.loads(best) == {
        "id": "1",
        "score": pytest.approx(0.9999999999999999),
        "content": "hello world",
        "metadata": None,
    }
    assert json.loads(runner_up) == {
        "id": "2",
        "score": pytest.approx(0.9863939238321437),
        "content": "goodbye world",
        "metadata": None,
    }


@pytest.mark.parametrize(
    "prefix,expected_result",
    (
        (
            1,
            {
                "id": "1",
                "score": pytest.approx(0.7071067811865475),
                "content": "hello world",
                "metadata": None,
            },
        ),
        (
            2,
            {
                "id": "2",
                "score": pytest.approx(0.8137334712067349),
                "content": "goodbye world",
                "metadata": None,
            },
        ),
    ),
)
def test_similar_by_content_prefixed(
    user_path_with_embeddings, prefix, expected_result
):
    """`llm similar --prefix` limits results to IDs starting with that prefix.

    With ``-n 1`` the command prints a single JSON object, which must be the
    entry whose ID matches the prefix.
    """
    runner = CliRunner()
    result = runner.invoke(
        cli,
        # Command-line arguments are strings; convert the integer parameter
        # explicitly rather than relying on the parser tolerating a non-str.
        ["similar", "demo", "-c", "world", "--prefix", str(prefix), "-n", "1"],
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert json.loads(result.output) == expected_result


@pytest.mark.parametrize("use_stdin", (False, True))
@pytest.mark.parametrize("prefix", (None, "prefix"))
@pytest.mark.parametrize("prepend", (None, "search_document: "))
@pytest.mark.parametrize(
    "filename,content",
    (
        ("phrases.csv", "id,phrase\n1,hello world\n2,goodbye world"),
        ("phrases.tsv", "id\tphrase\n1\thello world\n2\tgoodbye world"),
        (
            "phrases.jsonl",
            '{"id": 1, "phrase": "hello world"}\n{"id": 2, "phrase": "goodbye world"}',
        ),
        (
            "phrases.json",
            '[{"id": 1, "phrase": "hello world"}, {"id": 2, "phrase": "goodbye world"}]',
        ),
    ),
)
def test_embed_multi_file_input(tmpdir, use_stdin, prefix, prepend, filename, content):
    """`llm embed-multi` ingests CSV, TSV, JSON and newline-delimited JSON.

    Input comes either from a file path or from stdin ("-"), optionally with
    --prefix and --prepend. Two rows must be stored under the expected IDs.
    """
    database_file = tmpdir / "embeddings.db"
    cli_args = [
        "embed-multi",
        "phrases",
        "-d",
        str(database_file),
        "-m",
        "embed-demo",
    ]
    stdin_text = None
    if use_stdin:
        stdin_text = content
        cli_args.append("-")
    else:
        source_file = tmpdir / filename
        source_file.write_text(content, "utf-8")
        cli_args.append(str(source_file))
    if prefix:
        cli_args += ["--prefix", prefix]
    if prepend:
        cli_args += ["--prepend", prepend]
    # Auto-detection can't detect JSON-nl, so make that explicit
    if filename.endswith(".jsonl"):
        cli_args += ["--format", "nl"]

    outcome = CliRunner().invoke(
        cli, cli_args, input=stdin_text, catch_exceptions=False
    )
    assert outcome.exit_code == 0
    # Check that everything was embedded correctly
    database = sqlite_utils.Database(str(database_file))
    assert database["embeddings"].count == 2
    stored_ids = [row["id"] for row in database["embeddings"].rows]
    assert stored_ids == [(prefix or "") + suffix for suffix in ("1", "2")]


def test_embed_multi_files_binary_store(tmpdir):
    """`llm embed-multi --files ... --binary --store` stores raw file bytes.

    A single ``file.bin`` matched by ``*.bin`` must produce one row whose ID is
    the file name and whose content_blob is the file's bytes.
    """
    db_path = tmpdir / "embeddings.db"
    args = ["embed-multi", "binfiles", "-d", str(db_path), "-m", "embed-demo"]
    bin_path = tmpdir / "file.bin"
    bin_path.write(b"\x00\x01\x02")
    # --files takes two values: a directory and a glob pattern
    args.extend(("--files", str(tmpdir), "*.bin", "--store", "--binary"))
    runner = CliRunner()
    result = runner.invoke(cli, args, catch_exceptions=False)
    assert result.exit_code == 0
    db = sqlite_utils.Database(str(db_path))
    assert db["embeddings"].count == 1
    row = list(db["embeddings"].rows)[0]
    assert row == {
        "collection_id": 1,
        "id": "file.bin",
        "embedding": (
            b"\x00\x00@@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        ),
        "content": None,
        "content_blob": b"\x00\x01\x02",
        "content_hash": b'\xb9_g\xf6\x1e\xbb\x03a\x96"\xd7\x98\xf4_\xc2\xd3',
        "metadata": None,
        "updated": ANY,
    }


@pytest.mark.parametrize("use_other_db", (True, False))
@pytest.mark.parametrize("prefix", (None, "prefix"))
@pytest.mark.parametrize("prepend", (None, "search_document: "))
def test_embed_multi_sql(tmpdir, use_other_db, prefix, prepend):
    """`llm embed-multi --sql` embeds the rows returned by a SQL query.

    The source table lives either in the embeddings database itself or in a
    second database made available with --attach.
    """
    embeddings_path = str(tmpdir / "embeddings.db")
    source_db = sqlite_utils.Database(embeddings_path)
    options = []
    if use_other_db:
        other_path = str(tmpdir / "other.db")
        source_db = sqlite_utils.Database(other_path)
        options += ["--attach", "other", other_path]
    if prefix:
        options += ["--prefix", prefix]
    if prepend:
        options += ["--prepend", prepend]

    source_db["content"].insert_all(
        [
            {"id": 1, "name": "cli", "description": "Command line interface"},
            {"id": 2, "name": "sql", "description": "Structured query language"},
        ],
        pk="id",
    )

    base_args = [
        "embed-multi",
        "stuff",
        "-d",
        embeddings_path,
        "--sql",
        "select * from content",
        "-m",
        "embed-demo",
        "--store",
    ]
    outcome = CliRunner().invoke(cli, base_args + options)
    assert outcome.exit_code == 0

    embeddings_db = sqlite_utils.Database(embeddings_path)
    assert embeddings_db["embeddings"].count == 2
    stored = list(
        embeddings_db.query("select id, content from embeddings order by id")
    )
    # Stored content is the non-ID columns joined by a space, after any --prepend
    assert stored == [
        {"id": (prefix or "") + row_id, "content": (prepend or "") + text}
        for row_id, text in (
            ("1", "cli Command line interface"),
            ("2", "sql Structured query language"),
        )
    ]


def test_embed_multi_batch_size(embed_demo, tmpdir):
    """--batch-size 8 over 100 generated rows results in 13 model batches."""
    database_path = str(tmpdir / "data.db")
    # Recursive CTE producing 100 rows: ids 1..100 with a "Row N" value
    hundred_rows_sql = """
    with recursive cte (id) as (
      select 1
      union all
      select id+1 from cte where id < 100
    )
    select id, 'Row ' || cast(id as text) as value from cte
    """
    assert getattr(embed_demo, "batch_count", 0) == 0
    cli_args = [
        "embed-multi",
        "rows",
        "--sql",
        hundred_rows_sql,
        "-d",
        database_path,
        "-m",
        "embed-demo",
        "--store",
        "--batch-size",
        "8",
    ]
    outcome = CliRunner().invoke(cli, cli_args)
    assert outcome.exit_code == 0
    assert sqlite_utils.Database(database_path)["embeddings"].count == 100
    # 100 rows in batches of 8 -> 12 full batches plus one partial batch
    assert embed_demo.batch_count == 13


@pytest.fixture
def multi_files(tmpdir):
    """Create a small tree of files under tmpdir/files.

    Returns a ``(db_path, files_dir)`` tuple, where ``db_path`` is the string
    path to use for the embeddings database.
    """
    files_root = tmpdir / "files"
    tree = {
        "file1.txt": b"hello world",
        "file2.txt": b"goodbye world",
        "nested/one.txt": b"one",
        "nested/two.txt": b"two",
        "nested/more/three.txt": b"three",
        # This tests the fallback to latin-1 encoding:
        "nested/more/ignored.ini": b"Has weird \x96 character",
    }
    for relative_name, payload in tree.items():
        target = pathlib.Path(files_root / relative_name)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(payload)
    return str(tmpdir / "files.db"), tmpdir / "files"


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
@pytest.mark.parametrize("scenario", ("single", "multi"))
@pytest.mark.parametrize("prepend", (None, "search_document: "))
def test_embed_multi_files(multi_files, scenario, prepend):
    """`llm embed-multi --files` embeds files matched by directory + glob pairs.

    "single" uses one --files pair with a recursive ``**/*.txt`` pattern; IDs
    are paths relative to that directory. "multi" uses two --files pairs; IDs
    are relative to each pair's own directory.
    """
    db_path, files = multi_files
    # The multi_files fixture has already written the regular files. Add one
    # extra file inside a *directory* named "more.txt": that directory name
    # matches the "*.txt" patterns used below, yet the expected rows contain
    # no entry for it.
    path = pathlib.Path(files / "nested/more.txt/ignored.ini")
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(b"Has weird \x96 character")

    extra_args = []

    if prepend:
        extra_args.extend(("--prepend", prepend))
    if scenario == "single":
        extra_args.extend(["--files", str(files), "**/*.txt"])
    else:
        extra_args.extend(
            [
                "--files",
                str(files / "nested" / "more"),
                "**/*.ini",
                "--files",
                str(files / "nested"),
                "*.txt",
            ]
        )

    runner = CliRunner()
    result = runner.invoke(
        cli,
        [
            "embed-multi",
            "files",
            "-d",
            db_path,
            "-m",
            "embed-demo",
            "--store",
        ]
        + extra_args,
    )
    assert result.exit_code == 0
    embeddings_db = sqlite_utils.Database(db_path)
    rows = list(embeddings_db.query("select id, content from embeddings order by id"))
    if scenario == "single":
        assert rows == [
            {"id": "file1.txt", "content": (prepend or "") + "hello world"},
            {"id": "file2.txt", "content": (prepend or "") + "goodbye world"},
            {"id": "nested/more/three.txt", "content": (prepend or "") + "three"},
            {"id": "nested/one.txt", "content": (prepend or "") + "one"},
            {"id": "nested/two.txt", "content": (prepend or "") + "two"},
        ]
    else:
        assert rows == [
            {
                # The fixture's .ini file is not valid UTF-8, so the stored
                # text shows the byte 0x96 decoded as the character U+0096
                "id": "ignored.ini",
                "content": (prepend or "") + "Has weird \x96 character",
            },
            {"id": "one.txt", "content": (prepend or "") + "one"},
            {"id": "two.txt", "content": (prepend or "") + "two"},
        ]


@pytest.mark.parametrize(
    "args,expected_error",
    ((["not-a-dir", "*.txt"], "Invalid directory: not-a-dir"),),
)
def test_embed_multi_files_errors(multi_files, args, expected_error):
    """A --files directory that does not exist is a usage error (exit code 2)."""
    command = ["embed-multi", "files", "-m", "embed-demo", "--files", *args]
    outcome = CliRunner().invoke(cli, command)
    assert outcome.exit_code == 2
    assert expected_error in outcome.output


@pytest.mark.parametrize(
    "extra_args,expected_error",
    (
        # With no args default utf-8 with latin-1 fallback should work
        ([], None),
        (["--encoding", "utf-8"], "Could not decode text in file"),
        (["--encoding", "latin-1"], None),
        (["--encoding", "latin-1", "--encoding", "utf-8"], None),
        (["--encoding", "utf-8", "--encoding", "latin-1"], None),
    ),
)
def test_embed_multi_files_encoding(multi_files, extra_args, expected_error):
    """--encoding decides which codecs are tried when reading --files input.

    A file that cannot be decoded is reported on stderr, but the command still
    exits with code 0.
    """
    db_path, files = multi_files
    base_args = [
        "embed-multi",
        "files",
        "-d",
        db_path,
        "-m",
        "embed-demo",
        "--files",
        str(files / "nested" / "more"),
        "*.ini",
        "--store",
    ]
    outcome = CliRunner(mix_stderr=False).invoke(cli, base_args + extra_args)
    # Should still succeed with 0 even when a warning is shown
    assert outcome.exit_code == 0
    if expected_error:
        assert expected_error in outcome.stderr
        return

    assert not outcome.stderr
    stored = list(
        sqlite_utils.Database(db_path).query(
            "select id, content from embeddings order by id"
        )
    )
    assert stored == [
        {"id": "ignored.ini", "content": "Has weird \x96 character"},
    ]


def test_default_embedding_model():
    """Walk `llm embed-models default` through show, set, remove and use."""
    runner = CliRunner()
    unset_message = "<No default embedding model set>\n"

    def default_command(*extra):
        # Run `llm embed-models default ...`, require success, return its output
        outcome = runner.invoke(cli, ["embed-models", "default", *extra])
        assert outcome.exit_code == 0
        return outcome.output

    assert default_command() == unset_message
    # Setting via the alias "ada-002" reports the full model ID afterwards
    default_command("ada-002")
    assert default_command() == "text-embedding-ada-002\n"
    default_command("--remove-default")
    assert default_command() == unset_message
    # Now set the default and actually use it
    default_command("embed-demo")
    embedded = runner.invoke(cli, ["embed", "-c", "hello world"])
    assert embedded.exit_code == 0
    assert embedded.output == "[5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n"


@pytest.mark.parametrize(
    "args,expected_model_id",
    (
        (["-q", "text-embedding-3-large"], "text-embedding-3-large"),
        (["-q", "text", "-q", "3"], "text-embedding-3-large"),
    ),
)
def test_llm_embed_models_query(user_path, args, expected_model_id):
    """`llm embed-models -q` lists models matching one or several search terms."""
    outcome = CliRunner().invoke(
        cli, ["embed-models", *args], catch_exceptions=False
    )
    assert outcome.exit_code == 0
    assert expected_model_id in outcome.output


@pytest.mark.parametrize("default_is_set", (False, True))
@pytest.mark.parametrize("command", ("embed", "embed-multi"))
def test_default_embed_model_errors(user_path, default_is_set, command):
    """`embed` / `embed-multi` without -m require a default embedding model.

    With no default configured the command exits 1 with an explanatory
    message; once a default is set the same invocation succeeds.
    """
    runner = CliRunner()
    if default_is_set:
        (user_path / "default_embedding_model.txt").write_text(
            "embed-demo", encoding="utf8"
        )
    input = None
    if command == "embed-multi":
        args = ["embed-multi", "example", "-"]
        input = "id,name\n1,hello"
    else:
        args = ["embed", "example", "1", "-c", "hello world"]
    result = runner.invoke(cli, args, input=input, catch_exceptions=False)
    if default_is_set:
        assert result.exit_code == 0
    else:
        assert result.exit_code == 1
        assert (
            "You need to specify an embedding model (no default model is set)"
            in result.output
        )
        # Now set the default model and try again
        result2 = runner.invoke(cli, ["embed-models", "default", "embed-demo"])
        assert result2.exit_code == 0
        result3 = runner.invoke(cli, args, input=input, catch_exceptions=False)
        assert result3.exit_code == 0
    # Either way exactly one embedding should have been stored: the failed
    # attempt (if any) must not have written a row
    db = sqlite_utils.Database(str(user_path / "embeddings.db"))
    assert db["embeddings"].count == 1


def test_duplicate_content_embedded_only_once(embed_demo):
    """Content already embedded in a collection is not sent to the model again.

    content_hash should avoid embedding the same content twice per collection,
    while the same text in a different collection is still embedded.
    """
    db = sqlite_utils.Database(memory=True)

    def model_calls():
        # Number of pieces of content the demo model has been asked to embed
        return len(embed_demo.embedded_content)

    def stored_rows():
        return db["embeddings"].count

    assert model_calls() == 0
    first = Collection("test", db, model_id="embed-demo")
    first.embed("1", "hello world")
    assert model_calls() == 1
    first.embed("2", "goodbye world")
    assert stored_rows() == 2
    assert model_calls() == 2

    # Re-embedding identical content under the same ID changes nothing
    first.embed("1", "hello world")
    assert stored_rows() == 2
    assert model_calls() == 2

    # The same string in another collection should be embedded
    second = Collection("test2", db, model_id="embed-demo")
    second.embed("1", "hello world")
    assert stored_rows() == 3
    assert model_calls() == 3

    # Same again for embed_multi
    first.embed_multi(
        (("1", "hello world"), ("2", "goodbye world"), ("3", "this is new"))
    )
    # Should have only embedded one more thing
    assert stored_rows() == 4
    assert model_calls() == 4


================================================
FILE: tests/test_encode_decode.py
================================================
import llm
import pytest
import numpy as np


@pytest.mark.parametrize(
    "array",
    (
        (0.0, 1.0, 1.5),
        (3423.0, 222.0, -1234.5),
    ),
)
def test_roundtrip(array):
    """encode() then decode() returns the original tuple of floats."""
    blob = llm.encode(array)
    assert llm.decode(blob) == array
    # Try with numpy as well: the blob reads as little-endian 32-bit floats
    via_numpy = np.frombuffer(blob, "<f4").tolist()
    assert tuple(via_numpy) == array


================================================
FILE: tests/test_fragments_cli.py
================================================
from click.testing import CliRunner
from importlib.metadata import version
from llm.cli import cli
from unittest import mock
import os
import yaml
import sqlite_utils
import textwrap


def test_fragments_set_show_remove(user_path):
    """Exercise the `llm fragments` set / show / list / remove life cycle."""
    runner = CliRunner()

    def fragments(*args):
        # Shorthand for invoking `llm fragments <args...>`
        return runner.invoke(cli, ["fragments", *args])

    def listed():
        listing = fragments("list")
        assert listing.exit_code == 0
        return yaml.safe_load(listing.output)

    with runner.isolated_filesystem():
        with open("fragment1.txt", "w") as fp:
            fp.write("Hello fragment 1")

        # llm fragments --aliases should return nothing
        assert fragments("list", "--aliases").output == ""
        assert fragments("set", "f1", "fragment1.txt").exit_code == 0
        shown = fragments("show", "f1")
        assert shown.exit_code == 0
        assert shown.output == "Hello fragment 1\n"

        # And in llm fragments --aliases
        assert "f1" in fragments("list", "--aliases").output

        # Should be in the list now
        entry = listed()[0]
        assert set(entry.keys()) == {
            "aliases",
            "content",
            "datetime_utc",
            "source",
            "hash",
        }
        assert entry["content"] == "Hello fragment 1"
        assert entry["aliases"] == ["f1"]

        # Show should work against both alias and hash
        for identifier in ("f1", entry["hash"]):
            shown_again = fragments("show", identifier)
            assert shown_again.exit_code == 0
            assert shown_again.output == "Hello fragment 1\n"

        # But not for an invalid alias
        missing = fragments("show", "badalias")
        assert missing.exit_code == 1
        assert "Fragment 'badalias' not found" in missing.output

        # Remove that alias
        assert fragments("remove", "f1").exit_code == 0
        # Should still be in list but no alias
        entry_after = listed()[0]
        assert entry_after["aliases"] == []
        assert entry_after["content"] == "Hello fragment 1"

        # And --aliases list should be empty
        assert fragments("list", "--aliases").output == ""


def test_fragments_list(user_path):
    """`llm fragments list` prints all fragments as YAML.

    Three fragments are inserted with datetime_utc values out of order; the
    expected output lists them oldest datetime_utc first.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        # This is just to create the database schema
        with open("fragment1.txt", "w") as f:
            f.write("1")
        assert (
            runner.invoke(cli, ["fragments", "set", "f1", "fragment1.txt"]).exit_code
            == 0
        )
        # Now add the rest directly to the database
        db = sqlite_utils.Database(str(user_path / "logs.db"))
        # Remove the fragment created above so only the rows below remain
        db["fragments"].delete_where()
        db["fragments"].insert(
            {
                "content": "1",
                "datetime_utc": "2023-10-01T00:00:00Z",
                "source": "file1.txt",
                "hash": "hash1",
            },
        )
        db["fragments"].insert(
            {
                "content": "2",
                "datetime_utc": "2022-10-01T00:00:00Z",
                "source": "file2.txt",
                "hash": "hash2",
            },
        )
        db["fragments"].insert(
            {
                "content": "3",
                "datetime_utc": "2024-10-01T00:00:00Z",
                "source": "file3.txt",
                "hash": "hash3",
            },
        )
        result = runner.invoke(cli, ["fragments", "list"])
        assert result.exit_code == 0
        # NOTE(review): alias f1 is expected on hash1 even though the original
        # fragment row was deleted - presumably the first re-inserted row
        # reuses that row's id, which the alias still points at; confirm.
        assert result.output.strip() == (textwrap.dedent("""
                - hash: hash2
                  aliases: []
                  datetime_utc: '2022-10-01T00:00:00Z'
                  source: file2.txt
                  content: '2'
                - hash: hash1
                  aliases:
                  - f1
                  datetime_utc: '2023-10-01T00:00:00Z'
                  source: file1.txt
                  content: '1'
                - hash: hash3
                  aliases: []
                  datetime_utc: '2024-10-01T00:00:00Z'
                  source: file3.txt
                  content: '3'
                """).strip())


@mock.patch.dict(os.environ, {"OPENAI_API_KEY": "X"})
def test_fragment_url_user_agent(mocked_openai_chat, user_path):
    """Fetching a fragment from a URL sends the llm User-Agent header."""
    fragment_url = "https://example.com/fragment.txt"
    mocked_openai_chat.add_response(url=fragment_url, text="Hello from URL")
    outcome = CliRunner().invoke(cli, ["prompt", "-f", fragment_url])
    assert outcome.exit_code == 0

    # Pick the fragment fetch out of all the requests the mock recorded
    fragment_requests = [
        request
        for request in mocked_openai_chat.get_requests()
        if "example.com" in str(request.url)
    ]
    sent_user_agent = fragment_requests[0].headers["User-Agent"]
    assert sent_user_agent == f"llm/{version('llm')} (https://llm.datasette.io/)"


================================================
FILE: tests/test_keys.py
================================================
from click.testing import CliRunner
import json
from llm.cli import cli
import pathlib
import pytest
import sys


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
@pytest.mark.parametrize("env", ({}, {"LLM_USER_PATH": "/tmp/llm-keys-test"}))
def test_keys_in_user_path(monkeypatch, env, user_path):
    """`llm keys path` reports keys.json inside LLM_USER_PATH, or inside the
    user_path fixture directory when no override is supplied."""
    for name in env:
        monkeypatch.setenv(name, env[name])
    outcome = CliRunner().invoke(cli, ["keys", "path"])
    assert outcome.exit_code == 0
    base_dir = env["LLM_USER_PATH"] if env else user_path
    assert outcome.output.strip() == base_dir + "/keys.json"


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_keys_set(monkeypatch, tmpdir):
    """`llm keys set` creates keys.json with mode 600 and stores the key."""
    llm_dir = tmpdir / "user/keys"
    monkeypatch.setenv("LLM_USER_PATH", str(llm_dir))
    keys_file = llm_dir / "keys.json"
    assert not keys_file.exists()

    outcome = CliRunner().invoke(cli, ["keys", "set", "openai"], input="foo")
    assert outcome.exit_code == 0
    assert keys_file.exists()

    # The last three octal digits of the mode are the permission bits
    permission_digits = oct(keys_file.stat().mode)[-3:]
    assert permission_digits == "600"

    stored = json.loads(keys_file.read_text("utf-8"))
    assert stored == {
        "// Note": "This file stores secret API credentials. Do not share!",
        "openai": "foo",
    }


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
def test_keys_get(monkeypatch, tmpdir):
    """A key stored with `keys set` is printed back by `keys get`."""
    monkeypatch.setenv("LLM_USER_PATH", str(tmpdir / "user/keys"))
    cli_runner = CliRunner()
    stored = cli_runner.invoke(cli, ["keys", "set", "openai"], input="fx")
    assert stored.exit_code == 0
    fetched = cli_runner.invoke(cli, ["keys", "get", "openai"])
    assert fetched.exit_code == 0
    assert fetched.output.strip() == "fx"


@pytest.mark.parametrize("args", (["keys", "list"], ["keys"]))
def test_keys_list(monkeypatch, tmpdir, args):
    """Both `llm keys list` and bare `llm keys` print the stored key names."""
    monkeypatch.setenv("LLM_USER_PATH", str(tmpdir / "user/keys"))
    cli_runner = CliRunner()
    stored = cli_runner.invoke(cli, ["keys", "set", "openai"], input="foo")
    assert stored.exit_code == 0
    listed = cli_runner.invoke(cli, args)
    assert listed.exit_code == 0
    assert listed.output.strip() == "openai"


@pytest.mark.httpx_mock(
    assert_all_requests_were_expected=False, can_send_already_matched_responses=True
)
def test_uses_correct_key(mocked_openai_chat, monkeypatch, tmpdir):
    """Check which API key ends up in the Authorization header.

    Covers, in order: the key stored in keys.json, the OPENAI_API_KEY
    environment variable when keys.json is empty, ``--key`` naming an entry
    in keys.json, and ``--key`` carrying a literal key value.
    """
    user_dir = tmpdir / "user-dir"
    pathlib.Path(user_dir).mkdir()
    keys_file = user_dir / "keys.json"
    stored_keys_json = json.dumps(
        {
            "openai": "from-keys-file",
            "other": "other-key",
        }
    )
    keys_file.write_text(stored_keys_json, "utf-8")
    monkeypatch.setenv("LLM_USER_PATH", str(user_dir))
    monkeypatch.setenv("OPENAI_API_KEY", "from-env")

    cli_runner = CliRunner()

    def authorization_after(*key_args):
        # Run the "hello" prompt and return the header of the latest request
        outcome = cli_runner.invoke(
            cli, ["hello", *key_args, "--no-stream"], catch_exceptions=False
        )
        assert outcome.exit_code == 0
        return mocked_openai_chat.get_requests()[-1].headers["Authorization"]

    # No --key: the stored key is used
    assert authorization_after() == f"Bearer {'from-keys-file'}"

    # No --key and nothing in keys.json: fall back to the environment variable
    keys_file.write_text("{}", "utf-8")
    assert authorization_after() == "Bearer from-env"
    keys_file.write_text(stored_keys_json, "utf-8")

    # --key naming an entry in keys.json uses that entry's value
    assert authorization_after("--key", "other") == "Bearer other-key"

    # --key with any other value is used verbatim
    assert authorization_after("--key", "custom-key") == "Bearer custom-key"


================================================
FILE: tests/test_llm.py
================================================
from click.testing import CliRunner
import llm
from llm.cli import cli
from llm.models import Usage
import json
import os
import pathlib
from pydantic import BaseModel
import pytest
import sqlite_utils
from unittest import mock


def test_version():
    """`--version` exits with 0 and prints a line starting "cli, version "."""
    cli_runner = CliRunner()
    with cli_runner.isolated_filesystem():
        outcome = cli_runner.invoke(cli, ["--version"])
    assert outcome.exit_code == 0
    assert outcome.output.startswith("cli, version ")


@pytest.mark.parametrize("custom_database_path", (False, True))
def test_llm_prompt_creates_log_database(
    mocked_openai_chat, tmpdir, monkeypatch, custom_database_path
):
    """A prompt logs one response, to logs.db under the user directory or to
    the file given with --database."""
    user_dir = tmpdir / "user"
    explicit_db = tmpdir / "custom_log.db"
    monkeypatch.setenv("LLM_USER_PATH", str(user_dir))

    cli_args = ["three names \nfor a pet pelican", "--no-stream", "--key", "x"]
    if custom_database_path:
        cli_args += ["--database", str(explicit_db)]
    outcome = CliRunner().invoke(cli, cli_args, catch_exceptions=False)
    assert outcome.exit_code == 0
    assert outcome.output == "Bob, Alice, Eve\n"

    # Whichever database was targeted must now exist and hold one response
    expected_db = explicit_db if custom_database_path else user_dir / "logs.db"
    assert expected_db.exists()
    assert sqlite_utils.Database(str(expected_db))["responses"].count == 1


@mock.patch.dict(os.environ, {"OPENAI_API_KEY": "X"})
@pytest.mark.parametrize("use_stdin", (True, False, "split"))
@pytest.mark.parametrize(
    "logs_off,logs_args,should_log",
    (
        (True, [], False),
        (False, [], True),
        (False, ["--no-log"], False),
        (False, ["--log"], True),
        (True, ["-n"], False),  # Short for --no-log
        (True, ["--log"], True),
    ),
)
def test_llm_default_prompt(
    mocked_openai_chat, use_stdin, user_path, logs_off, logs_args, should_log
):
    """Run the default prompt command and check what reaches the log database.

    ``use_stdin`` selects whether the prompt arrives on stdin, as an argument,
    or split across both. ``logs_off`` toggles logging globally, ``logs_args``
    adds per-call flags, and ``should_log`` says whether a row is expected.
    """
    model_id = "gpt-4o-mini"
    full_prompt = "three names \nfor a pet pelican"
    reply = "Bob, Alice, Eve"

    # Start from an empty responses table
    responses_db = sqlite_utils.Database(str(user_path / "logs.db"))
    responses_db["responses"].delete_where()

    # Put global logging in the requested state; the "logs-off" marker file
    # is asserted present or absent after each toggle
    off_marker = user_path / "logs-off"
    if logs_off:
        assert not off_marker.exists()
        CliRunner().invoke(cli, ["logs", "off"])
        assert off_marker.exists()
    else:
        CliRunner().invoke(cli, ["logs", "on"])
        assert not off_marker.exists()

    # Decide how the prompt text reaches the command
    if use_stdin == "split":
        stdin_text, prompt_args = "three names", ["\nfor a pet pelican"]
    elif use_stdin:
        stdin_text, prompt_args = full_prompt, []
    else:
        stdin_text, prompt_args = None, [full_prompt]

    cli_runner = CliRunner()
    outcome = cli_runner.invoke(
        cli,
        ["--no-stream", *prompt_args, *logs_args],
        input=stdin_text,
        catch_exceptions=False,
    )
    assert outcome.exit_code == 0
    assert outcome.output == reply + "\n"
    sent_request = mocked_openai_chat.get_requests()[-1]
    assert sent_request.headers["Authorization"] == "Bearer X"

    # Was it logged?
    logged = list(responses_db["responses"].rows)
    if not should_log:
        assert len(logged) == 0
        return
    assert len(logged) == 1
    row = logged[0]

    user_messages = {"messages": [{"role": "user", "content": full_prompt}]}
    logged_response = {
        "choices": [{"message": {"content": {"$": f"r:{row['id']}"}}}],
        "model": model_id,
    }
    wanted_columns = {
        "model": model_id,
        "prompt": full_prompt,
        "system": None,
        "options_json": "{}",
        "response": reply,
    }
    assert wanted_columns.items() <= row.items()
    assert isinstance(row["duration_ms"], int)
    assert isinstance(row["datetime_utc"], str)
    assert json.loads(row["prompt_json"]) == user_messages
    assert json.loads(row["response_json"]) == logged_response

    # "llm logs" should report the same entry, with the JSON columns decoded
    logs_outcome = cli_runner.invoke(
        cli, ["logs", "-n", "1", "--json"], catch_exceptions=False
    )
    newest_entry = json.loads(logs_outcome.output)[0]
    wanted_entry = {
        "model": model_id,
        "prompt": full_prompt,
        "system": None,
        "prompt_json": user_messages,
        "options_json": {},
        "response": reply,
        "response_json": logged_response,
        # This doesn't have the \n after three names:
        "conversation_name": "three names for a pet pelican",
        "conversation_model": model_id,
    }
    assert newest_entry.items() >= wanted_entry.items()


@mock.patch.dict(os.environ, {"OPENAI_API_KEY": "X"})
@pytest.mark.parametrize("async_", (False, True))
def test_llm_prompt_continue(httpx_mock, user_path, async_):
    """A prompt followed by a `-c` follow-up logs two responses, with and
    without --async."""
    # Queue one mocked chat completion per prompt, in the order they are used
    for reply in ("Bob, Alice, Eve", "Terry"):
        httpx_mock.add_response(
            method="POST",
            url="https://api.openai.com/v1/chat/completions",
            json={
                "model": "gpt-4o-mini",
                "usage": {},
                "choices": [{"message": {"content": reply}}],
            },
            headers={"Content-Type": "application/json"},
        )

    responses_db = sqlite_utils.Database(str(user_path / "logs.db"))
    responses_db["responses"].delete_where()

    async_flag = ["--async"] if async_ else []
    cli_runner = CliRunner()

    # First prompt
    first = cli_runner.invoke(
        cli,
        ["three names \nfor a pet pelican", "--no-stream"] + async_flag,
        catch_exceptions=False,
    )
    assert first.exit_code == 0, first.output
    assert first.output == "Bob, Alice, Eve\n"
    assert len(list(responses_db["responses"].rows)) == 1

    # Follow-up that continues the conversation
    second = cli_runner.invoke(
        cli, ["one more", "-c", "--no-stream"] + async_flag, catch_exceptions=False
    )
    assert second.exit_code == 0, second.output
    assert second.output == "Terry\n"
    assert len(list(responses_db["responses"].rows)) == 2


@pytest.mark.parametrize(
    "args,expect_just_code",
    (
        (["-x"], True),
        (["--extract"], True),
        (["-x", "--async"], True),
        (["--extract", "--async"], True),
        # Use --no-stream here to ensure it passes test same as -x/--extract cases
        (["--no-stream"], False),
    ),
)
def test_extract_fenced_code(
    mocked_openai_chat_returning_fenced_code, args, expect_just_code
):
    """-x/--extract removes the Markdown code fence from the output.

    ``args`` are the extra CLI flags under test; ``expect_just_code`` says
    whether the fence markers must be absent from the output.
    """
    runner = CliRunner()
    result = runner.invoke(
        cli,
        ["-m", "gpt-4o-mini", "--key", "x", "Write code"] + args,
        catch_exceptions=False,
    )
    # Without this check, a failed invocation whose output contains no fence
    # would satisfy the "not in" assertion below and pass vacuously.
    assert result.exit_code == 0, result.output
    output = result.output
    if expect_just_code:
        assert "```" not in output
    else:
        assert "```" in output


def test_openai_chat_stream(mocked_openai_chat_stream, user_path):
    """A prompt run without --no-stream prints the mocked streamed reply."""
    cli_args = ["-m", "gpt-3.5-turbo", "--key", "x", "Say hi"]
    outcome = CliRunner().invoke(cli, cli_args)
    assert outcome.exit_code == 0
    assert outcome.output == "Hi.\n"


def test_openai_completion(mocked_openai_completion, user_path):
    """A completion-model prompt sends the expected request body and is logged."""
    model_id = "gpt-3.5-turbo-instruct"
    prompt_text = "Say this is a test"

    log_db = sqlite_utils.Database(str(user_path / "logs.db"))
    log_db["responses"].delete_where()

    outcome = CliRunner().invoke(
        cli,
        ["-m", model_id, prompt_text, "--no-stream", "--key", "x"],
        catch_exceptions=False,
    )
    assert outcome.exit_code == 0
    assert outcome.output == "\n\nThis is indeed a test\n"

    # The request body should ask for 256 tokens
    sent_body = json.loads(mocked_openai_completion.get_requests()[-1].content)
    assert sent_body == {
        "model": model_id,
        "prompt": prompt_text,
        "stream": False,
        "max_tokens": 256,
    }

    # Exactly one row should be logged, containing at least these columns
    logged = list(log_db["responses"].rows)
    assert len(logged) == 1
    wanted_columns = {
        "model": model_id,
        "prompt": prompt_text,
        "system": None,
        "prompt_json": '{"messages": ["Say this is a test"]}',
        "options_json": "{}",
        "response": "\n\nThis is indeed a test",
    }
    assert wanted_columns.items() <= logged[0].items()


def test_openai_completion_system_prompt_error():
    """Passing --system to a completion model exits with 1 and an error message."""
    cli_args = [
        "-m",
        "gpt-3.5-turbo-instruct",
        "Say this is a test",
        "--no-stream",
        "--key",
        "x",
        "--system",
        "system prompts not allowed",
    ]
    outcome = CliRunner().invoke(cli, cli_args)
    assert outcome.exit_code == 1
    wanted_message = "System prompts are not supported for OpenAI completion models"
    assert wanted_message in outcome.output


def test_openai_completion_logprobs_stream(
    mocked_openai_completion_logprobs_stream, user_path
):
    """A streamed completion with `-o logprobs 2` logs per-chunk logprobs in
    response_json."""
    log_db = sqlite_utils.Database(str(user_path / "logs.db"))
    log_db["responses"].delete_where()

    outcome = CliRunner().invoke(
        cli,
        ["-m", "gpt-3.5-turbo-instruct", "Say hi", "-o", "logprobs", "2", "--key", "x"],
        catch_exceptions=False,
    )
    assert outcome.exit_code == 0
    assert outcome.output == "\n\nHi.\n"

    logged = list(log_db["responses"].rows)
    assert len(logged) == 1
    row = logged[0]
    wanted_logprobs = [
        {"text": "\n\n", "top_logprobs": [{"\n\n": -0.6, "\n": -1.9}]},
        {"text": "Hi", "top_logprobs": [{"Hi": -1.1, "Hello": -0.7}]},
        {"text": ".", "top_logprobs": [{".": -1.1, "!": -0.9}]},
        {"text": "", "top_logprobs": []},
    ]
    assert json.loads(row["response_json"]) == {
        "content": {"$": f'r:{row["id"]}'},
        "logprobs": wanted_logprobs,
        "id": "cmpl-80MdSaou7NnPuff5ZyRMysWBmgSPS",
        "object": "text_completion",
        "model": "gpt-3.5-turbo-instruct",
        "created": 1695097702,
    }


def test_openai_completion_logprobs_nostream(
    mocked_openai_completion_logprobs, user_path
):
    """A non-streamed completion with `-o logprobs 2` logs the full API
    response, logprobs included, in response_json."""
    log_db = sqlite_utils.Database(str(user_path / "logs.db"))
    log_db["responses"].delete_where()

    cli_args = ["-m", "gpt-3.5-turbo-instruct", "Say hi"]
    cli_args += ["-o", "logprobs", "2"]
    cli_args += ["--key", "x", "--no-stream"]
    outcome = CliRunner().invoke(cli, cli_args, catch_exceptions=False)
    assert outcome.exit_code == 0
    assert outcome.output == "\n\nHi.\n"

    logged = list(log_db["responses"].rows)
    assert len(logged) == 1
    row = logged[0]
    wanted_choice = {
        "finish_reason": "stop",
        "index": 0,
        "logprobs": {
            "text_offset": [16, 18, 20],
            "token_logprobs": [-0.6, -1.1, -0.9],
            "tokens": ["\n\n", "Hi", "1"],
            "top_logprobs": [
                {"\n": -1.9, "\n\n": -0.6},
                {"Hello": -0.7, "Hi": -1.1},
                {"!": -1.1, ".": -0.9},
            ],
        },
        "text": {"$": f"r:{row['id']}"},
    }
    assert json.loads(row["response_json"]) == {
        "choices": [wanted_choice],
        "created": 1695097747,
        "id": "cmpl-80MeBfKJutM0uMNJkRrebJLeP3bxL",
        "model": "gpt-3.5-turbo-instruct",
        "object": "text_completion",
        "usage": {"completion_tokens": 3, "prompt_tokens": 5, "total_tokens": 8},
    }


# Contents written to extra-openai-models.yaml by
# test_openai_localai_configuration: one chat model ("orca") and one model
# flagged with "completion: 1" ("completion-babbage"), both pointing at the
# same api_base.
EXTRA_MODELS_YAML = """
- model_id: orca
  model_name: orca-mini-3b
  api_base: "http://localai.localhost"
- model_id: completion-babbage
  model_name: babbage
  api_base: "http://localai.localhost"
  completion: 1
"""


def test_openai_localai_configuration(mocked_localai, user_path):
    """Models declared in extra-openai-models.yaml can be prompted, for both
    the chat entry and the completion entry."""
    sqlite_utils.Database(str(user_path / "logs.db"))
    # Write the configuration file
    (user_path / "extra-openai-models.yaml").write_text(EXTRA_MODELS_YAML, "utf-8")

    cli_runner = CliRunner()

    def latest_request_body():
        # Decoded JSON body of the most recent request seen by the mock
        return json.loads(mocked_localai.get_requests()[-1].content)

    # Chat model
    chat_prompt = "three names \nfor a pet pelican"
    chat_outcome = cli_runner.invoke(
        cli, ["--no-stream", "--model", "orca", chat_prompt]
    )
    assert chat_outcome.exit_code == 0
    assert chat_outcome.output == "Bob, Alice, Eve\n"
    assert latest_request_body() == {
        "model": "orca-mini-3b",
        "messages": [{"role": "user", "content": "three names \nfor a pet pelican"}],
        "stream": False,
    }

    # Completion model
    completion_outcome = cli_runner.invoke(
        cli, ["--no-stream", "--model", "completion-babbage", "hi"]
    )
    assert completion_outcome.exit_code == 0
    assert completion_outcome.output == "Hello\n"
    assert latest_request_body() == {
        "model": "babbage",
        "prompt": "hi",
        "stream": False,
    }


@pytest.mark.parametrize(
    "args,exit_code",
    (
        (["-q", "mo", "-q", "ck"], 0),
        (["-q", "mock"], 0),
        (["-q", "badmodel"], 1),
        (["-q", "mock", "-q", "badmodel"], 1),
    ),
)
def test_prompt_select_model_with_queries(mock_model, user_path, args, exit_code):
    """Prompting with -q search terms exits with the parametrized exit code."""
    outcome = CliRunner().invoke(cli, [*args, "hello"], catch_exceptions=False)
    assert outcome.exit_code == exit_code


# Full option listing for the gpt-4o model in `llm models --options` format.
# NOTE(review): not referenced in the visible part of this module;
# test_llm_models_options checks individual substrings instead of this block.
# Confirm whether it is still needed.
EXPECTED_OPTIONS = """
OpenAI Chat: gpt-4o (aliases: 4o)
  Options:
    temperature: float
      What sampling temperature to use, between 0 and 2. Higher values like
      0.8 will make the output more random, while lower values like 0.2 will
      make it more focused and deterministic.
    max_tokens: int
      Maximum number of tokens to generate.
    top_p: float
      An alternative to sampling with temperature, called nucleus sampling,
      where the model considers the results of the tokens with top_p
      probability mass. So 0.1 means only the tokens comprising the top 10%
      probability mass are considered. Recommended to use top_p or
      temperature but not both.
    frequency_penalty: float
      Number between -2.0 and 2.0. Positive values penalize new tokens based
      on their existing frequency in the text so far, decreasing the model's
      likelihood to repeat the same line verbatim.
    presence_penalty: float
      Number between -2.0 and 2.0. Positive values penalize new tokens based
      on whether they appear in the text so far, increasing the model's
      likelihood to talk about new topics.
    stop: str
      A string where the API will stop generating further tokens.
    logit_bias: dict, str
      Modify the likelihood of specified tokens appearing in the completion.
      Pass a JSON string like '{"1712":-100, "892":-100, "1489":-100}'
    seed: int
      Integer seed to attempt to sample deterministically
    json_object: boolean
      Output a valid JSON object {...}. Prompt must mention JSON.
  Attachment types:
    application/pdf, image/gif, image/jpeg, image/png, image/webp
  Keys:
    key: openai
    env_var: OPENAI_API_KEY
"""


def test_llm_models_options(user_path):
    """`llm models --options` lists gpt-4o with its options and key details,
    and leaves out the async mock model."""
    outcome = CliRunner().invoke(cli, ["models", "--options"], catch_exceptions=False)
    assert outcome.exit_code == 0
    listing = outcome.output
    # Check for key components instead of exact string match
    for fragment in (
        "OpenAI Chat: gpt-4o (aliases: 4o)",
        "  Options:",
        "    temperature: float",
        "  Keys:",
        "    key: openai",
        "    env_var: OPENAI_API_KEY",
    ):
        assert fragment in listing
    assert "AsyncMockModel (async): mock" not in listing


def test_llm_models_async(user_path):
    """`llm models --async` includes the async mock model in its listing."""
    outcome = CliRunner().invoke(cli, ["models", "--async"], catch_exceptions=False)
    assert outcome.exit_code == 0
    assert "AsyncMockModel (async): mock" in outcome.output


@pytest.mark.parametrize(
    "args,expected_model_ids,unexpected_model_ids",
    (
        (["-q", "gpt-4o"], ["OpenAI Chat: gpt-4o"], None),
        (["-q", "mock"], ["MockModel: mock"], None),
        (["--query", "mock"], ["MockModel: mock"], None),
        (
            ["-q", "4o", "-q", "mini"],
            ["OpenAI Chat: gpt-4o-mini"],
            ["OpenAI Chat: gpt-4o "],
        ),
        (
            ["-m", "gpt-4o-mini", "-m", "gpt-4.5"],
            ["OpenAI Chat: gpt-4o-mini", "OpenAI Chat: gpt-4.5"],
            ["OpenAI Chat: gpt-4o "],
        ),
    ),
)
def test_llm_models_filter(user_path, args, expected_model_ids, unexpected_model_ids):
    """`llm models` with -q/--query or -m shows the expected entries and omits
    the unexpected ones."""
    outcome = CliRunner().invoke(cli, ["models", *args], catch_exceptions=False)
    assert outcome.exit_code == 0
    listing = outcome.output
    # Either list may be None, which means there is nothing to check
    for wanted in expected_model_ids or ():
        assert wanted in listing
    for unwanted in unexpected_model_ids or ():
        assert unwanted not in listing


def test_llm_user_dir(tmpdir, monkeypatch):
    """llm.user_dir() returns the LLM_USER_PATH directory and creates it."""
    wanted_dir = str(tmpdir / "u")
    monkeypatch.setenv("LLM_USER_PATH", wanted_dir)
    assert not os.path.exists(wanted_dir)
    returned_dir = llm.user_dir()
    assert str(returned_dir) == wanted_dir
    assert os.path.exists(wanted_dir)


def test_model_defaults(tmpdir, monkeypatch):
    """The default model is gpt-4o-mini until set_default_model() writes
    default_model.txt with a different choice."""
    user_dir = str(tmpdir / "u")
    monkeypatch.setenv("LLM_USER_PATH", user_dir)
    default_file = pathlib.Path(user_dir) / "default_model.txt"

    def assert_default_is(model_id):
        # Both the stored default and the model resolved from it must agree
        assert llm.get_default_model() == model_id
        assert llm.get_model().model_id == model_id

    assert not default_file.exists()
    assert_default_is("gpt-4o-mini")
    llm.set_default_model("gpt-4o")
    assert default_file.exists()
    assert_default_is("gpt-4o")


def test_get_models():
    """llm.get_models() returns Model/KeyModel instances with unique IDs,
    including the expected OpenAI model IDs."""
    models = llm.get_models()
    for model in models:
        assert isinstance(model, (llm.Model, llm.KeyModel))
    model_ids = [model.model_id for model in models]
    for wanted_id in ("gpt-4o-mini", "gpt-5.4-mini", "gpt-5.4-nano"):
        assert wanted_id in model_ids
    # Ensure no model_ids are duplicated
    # https://github.com/simonw/llm/issues/667
    assert len(set(model_ids)) == len(model_ids)


def test_get_async_models():
    """llm.get_async_models() returns AsyncModel/AsyncKeyModel instances,
    including the expected OpenAI model IDs."""
    models = llm.get_async_models()
    for model in models:
        assert isinstance(model, (llm.AsyncModel, llm.AsyncKeyModel))
    model_ids = [model.model_id for model in models]
    for wanted_id in ("gpt-4o-mini", "gpt-5.4-mini", "gpt-5.4-nano"):
        assert wanted_id in model_ids


def test_mock_model(mock_model):
    """The mock model returns queued responses in order and reports usage."""
    for queued_chunks in (["hello world"], ["second"]):
        mock_model.enqueue(queued_chunks)
    model = llm.get_model("mock")

    first = model.prompt(prompt="hello")
    assert first.text() == "hello world"
    assert str(first) == "hello world"
    assert model.history[0][0].prompt == "hello"
    assert first.usage() == Usage(input=1, output=1, details=None)

    second = model.prompt(prompt="hello again")
    assert second.text() == "second"
    assert second.usage() == Usage(input=2, output=1, details=None)


# Pydantic model passed as the schema in test_schema and test_schema_async.
# Documented with comments on purpose: test_schema asserts that
# Dog.model_json_schema() equals dog_schema exactly.
# NOTE(review): a class docstring would presumably surface as a "description"
# key in the generated schema and break that assertion; confirm before adding one.
class Dog(BaseModel):
    name: str
    age: int


# JSON schema that test_schema compares against Dog.model_json_schema()
dog_schema = dict(
    properties=dict(
        name=dict(title="Name", type="string"),
        age=dict(title="Age", type="integer"),
    ),
    required=["name", "age"],
    title="Dog",
    type="object",
)
# Sample instance; key order matters to tests comparing its JSON dump verbatim
dog = dict(name="Cleo", age=10)


@pytest.mark.parametrize("use_pydantic", (False, True))
def test_schema(mock_model, use_pydantic):
    """A schema given as a pydantic class or as a dict ends up on the prompt
    as the same JSON schema dict."""
    assert Dog.model_json_schema() == dog_schema
    mock_model.enqueue([json.dumps(dog)])
    schema_argument = Dog if use_pydantic else dog_schema
    response = mock_model.prompt("invent a dog", schema=schema_argument)
    returned_dog = json.loads(response.text())
    assert returned_dog == dog
    assert response.prompt.schema == dog_schema


def test_model_environment_variable(monkeypatch):
    """With LLM_MODEL set to "echo", the prompt is answered by that model,
    whose JSON output reflects the request."""
    monkeypatch.setenv("LLM_MODEL", "echo")
    cli_args = ["--no-stream", "hello", "-s", "sys"]
    outcome = CliRunner().invoke(cli, cli_args, catch_exceptions=False)
    assert outcome.exit_code == 0
    echoed = json.loads(outcome.output)
    assert echoed == {
        "prompt": "hello",
        "system": "sys",
        "attachments": [],
        "stream": False,
        "previous": [],
    }


@pytest.mark.parametrize("use_filename", (True, False))
def test_schema_via_cli(mock_model, tmpdir, monkeypatch, use_filename):
    """--schema accepts inline JSON or a file path and stores the schema in
    the schemas table; the stored ID can then be passed to --schema."""
    schema_id = "9a8ed2c9b17203f6d8905147234475b5"
    inline_schema = '{"schema": "one"}'
    user_dir = tmpdir / "user"
    schema_file = tmpdir / "schema.json"

    mock_model.enqueue([json.dumps(dog)])
    # The file is written in both cases; only the CLI argument differs
    with open(schema_file, "w") as handle:
        handle.write(inline_schema)
    monkeypatch.setenv("LLM_USER_PATH", str(user_dir))
    schema_argument = str(schema_file) if use_filename else inline_schema

    cli_runner = CliRunner()
    outcome = cli_runner.invoke(
        cli,
        ["--schema", schema_argument, "prompt", "-m", "mock"],
        catch_exceptions=False,
    )
    assert outcome.exit_code == 0
    assert outcome.output == '{"name": "Cleo", "age": 10}\n'

    # Should have created the user directory and put a logs.db in it
    logs_db_path = user_dir / "logs.db"
    assert logs_db_path.exists()
    stored_schemas = list(sqlite_utils.Database(str(logs_db_path))["schemas"].rows)
    assert stored_schemas == [{"id": schema_id, "content": '{"schema":"one"}'}]

    if use_filename:
        # Run it again to check that the ID option works now it's in the DB
        by_id = cli_runner.invoke(
            cli,
            ["--schema", schema_id, "prompt", "-m", "mock"],
            catch_exceptions=False,
        )
        assert by_id.exit_code == 0


@pytest.mark.parametrize(
    "args,expected",
    (
        (
            ["--schema", "name, age int"],
            {
                "type": "object",
                "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
                "required": ["name", "age"],
            },
        ),
        (
            ["--schema-multi", "name, age int"],
            {
                "type": "object",
                "properties": {
                    "items": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "name": {"type": "string"},
                                "age": {"type": "integer"},
                            },
                            "required": ["name", "age"],
                        },
                    }
                },
                "required": ["items"],
            },
        ),
    ),
)
def test_schema_using_dsl(mock_model, tmpdir, monkeypatch, args, expected):
    """--schema and --schema-multi accept the concise schema DSL and store
    the expanded JSON schema in the schemas table."""
    user_dir = tmpdir / "user"
    mock_model.enqueue([json.dumps(dog)])
    monkeypatch.setenv("LLM_USER_PATH", str(user_dir))

    outcome = CliRunner().invoke(
        cli, ["prompt", "-m", "mock", *args], catch_exceptions=False
    )
    assert outcome.exit_code == 0
    assert outcome.output == '{"name": "Cleo", "age": 10}\n'

    schemas_table = sqlite_utils.Database(str(user_dir / "logs.db"))["schemas"]
    first_schema = list(schemas_table.rows)[0]
    assert json.loads(first_schema["content"]) == expected


@pytest.mark.asyncio
@pytest.mark.parametrize("use_pydantic", (False, True))
async def test_schema_async(async_mock_model, use_pydantic):
    """Async variant: a pydantic class or dict schema ends up on the prompt
    as the same JSON schema dict."""
    async_mock_model.enqueue([json.dumps(dog)])
    schema_argument = Dog if use_pydantic else dog_schema
    response = async_mock_model.prompt("invent a dog", schema=schema_argument)
    response_text = await response.text()
    assert json.loads(response_text) == dog
    assert response.prompt.schema == dog_schema


def test_mock_key_model(mock_key_model):
    """The key passed to prompt() shows up in the mock key model's reply."""
    reply_text = mock_key_model.prompt(prompt="hello", key="hi").text()
    assert reply_text == "key: hi"


@pytest.mark.asyncio
async def test_mock_async_key_model(mock_async_key_model):
    """The key passed to prompt() shows up in the async mock key model's reply."""
    pending = mock_async_key_model.prompt(prompt="hello", key="hi")
    assert await pending.text() == "async, key: hi"


def test_sync_on_done(mock_model):
    """An on_done callback has not fired when registered, and has fired once
    after the response is converted to a string."""
    mock_model.enqueue(["hello world"])
    response = llm.get_model("mock").prompt(prompt="hello")
    finished = []

    def record_finished(done_response):
        finished.append(done_response)

    response.on_done(record_finished)
    assert len(finished) == 0
    # Converting to str is what the test uses to complete the response
    str(response)
    assert len(finished) == 1


def test_schemas_dsl():
    """`llm schemas dsl` expands the concise DSL into a JSON schema, and
    --multi wraps it in an "items" array."""
    cli_runner = CliRunner()

    single = cli_runner.invoke(cli, ["schemas", "dsl", "name, age int, bio: short bio"])
    assert single.exit_code == 0
    assert json.loads(single.output) == {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
            "bio": {"type": "string", "description": "short bio"},
        },
        "required": ["name", "age", "bio"],
    }

    # The per-item schema expected inside the --multi wrapper
    item_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
        },
        "required": ["name", "age"],
    }
    multi = cli_runner.invoke(cli, ["schemas", "dsl", "name, age int", "--multi"])
    assert multi.exit_code == 0
    assert json.loads(multi.output) == {
        "type": "object",
        "properties": {"items": {"type": "array", "items": item_schema}},
        "required": ["items"],
    }


@mock.patch.dict(os.environ, {"OPENAI_API_KEY": "X"})
@pytest.mark.parametrize("custom_database_path", (False, True))
def test_llm_prompt_continue_with_database(
    tmpdir, monkeypatch, httpx_mock, user_path, custom_database_path
):
    """A prompt and its `-c` follow-up both land in the same database, either
    the default logs.db or the one named with --database."""
    # Queue one mocked chat completion per prompt, in the order they are used
    for reply in ("Bob, Alice, Eve", "Terry"):
        httpx_mock.add_response(
            method="POST",
            url="https://api.openai.com/v1/chat/completions",
            json={
                "model": "gpt-4o-mini",
                "usage": {},
                "choices": [{"message": {"content": reply}}],
            },
            headers={"Content-Type": "application/json"},
        )

    # A fresh user directory is used rather than the user_path fixture's one
    user_dir = tmpdir / "user"
    custom_db_path = tmpdir / "custom_log.db"
    monkeypatch.setenv("LLM_USER_PATH", str(user_dir))
    database_args = ["--database", str(custom_db_path)] if custom_database_path else []

    cli_runner = CliRunner()

    # First prompt
    first = cli_runner.invoke(
        cli,
        ["three names \nfor a pet pelican", "--no-stream", *database_args],
        catch_exceptions=False,
    )
    assert first.exit_code == 0, first.output
    assert first.output == "Bob, Alice, Eve\n"

    # Follow-up that continues the conversation
    second = cli_runner.invoke(
        cli, ["one more", "-c", "--no-stream", *database_args], catch_exceptions=False
    )
    assert second.exit_code == 0, second.output
    assert second.output == "Terry\n"

    expected_db = custom_db_path if custom_database_path else user_dir / "logs.db"
    assert expected_db.exists()
    assert sqlite_utils.Database(str(expected_db))["responses"].count == 2


def test_default_exports():
    """Check key exports in the llm __all__ list"""
    required_names = (
        "Model",
        "AsyncModel",
        "get_model",
        "get_async_model",
        "schema_dsl",
    )
    exported = llm.__all__
    for name in required_names:
        assert name in exported, f"{name} not in llm.__all__"


================================================
FILE: tests/test_llm_logs.py
================================================
from click.testing import CliRunner
from llm.cli import cli
from llm.migrations import migrate
from llm.utils import monotonic_ulid
from llm import Fragment
import datetime
import json
import pathlib
import pytest
import re
import sqlite_utils
import sys
import textwrap
import time
from ulid import ULID
import yaml

# IDs of the two rows the schema_log_path fixture inserts into the "schemas"
# table: SINGLE_ID has content {"name": "string"}, MULTI_ID has {"name": "array"}.
SINGLE_ID = "5843577700ba729bb14c327b30441885"
MULTI_ID = "4860edd987df587d042a9eb2b299ce5c"


@pytest.fixture
def log_path(user_path):
    """Create a migrated logs.db under user_path with 100 responses and return its path."""
    db_file = str(user_path / "logs.db")
    database = sqlite_utils.Database(db_file)
    migrate(database)
    base_time = datetime.datetime.now(datetime.timezone.utc)

    def _rows():
        # Each row's datetime_utc is one second later than the previous row's
        for offset in range(100):
            timestamp = base_time + datetime.timedelta(seconds=offset)
            yield {
                "id": str(monotonic_ulid()).lower(),
                "system": "system",
                "prompt": "prompt",
                "response": 'response\n```python\nprint("hello word")\n```',
                "model": "davinci",
                "datetime_utc": timestamp.isoformat(),
                "conversation_id": "abc123",
                "input_tokens": 2,
                "output_tokens": 5,
            }

    database["responses"].insert_all(_rows())
    return db_file


@pytest.fixture
def schema_log_path(user_path):
    """Create a migrated logs_schema.db with two schemas and six responses.

    Two responses in conversation "abc123" reference SINGLE_ID and four
    responses in conversation "abc456" reference MULTI_ID. Returns the
    database path as a string.
    """
    log_path = str(user_path / "logs_schema.db")
    db = sqlite_utils.Database(log_path)
    migrate(db)
    start = datetime.datetime.now(datetime.timezone.utc)
    db["schemas"].insert({"id": SINGLE_ID, "content": '{"name": "string"}'})
    db["schemas"].insert({"id": MULTI_ID, "content": '{"name": "array"}'})
    # Responses against the single-object schema: {"name": "0"} and {"name": "1"}
    for i in range(2):
        db["responses"].insert(
            {
                "id": str(ULID.from_timestamp(time.time() + i)).lower(),
                "system": "system",
                "prompt": "prompt",
                "response": '{"name": "' + str(i) + '"}',
                "model": "davinci",
                "datetime_utc": (start + datetime.timedelta(seconds=i)).isoformat(),
                "conversation_id": "abc123",
                "input_tokens": 2,
                "output_tokens": 5,
                "schema_id": SINGLE_ID,
            }
        )
    # Responses against the multi schema: four identical {"items": [...]} payloads
    for j in range(4):
        db["responses"].insert(
            {
                "id": str(ULID.from_timestamp(time.time() + j)).lower(),
                "system": "system",
                "prompt": "prompt",
                "response": '{"items": [{"name": "one"}, {"name": "two"}]}',
                "model": "davinci",
                # NOTE(review): this reads `i`, left at 1 by the loop above, not
                # `j`, so all four rows share one datetime_utc. Unclear whether
                # that is intentional - confirm before changing, since tests
                # that depend on timestamp ordering may rely on it.
                "datetime_utc": (start + datetime.timedelta(seconds=i)).isoformat(),
                "conversation_id": "abc456",
                "input_tokens": 2,
                "output_tokens": 5,
                "schema_id": MULTI_ID,
            }
        )

    return log_path


# Matches a YYYY-MM-DDTHH:MM:SS timestamp; tests substitute a fixed placeholder
# for it so output can be compared exactly.
datetime_re = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")
# Matches "id: " followed by a run of word characters, for the same purpose.
id_re = re.compile(r"id: \w+")


@pytest.mark.parametrize("usage", (False, True))
def test_logs_text(log_path, usage):
    """Default `llm logs` output is Markdown for three responses, with token usage under -u."""
    cli_args = ["logs", "-p", str(log_path)]
    if usage:
        cli_args.append("-u")
    result = CliRunner().invoke(cli, cli_args, catch_exceptions=False)
    assert result.exit_code == 0
    # Replace 2023-08-17T20:53:58 with YYYY-MM-DDTHH:MM:SS, then
    # replace id: whatever with id: xxx
    normalized = id_re.sub(
        "id: xxx", datetime_re.sub("YYYY-MM-DDTHH:MM:SS", result.output)
    )

    heading_and_prompt = (
        "# YYYY-MM-DDTHH:MM:SS    conversation: abc123 id: xxx\n\n"
        "Model: **davinci**\n\n"
        "## Prompt\n\n"
        "prompt\n\n"
    )
    system_section = "## System\n\n" "system\n\n"
    response_section = (
        "## Response\n\n" 'response\n```python\nprint("hello word")\n```\n\n'
    )
    usage_section = "## Token usage\n\n2 input, 5 output\n\n" if usage else ""

    # Only the first entry is expected to include the System section
    first_entry = (
        heading_and_prompt + system_section + response_section + usage_section
    )
    later_entry = heading_and_prompt + response_section + usage_section
    assert normalized == first_entry + later_entry * 2


def test_logs_text_with_options(user_path):
    """Test that ## Options section appears when options_json is set"""
    db_file = str(user_path / "logs_with_options.db")
    database = sqlite_utils.Database(db_file)
    migrate(database)
    now = datetime.datetime.now(datetime.timezone.utc)
    logged_options = {"thinking_level": "high", "media_resolution": "low"}

    # A single response row that carries options
    row = {
        "id": str(monotonic_ulid()).lower(),
        "system": "system",
        "prompt": "prompt",
        "response": "response",
        "model": "davinci",
        "datetime_utc": now.isoformat(),
        "conversation_id": "abc123",
        "input_tokens": 2,
        "output_tokens": 5,
        "options_json": json.dumps(logged_options),
    }
    database["responses"].insert(row)

    result = CliRunner().invoke(cli, ["logs", "-p", db_file], catch_exceptions=False)
    assert result.exit_code == 0
    rendered = result.output

    # The section heading and one bullet per option must all be present
    for snippet in (
        "## Options\n\n",
        "- thinking_level: high",
        "- media_resolution: low",
    ):
        assert snippet in rendered


@pytest.mark.parametrize("n", (None, 0, 2))
def test_logs_json(n, log_path):
    """Test that logs command correctly returns requested -n records"""
    cli_args = ["logs", "-p", str(log_path), "--json"]
    if n is not None:
        cli_args += ["-n", str(n)]
    result = CliRunner().invoke(cli, cli_args, catch_exceptions=False)
    assert result.exit_code == 0
    records = json.loads(result.output)
    if n is None:
        # No -n given: three records are expected
        expected_length = 3
    elif n == 0:
        # -n 0: every one of the fixture's 100 rows
        expected_length = 100
    else:
        expected_length = n
    assert len(records) == expected_length


@pytest.mark.parametrize(
    "args", (["-r"], ["--response"], ["list", "-r"], ["list", "--response"])
)
def test_logs_response_only(args, log_path):
    """Test that logs -r/--response returns just the last response"""
    command = ["logs", *args]
    outcome = CliRunner().invoke(cli, command, catch_exceptions=False)
    assert outcome.exit_code == 0
    expected_text = 'response\n```python\nprint("hello word")\n```\n'
    assert outcome.output == expected_text


@pytest.mark.parametrize(
    "args",
    (
        ["-x"],
        ["--extract"],
        ["list", "-x"],
        ["list", "--extract"],
        # Using -xr together should have same effect as just -x
        ["-xr"],
        ["-x", "-r"],
        ["--extract", "--response"],
    ),
)
def test_logs_extract_first_code(args, log_path):
    """Test that logs -x/--extract returns the first code block"""
    command = ["logs", *args]
    outcome = CliRunner().invoke(cli, command, catch_exceptions=False)
    assert outcome.exit_code == 0
    expected_code = 'print("hello word")\n\n'
    assert outcome.output == expected_code


@pytest.mark.parametrize(
    "args",
    (
        ["--xl"],
        ["--extract-last"],
        ["list", "--xl"],
        ["list", "--extract-last"],
        ["--xl", "-r"],
        ["-x", "--xl"],
    ),
)
def test_logs_extract_last_code(args, log_path):
    """Test that logs --xl/--extract-last returns the last code block"""
    command = ["logs", *args]
    outcome = CliRunner().invoke(cli, command, catch_exceptions=False)
    assert outcome.exit_code == 0
    expected_code = 'print("hello word")\n\n'
    assert outcome.output == expected_code


@pytest.mark.parametrize("arg", ("-s", "--short"))
@pytest.mark.parametrize("usage", (None, "-u", "--usage"))
def test_logs_short(log_path, arg, usage):
    """-s/--short prints a compact YAML entry per response, plus usage when requested."""
    cli_args = ["logs", arg, "-p", str(log_path)]
    if usage:
        cli_args.append(usage)
    result = CliRunner().invoke(cli, cli_args)
    assert result.exit_code == 0
    normalized = datetime_re.sub("YYYY-MM-DDTHH:MM:SS", result.output)

    usage_lines = "  usage:\n    input: 2\n    output: 5\n" if usage else ""
    one_entry = (
        "- model: davinci\n"
        "  datetime: 'YYYY-MM-DDTHH:MM:SS'\n"
        "  conversation: abc123\n"
        "  system: system\n"
        "  prompt: prompt\n"
        "  prompt_fragments: []\n"
        "  system_fragments: []\n"
    ) + usage_lines
    # Three identical entries are expected
    assert normalized == one_entry * 3


@pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows")
@pytest.mark.parametrize("env", ({}, {"LLM_USER_PATH": "/tmp/llm-user-path"}))
def test_logs_path(monkeypatch, env, user_path):
    """`llm logs path` prints logs.db inside LLM_USER_PATH when set, else inside user_path."""
    for name in env:
        monkeypatch.setenv(name, env[name])
    outcome = CliRunner().invoke(cli, ["logs", "path"])
    assert outcome.exit_code == 0
    base_dir = env["LLM_USER_PATH"] if env else str(user_path)
    assert outcome.output.strip() == base_dir + "/logs.db"


@pytest.mark.parametrize("model", ("davinci", "curie"))
@pytest.mark.parametrize("path_option", (None, "-p", "--path", "-d", "--database"))
def test_logs_filtered(user_path, model, path_option):
    """-m filters logs by model, for the default database and each path option spelling."""
    # A path option points the command at a differently named database file
    db_name = "logs_alternative.db" if path_option else "logs.db"
    log_path = str(user_path / db_name)
    database = sqlite_utils.Database(log_path)
    migrate(database)

    def _rows():
        # Alternate models: even rows are davinci, odd rows are curie
        for index in range(100):
            yield {
                "id": str(monotonic_ulid()).lower(),
                "system": "system",
                "prompt": "prompt",
                "response": "response",
                "model": ("davinci", "curie")[index % 2],
            }

    database["responses"].insert_all(_rows())

    cli_args = ["logs", "list", "-m", model, "--json"]
    if path_option:
        cli_args += [path_option, log_path]
    result = CliRunner().invoke(cli, cli_args)
    assert result.exit_code == 0
    for record in json.loads(result.output.strip()):
        assert record["model"] == model


@pytest.mark.parametrize(
    "query,extra_args,expected",
    (
        # With no search term order should be by datetime
        ("", [], ["doc1", "doc2", "doc3"]),
        # With a search it's order by rank instead
        ("llama", [], ["doc1", "doc3"]),
        ("alpaca", [], ["doc2"]),
        # Model filter should work too
        ("llama", ["-m", "davinci"], ["doc1", "doc3"]),
        ("llama", ["-m", "davinci2"], []),
        # Adding -l/--latest should return latest first (order by id desc)
        ("llama", [], ["doc1", "doc3"]),
        ("llama", ["-l"], ["doc3", "doc1"]),
        ("llama", ["--latest"], ["doc3", "doc1"]),
    ),
)
def test_logs_search(user_path, query, extra_args, expected):
    """Search logs with -q and check which response IDs come back, and in what order."""
    database = sqlite_utils.Database(str(user_path / "logs.db"))
    migrate(database)

    # Rows are inserted one at a time, in this order
    documents = (
        ("doc1", "llama"),
        ("doc2", "alpaca"),
        ("doc3", "llama llama"),
    )
    for doc_id, prompt_text in documents:
        database["responses"].insert(
            {
                "id": doc_id,
                "system": "system",
                "prompt": prompt_text,
                "response": "response",
                "model": "davinci",
            }
        )

    cli_args = ["logs", "list", "-q", query, "--json"] + extra_args
    result = CliRunner().invoke(cli, cli_args)
    assert result.exit_code == 0
    returned_ids = [record["id"] for record in json.loads(result.output.strip())]
    assert returned_ids == expected


@pytest.mark.parametrize(
    "args,expected",
    (
        # --data: one JSON object per line for each matching response
        (["--data", "--schema", SINGLE_ID], '{"name": "1"}\n{"name": "0"}\n'),
        (
            ["--data", "--schema", MULTI_ID],
            (
                '{"items": [{"name": "one"}, {"name": "two"}]}\n'
                '{"items": [{"name": "one"}, {"name": "two"}]}\n'
                '{"items": [{"name": "one"}, {"name": "two"}]}\n'
                '{"items": [{"name": "one"}, {"name": "two"}]}\n'
            ),
        ),
        # --data-array: the same objects wrapped in a single JSON array
        (
            ["--data-array", "--schema", MULTI_ID],
            (
                '[{"items": [{"name": "one"}, {"name": "two"}]},\n'
                ' {"items": [{"name": "one"}, {"name": "two"}]},\n'
                ' {"items": [{"name": "one"}, {"name": "two"}]},\n'
                ' {"items": [{"name": "one"}, {"name": "two"}]}]\n'
            ),
        ),
        # --data-key items: each element of the "items" list on its own line
        (
            ["--schema", MULTI_ID, "--data-key", "items"],
            (
                '{"name": "one"}\n'
                '{"name": "two"}\n'
                '{"name": "one"}\n'
                '{"name": "two"}\n'
                '{"name": "one"}\n'
                '{"name": "two"}\n'
                '{"name": "one"}\n'
                '{"name": "two"}\n'
            ),
        ),
    ),
)
def test_logs_schema(schema_log_path, args, expected):
    """Check the exact output of the --data family of options for schema-linked responses."""
    runner = CliRunner()
    result = runner.invoke(
        cli,
        # -n 0 is passed so the output covers every fixture response
        ["logs", "-n", "0", "-p", str(schema_log_path)] + args,
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert result.output == expected


def test_logs_schema_data_ids(schema_log_path):
    """Check the keys present on rows returned with --data-ids.

    An extra response is logged whose JSON payload already contains
    "response_id", "conversation_id" and "conversation_id_" keys. The
    assertions require that row to also carry "response_id_" and
    "conversation_id__", while every other row has just "response_id",
    "conversation_id" and "name".
    """
    db = sqlite_utils.Database(schema_log_path)
    # Timestamp 100 seconds ahead; the assertions below expect this row last
    ulid = ULID.from_timestamp(time.time() + 100)
    db["responses"].insert(
        {
            "id": str(ulid).lower(),
            "system": "system",
            "prompt": "prompt",
            "response": json.dumps(
                {
                    "name": "three",
                    "response_id": 1,
                    "conversation_id": 2,
                    "conversation_id_": 3,
                }
            ),
            "model": "davinci",
            "datetime_utc": ulid.datetime.isoformat(),
            "conversation_id": "abc123",
            "input_tokens": 2,
            "output_tokens": 5,
            "schema_id": SINGLE_ID,
        }
    )
    runner = CliRunner()
    result = runner.invoke(
        cli,
        [
            "logs",
            "-n",
            "0",
            "-p",
            str(schema_log_path),
            "--data-ids",
            "--data-key",
            "items",
            "--data-array",
        ],
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    rows = json.loads(result.output)
    last_row = rows.pop(-1)
    # NOTE(review): presumably the added ID keys get trailing underscores until
    # they no longer clash with keys already in the payload - confirm in the CLI.
    assert set(last_row.keys()) == {
        "conversation_id_",
        "conversation_id",
        "response_id",
        "response_id_",
        "name",
        "conversation_id__",
    }
    for row in rows:
        assert set(row.keys()) == {"conversation_id", "response_id", "name"}


# Regex used by test_schemas_list_yaml for the YAML schema listing: two
# entries, the first used 4 times and the second used 2 times. The
# whitespace-only lines inside the literal are part of the pattern.
_expected_yaml_re = r"""- id: [a-f0-9]{32}
  summary: \|
    
  usage: \|
    4 times, most recently \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+00:00
- id: [a-f0-9]{32}
  summary: \|
    
  usage: \|
    2 times, most recently \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}\+00:00"""


@pytest.mark.parametrize(
    "args,expected",
    (
        (["schemas"], _expected_yaml_re),
        (["schemas", "list"], _expected_yaml_re),
    ),
)
def test_schemas_list_yaml(schema_log_path, args, expected):
    """`llm schemas` and `llm schemas list` both print the expected YAML listing."""
    cli_args = args + ["-d", str(schema_log_path)]
    outcome = CliRunner().invoke(cli, cli_args)
    assert outcome.exit_code == 0
    stripped_output = outcome.output.strip()
    assert re.match(expected, stripped_output)


@pytest.mark.parametrize("is_nl", (False, True))
def test_schemas_list_json(schema_log_path, is_nl):
    """`llm schemas list` emits the two schemas as JSON (--json) or one object per line (--nl)."""
    format_flag = "--nl" if is_nl else "--json"
    outcome = CliRunner().invoke(
        cli, ["schemas", "list", format_flag, "-d", str(schema_log_path)]
    )
    assert outcome.exit_code == 0
    if is_nl:
        # Newline-delimited: parse each line separately
        rows = [json.loads(line) for line in outcome.output.strip().split("\n")]
    else:
        rows = json.loads(outcome.output)
    assert len(rows) == 2
    most_used = rows[0]
    least_used = rows[1]
    assert most_used["content"] == {"name": "array"}
    assert most_used["times_used"] == 4
    assert least_used["content"] == {"name": "string"}
    assert least_used["times_used"] == 2
    assert set(most_used.keys()) == {"id", "content", "recently_used", "times_used"}


@pytest.fixture
def fragments_fixture(user_path):
    """Create logs_fragments.db holding five fragments, their aliases and eight responses.

    Returns a dict with three keys:
      - "path": the database path as a string
      - "fragment_hashes_by_slug": maps "hash1".."hash5" to each fragment's id()
      - "collected": response IDs gathered from make_response (see the note
        where it is populated about its inconsistent shape)
    """
    log_path = str(user_path / "logs_fragments.db")
    db = sqlite_utils.Database(log_path)
    migrate(db)
    start = datetime.datetime.now(datetime.timezone.utc)
    # Replace everything from here on

    fragment_hashes_by_slug = {}
    # Create fragments
    for i in range(1, 6):
        content = f"This is fragment {i}" * (100 if i == 5 else 1)
        fragment = Fragment(content, "fragment")
        db["fragments"].insert(
            {
                "id": i,
                "hash": fragment.id(),
                # 5 is a long one:
                "content": content,
                "datetime_utc": start.isoformat(),
            }
        )
        # Each fragment gets a "hashN" alias, which tests use as a stand-in for
        # the real hash
        db["fragment_aliases"].insert({"alias": f"hash{i}", "fragment_id": i})
        fragment_hashes_by_slug[f"hash{i}"] = fragment.id()

    # Create some more fragment aliases
    db["fragment_aliases"].insert({"alias": "alias_1", "fragment_id": 3})
    db["fragment_aliases"].insert({"alias": "alias_3", "fragment_id": 4})
    db["fragment_aliases"].insert({"alias": "long_5", "fragment_id": 5})

    def make_response(name, prompt_fragment_ids=None, system_fragment_ids=None):
        # Insert one response named `name`, link the given fragment IDs to it
        # and return {name: response_id}.
        time.sleep(0.05)  # To ensure ULIDs order predictably
        response_id = str(ULID.from_timestamp(time.time())).lower()
        db["responses"].insert(
            {
                "id": response_id,
                "system": f"system: {name}",
                "prompt": f"prompt: {name}",
                "response": f"response: {name}",
                "model": "davinci",
                "datetime_utc": start.isoformat(),
                "conversation_id": "abc123",
                "input_tokens": 2,
                "output_tokens": 5,
            }
        )
        # Link fragments to this response
        for fragment_id in prompt_fragment_ids or []:
            db["prompt_fragments"].insert(
                {"response_id": response_id, "fragment_id": fragment_id}
            )
        for fragment_id in system_fragment_ids or []:
            db["system_fragments"].insert(
                {"response_id": response_id, "fragment_id": fragment_id}
            )
        return {name: response_id}

    # NOTE(review): the first update merges {"no_fragments": id} directly, while
    # the keyword-style updates below store the whole {name: id} dict under a
    # "<name>_id" key. No test in this file reads "collected", so the intended
    # shape cannot be confirmed from here.
    collected = {}
    collected.update(make_response("no_fragments"))
    collected.update(
        single_prompt_fragment_id=make_response("single_prompt_fragment", [1])
    )
    collected.update(
        single_system_fragment_id=make_response("single_system_fragment", None, [2])
    )
    collected.update(
        multi_prompt_fragment_id=make_response("multi_prompt_fragment", [1, 2])
    )
    collected.update(
        multi_system_fragment_id=make_response("multi_system_fragment", None, [1, 2])
    )
    collected.update(both_fragments_id=make_response("both_fragments", [1, 2], [3, 4]))
    collected.update(
        single_long_prompt_fragment_with_alias_id=make_response(
            "single_long_prompt_fragment_with_alias", [5], None
        )
    )
    collected.update(
        single_system_fragment_with_alias_id=make_response(
            "single_system_fragment_with_alias", None, [4]
        )
    )
    return {
        "path": log_path,
        "fragment_hashes_by_slug": fragment_hashes_by_slug,
        "collected": collected,
    }


@pytest.mark.parametrize(
    "fragment_refs,expected",
    (
        (
            ["hash1"],
            [
                {
                    "name": "single_prompt_fragment",
                    "prompt_fragments": ["hash1"],
                    "system_fragments": [],
                },
                {
                    "name": "multi_prompt_fragment",
                    "prompt_fragments": ["hash1", "hash2"],
                    "system_fragments": [],
                },
                {
                    "name": "multi_system_fragment",
                    "prompt_fragments": [],
                    "system_fragments": ["hash1", "hash2"],
                },
                {
                    "name": "both_fragments",
                    "prompt_fragments": ["hash1", "hash2"],
                    "system_fragments": ["hash3", "hash4"],
                },
            ],
        ),
        (
            ["alias_3"],
            [
                {
                    "name": "both_fragments",
                    "prompt_fragments": ["hash1", "hash2"],
                    "system_fragments": ["hash3", "hash4"],
                },
                {
                    "name": "single_system_fragment_with_alias",
                    "prompt_fragments": [],
                    "system_fragments": ["hash4"],
                },
            ],
        ),
        # Testing for AND condition
        (
            ["hash1", "hash4"],
            [
                {
                    "name": "both_fragments",
                    "prompt_fragments": ["hash1", "hash2"],
                    "system_fragments": ["hash3", "hash4"],
                },
            ],
        ),
    ),
)
def test_logs_fragments(fragments_fixture, fragment_refs, expected):
    """Filter logs with one or more -f references and check the matching responses.

    The same expectation is checked against both the --json output and the
    -s/--short YAML output. `expected` lists fragments by their "hashN" slug;
    the slugs are resolved to real fragment hashes before comparing.
    """
    fragments_log_path = fragments_fixture["path"]
    fragment_hashes_by_slug = fragments_fixture["fragment_hashes_by_slug"]
    runner = CliRunner()
    args = ["logs", "-d", fragments_log_path, "-n", "0"]
    for ref in fragment_refs:
        args.extend(["-f", ref])

    def resolve(refs):
        # Swap each "hashN" slug for the real hash; unknown refs pass through
        return [fragment_hashes_by_slug.get(ref, ref) for ref in refs]

    # Build resolved copies rather than rewriting the parametrize data in
    # place: those dicts are module-level objects shared with pytest.
    expected_resolved = [
        {
            **item,
            "prompt_fragments": resolve(item["prompt_fragments"]),
            "system_fragments": resolve(item["system_fragments"]),
        }
        for item in expected
    ]

    result = runner.invoke(cli, args + ["--json"], catch_exceptions=False)
    assert result.exit_code == 0
    responses = json.loads(result.output)
    # Re-shape that to same shape as expected
    reshaped = [
        {
            "name": response["prompt"].replace("prompt: ", ""),
            "prompt_fragments": [
                fragment["hash"] for fragment in response["prompt_fragments"]
            ],
            "system_fragments": [
                fragment["hash"] for fragment in response["system_fragments"]
            ],
        }
        for response in responses
    ]
    assert reshaped == expected_resolved

    # Now test the `-s/--short` option:
    result2 = runner.invoke(cli, args + ["-s"], catch_exceptions=False)
    assert result2.exit_code == 0
    loaded = yaml.safe_load(result2.output)
    reshaped2 = [
        {
            "name": item["prompt"].replace("prompt: ", ""),
            "system_fragments": item["system_fragments"],
            "prompt_fragments": item["prompt_fragments"],
        }
        for item in loaded
    ]
    assert reshaped2 == expected_resolved


def test_logs_fragments_markdown(fragments_fixture):
    """Render every fixture response as Markdown and compare with the full expected document.

    Timestamps and response IDs in the output are replaced with placeholders,
    and the "hashN" slugs in the expected text are replaced with the real
    fragment hashes, before the two are compared.
    """
    fragments_log_path = fragments_fixture["path"]
    fragment_hashes_by_slug = fragments_fixture["fragment_hashes_by_slug"]
    runner = CliRunner()
    args = ["logs", "-d", fragments_log_path, "-n", "0"]
    result = runner.invoke(cli, args, catch_exceptions=False)
    assert result.exit_code == 0
    output = result.output
    # Replace dates and IDs
    output = datetime_re.sub("YYYY-MM-DDTHH:MM:SS", output)
    output = id_re.sub("id: xxx", output)
    expected_output = """
# YYYY-MM-DDTHH:MM:SS    conversation: abc123 id: xxx

Model: **davinci**

## Prompt

prompt: no_fragments

## System

system: no_fragments

## Response

response: no_fragments

# YYYY-MM-DDTHH:MM:SS    conversation: abc123 id: xxx

Model: **davinci**

## Prompt

prompt: single_prompt_fragment

### Prompt fragments

- hash1

## System

system: single_prompt_fragment

## Response

response: single_prompt_fragment

# YYYY-MM-DDTHH:MM:SS    conversation: abc123 id: xxx

Model: **davinci**

## Prompt

prompt: single_system_fragment

## System

system: single_system_fragment

### System fragments

- hash2

## Response

response: single_system_fragment

# YYYY-MM-DDTHH:MM:SS    conversation: abc123 id: xxx

Model: **davinci**

## Prompt

prompt: multi_prompt_fragment

### Prompt fragments

- hash1
- hash2

## System

system: multi_prompt_fragment

## Response

response: multi_prompt_fragment

# YYYY-MM-DDTHH:MM:SS    conversation: abc123 id: xxx

Model: **davinci**

## Prompt

prompt: multi_system_fragment

## System

system: multi_system_fragment

### System fragments

- hash1
- hash2

## Response

response: multi_system_fragment

# YYYY-MM-DDTHH:MM:SS    conversation: abc123 id: xxx

Model: **davinci**

## Prompt

prompt: both_fragments

### Prompt fragments

- hash1
- hash2

## System

system: both_fragments

### System fragments

- hash3
- hash4

## Response

response: both_fragments

# YYYY-MM-DDTHH:MM:SS    conversation: abc123 id: xxx

Model: **davinci**

## Prompt

prompt: single_long_prompt_fragment_with_alias

### Prompt fragments

- hash5

## System

system: single_long_prompt_fragment_with_alias

## Response

response: single_long_prompt_fragment_with_alias

# YYYY-MM-DDTHH:MM:SS    conversation: abc123 id: xxx

Model: **davinci**

## Prompt

prompt: single_system_fragment_with_alias

## System

system: single_system_fragment_with_alias

### System fragments

- hash4

## Response

response: single_system_fragment_with_alias
    """
    # Replace hash4 etc with their proper IDs
    for key, value in fragment_hashes_by_slug.items():
        expected_output = expected_output.replace(key, value)
    # Both sides are stripped, so leading/trailing whitespace is not compared
    assert output.strip() == expected_output.strip()


@pytest.mark.parametrize("arg", ("-e", "--expand"))
def test_expand_fragment_json(fragments_fixture, arg):
    """JSON logs shorten long fragment content unless -e/--expand is passed."""
    runner = CliRunner()
    base_args = ["logs", "-d", fragments_fixture["path"], "-f", "long_5", "--json"]
    repeated_start = "This is fragment 5This is fragment 5"

    def first_fragment_content(cli_args):
        # Run the command and pull out the first prompt fragment's content
        outcome = runner.invoke(cli, cli_args, catch_exceptions=False)
        assert outcome.exit_code == 0
        return json.loads(outcome.output)[0]["prompt_fragments"][0]["content"]

    # Without -e the JSON is truncated
    truncated = first_fragment_content(base_args)
    assert truncated.startswith(repeated_start)
    assert len(truncated) < 200

    # With -e the JSON is expanded
    expanded = first_fragment_content(base_args + [arg])
    assert expanded.startswith(repeated_start)
    assert len(expanded) > 200


def test_expand_fragment_markdown(fragments_fixture):
    """With --expand, Markdown logs wrap the fragment content in a <details> block."""
    fragment_hash = fragments_fixture["fragment_hashes_by_slug"]["hash5"]
    cli_args = ["logs", "-d", fragments_fixture["path"], "-f", "long_5", "--expand"]
    outcome = CliRunner().invoke(cli, cli_args, catch_exceptions=False)
    assert outcome.exit_code == 0

    # Isolate the text between the prompt line and the System heading
    after_prompt = outcome.output.split(
        "prompt: single_long_prompt_fragment_with_alias"
    )[1]
    fragments_section = after_prompt.split("## System")[0].strip()

    expected_prefix = (
        "### Prompt fragments\n\n"
        "<details><summary>" + fragment_hash + "</summary>\n"
        "This is fragment 5"
    )
    assert fragments_section.startswith(expected_prefix)
    assert fragments_section.endswith("</details>")


def test_logs_tools(logs_db):
    """Tool results appear in `llm logs`, and --tools leaves out responses that used no tools."""
    runner = CliRunner()
    functions_source = textwrap.dedent("""
    def demo():
        return "one\\ntwo\\nthree"
    """)
    tool_prompt = json.dumps({"tool_calls": [{"name": "demo"}]})
    tool_run = runner.invoke(
        cli, ["-m", "echo", "--functions", functions_source, tool_prompt]
    )
    assert tool_run.exit_code == 0

    conversation_logs = runner.invoke(cli, ["logs", "-c"])
    expected_results_block = (
        "### Tool results\n"
        "\n"
        "- **demo**: `None`<br>\n"
        "    one\n"
        "    two\n"
        "    three\n"
        "\n"
    )
    assert expected_results_block in conversation_logs.output

    # Log one that did NOT use tools, check that `llm logs --tools` ignores it
    plain_run = runner.invoke(cli, ["-m", "echo", "badger"])
    assert plain_run.exit_code == 0
    all_logs = runner.invoke(cli, ["logs"]).output
    assert "badger" in all_logs
    tools_only_logs = runner.invoke(cli, ["logs", "--tools"]).output
    assert "badger" not in tools_only_logs
    assert "three" in tools_only_logs


def test_logs_backup(logs_db):
    """`llm logs backup <file>` writes the backup file and reports where it went."""
    # The logs database starts out with no tables
    assert not logs_db.tables
    runner = CliRunner()
    with runner.isolated_filesystem():
        # Running any prompt is what creates the tables
        runner.invoke(cli, ["-m", "echo", "simple prompt"])
        assert logs_db.tables

        backup_file = pathlib.Path("backup.db")
        assert not backup_file.exists()

        # Now back it up
        outcome = runner.invoke(cli, ["logs", "backup", "backup.db"])
        assert outcome.exit_code == 0
        message = outcome.output
        assert message.startswith("Backed up ")
        assert message.endswith("to backup.db\n")
        assert backup_file.exists()


@pytest.mark.parametrize("async_", (False, True))
def test_logs_resolved_model(logs_db, mock_model, async_mock_model, async_):
    """A model's resolved_model_name is stored in the database and shown by `llm logs`."""
    for model in (mock_model, async_mock_model):
        model.resolved_model_name = "resolved-mock"

    runner = CliRunner()
    prompt_args = ["-m", "mock", "simple prompt"]
    if async_:
        prompt_args.append("--async")
    prompt_run = runner.invoke(cli, prompt_args)
    assert prompt_run.exit_code == 0

    # Should have logged the resolved model name
    responses_table = logs_db["responses"]
    assert responses_table.count
    stored = list(responses_table.rows)[0]
    assert stored["model"] == "mock"
    assert stored["resolved_model"] == "resolved-mock"

    # Should show up in the JSON logs
    json_run = runner.invoke(cli, ["logs", "--json"])
    assert json_run.exit_code == 0
    entries = json.loads(json_run.output.strip())
    assert len(entries) == 1
    only_entry = entries[0]
    assert only_entry["model"] == "mock"
    assert only_entry["resolved_model"] == "resolved-mock"

    # And the rendered logs
    rendered = runner.invoke(cli, ["logs"]).output
    assert "Model: **mock** (resolved: **resolved-mock**)" in rendered


================================================
FILE: tests/test_migrate.py
================================================
import llm
from llm.migrations import migrate
from llm.embeddings_migrations import embeddings_migrations
import pytest
import sqlite_utils

# Column name -> Python type that test_migrate_blank() compares against the
# "responses" table's columns_dict. Three columns are integers; every other
# column is a string.
EXPECTED = {
    column: (
        int if column in ("duration_ms", "input_tokens", "output_tokens") else str
    )
    for column in (
        "id",
        "model",
        "resolved_model",
        "prompt",
        "system",
        "prompt_json",
        "options_json",
        "response",
        "response_json",
        "conversation_id",
        "duration_ms",
        "datetime_utc",
        "input_tokens",
        "output_tokens",
        "token_details",
        "schema_id",
    )
}


def test_migrate_blank():
    """Migrating an empty in-memory database produces the expected schema."""
    database = sqlite_utils.Database(memory=True)
    migrate(database)

    required_tables = {
        "_llm_migrations",
        "conversations",
        "responses",
        "responses_fts",
    }
    assert required_tables <= set(database.table_names())

    responses = database["responses"]
    assert responses.columns_dict == EXPECTED

    # responses.conversation_id must reference conversations.id
    conversation_fk = sqlite_utils.db.ForeignKey(
        table="responses",
        column="conversation_id",
        other_table="conversations",
        other_column="id",
    )
    assert conversation_fk in responses.foreign_keys

    # Should have FTS configured with triggers on correct tables
    trigger_names = set()
    for trigger in database.triggers:
        trigger_names.add(trigger.name)
    assert trigger_names == {"responses_ai", "responses_ad", "responses_au"}


@pytest.mark.parametrize("has_record", [True, False])
def test_migrate_from_original_schema(has_record):
    """Migrations succeed from the original ``log`` table, empty or populated."""
    database = sqlite_utils.Database(memory=True)
    original_columns = (
        "provider",
        "system",
        "prompt",
        "chat_id",
        "response",
        "model",
        "timestamp",
    )
    log_table = database["log"]
    if not has_record:
        # Create empty logs table
        log_table.create({name: str for name in original_columns})
    else:
        # Every value is simply the column's own name, except for a null chat_id
        log_table.insert(
            {name: (None if name == "chat_id" else name) for name in original_columns}
        )

    migrate(database)

    expected_tables = {"_llm_migrations", "conversations", "responses", "responses_fts"}
    if has_record:
        expected_tables = expected_tables | {"logs"}
    assert expected_tables <= set(database.table_names())

    trigger_names = {trigger.name for trigger in database.triggers}
    assert trigger_names == {"responses_ai", "responses_ad", "responses_au"}


def test_migrations_with_legacy_alter_table():
    """Migrations must run without error when ``legacy_alter_table`` is on.

    Regression test for https://github.com/simonw/llm/issues/162
    """
    database = sqlite_utils.Database(memory=True)
    database.execute("pragma legacy_alter_table=on")
    # Passing means migrate() did not raise
    migrate(database)


def test_migrations_for_embeddings():
    """The embeddings migrations create the collections and embeddings tables."""
    database = sqlite_utils.Database(memory=True)
    embeddings_migrations.apply(database)

    collections_columns = database["collections"].columns_dict
    assert collections_columns == {"id": int, "name": str, "model": str}

    embeddings_table = database["embeddings"]
    assert embeddings_table.columns_dict == {
        "collection_id": int,
        "id": str,
        "embedding": bytes,
        "content": str,
        "content_blob": bytes,
        "content_hash": bytes,
        "metadata": str,
        "updated": int,
    }

    # The first foreign key links embeddings.collection_id to collections
    first_fk = embeddings_table.foreign_keys[0]
    assert first_fk.column == "collection_id"
    assert first_fk.other_table == "collections"


def test_backfill_content_hash():
    """Applying the remaining migrations backfills content_hash on old rows."""
    database = sqlite_utils.Database(memory=True)
    # Run migrations up to but not including m004_store_content_hash
    embeddings_migrations.apply(database, stop_before="m004_store_content_hash")
    assert "content_hash" not in database["embeddings"].columns_dict

    embedding_one = (
        b"\x00\x00\xa0@\x00\x00\xa0@\x00\x00\x00\x00\x00\x00\x00\x00"
        b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    )
    embedding_two = (
        b"\x00\x00\xe0@\x00\x00\xa0@\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    )
    # Add some rows directly because llm.Collection would run migrations
    seed_rows = [
        {
            "collection_id": 1,
            "id": row_id,
            "embedding": embedding,
            "content": content,
            "metadata": None,
            "updated": 1693763088,
        }
        for row_id, embedding, content in (
            ("1", embedding_one, None),
            ("2", embedding_two, "goodbye world"),
        )
    ]
    database["embeddings"].insert_all(seed_rows)

    # Now finish the migrations
    embeddings_migrations.apply(database)
    without_content, with_content = database["embeddings"].rows
    # This one should be random:
    assert without_content["content_hash"] is not None
    # This should be a hash of 'goodbye world'
    expected_hash = llm.Collection.content_hash("goodbye world")
    assert with_content["content_hash"] == expected_hash


================================================
FILE: tests/test_plugins.py
================================================
from click.testing import CliRunner
import click
import importlib
import json
import llm
from llm.tools import llm_version, llm_time
from llm import cli, hookimpl, plugins, get_template_loaders, get_fragment_loaders
import pathlib
import pytest
import textwrap


def test_register_commands():
    """A plugin's register_commands hook adds a command to the reloaded CLI."""
    importlib.reload(cli)

    def registered_plugin_names():
        names = []
        for plugin in llm.get_plugins():
            names.append(plugin["name"])
        return names

    assert "HelloWorldPlugin" not in registered_plugin_names()

    class HelloWorldPlugin:
        __name__ = "HelloWorldPlugin"

        @hookimpl
        def register_commands(self, cli):
            @cli.command(name="hello-world")
            def hello_world():
                "Print hello world"
                click.echo("Hello world!")

    try:
        plugin_instance = HelloWorldPlugin()
        plugins.pm.register(plugin_instance, name="HelloWorldPlugin")
        # Reload so the CLI module is rebuilt with the plugin registered
        importlib.reload(cli)

        assert "HelloWorldPlugin" in registered_plugin_names()

        outcome = CliRunner().invoke(cli.cli, ["hello-world"])
        assert outcome.exit_code == 0
        assert outcome.output == "Hello world!\n"

    finally:
        # Always unregister and reload so the plugin does not linger
        plugins.pm.unregister(name="HelloWorldPlugin")
        importlib.reload(cli)
        assert "HelloWorldPlugin" not in registered_plugin_names()


def test_register_template_loaders():
    # Template loaders registered by a plugin are returned by
    # get_template_loaders() and listed by `llm templates loaders`; a prefix
    # registered twice gets a "_1" suffix.
    assert get_template_loaders() == {}

    # NOTE: the loader docstrings (or their absence) are printed by the CLI and
    # asserted below, so they must stay exactly as they are.
    def one_loader(template_path):
        return llm.Template(name="one:" + template_path, prompt=template_path)

    def two_loader(template_path):
        "Docs for two"
        return llm.Template(name="two:" + template_path, prompt=template_path)

    def dupe_two_loader(template_path):
        "Docs for two dupe"
        return llm.Template(name="two:" + template_path, prompt=template_path)

    class TemplateLoadersPlugin:
        __name__ = "TemplateLoadersPlugin"

        @hookimpl
        def register_template_loaders(self, register):
            registrations = (
                ("one", one_loader),
                ("two", two_loader),
                ("two", dupe_two_loader),
            )
            for prefix, loader in registrations:
                register(prefix, loader)

    try:
        plugins.pm.register(TemplateLoadersPlugin(), name="TemplateLoadersPlugin")
        expected_loaders = {
            "one": one_loader,
            "two": two_loader,
            "two_1": dupe_two_loader,
        }
        assert get_template_loaders() == expected_loaders

        # Test the CLI command
        listing = CliRunner().invoke(cli.cli, ["templates", "loaders"])
        assert listing.exit_code == 0
        expected_listing = "".join(
            [
                "one:\n",
                "  Undocumented\n",
                "two:\n",
                "  Docs for two\n",
                "two_1:\n",
                "  Docs for two dupe\n",
            ]
        )
        assert listing.output == expected_listing

    finally:
        plugins.pm.unregister(name="TemplateLoadersPlugin")
        assert get_template_loaders() == {}


def test_register_fragment_loaders(logs_db, httpx_mock):
    # Fragment loaders registered by a plugin are returned by
    # get_fragment_loaders(), usable through `-f prefix:argument`, listed by
    # `llm fragments loaders`, and the fragments they return are stored in the
    # logs database. A loader may also return attachments alongside fragments.
    httpx_mock.add_response(
        method="HEAD",
        url="https://example.com/attachment.png",
        content=b"attachment",
        headers={"Content-Type": "image/png"},
        is_reusable=True,
    )

    assert get_fragment_loaders() == {}

    # NOTE: the loader docstrings (or their absence) are printed by
    # `llm fragments loaders` and asserted below, so do not add or edit them.
    def single_fragment(argument):
        "This is the fragment documentation"
        return llm.Fragment("single", "single")

    def three_fragments(argument):
        return [
            llm.Fragment(f"one:{argument}", "one"),
            llm.Fragment(f"two:{argument}", "two"),
            llm.Fragment(f"three:{argument}", "three"),
        ]

    def fragment_and_attachment(argument):
        return [
            llm.Fragment(f"one:{argument}", "one"),
            llm.Attachment(url="https://example.com/attachment.png"),
        ]

    class FragmentLoadersPlugin:
        __name__ = "FragmentLoadersPlugin"

        @hookimpl
        def register_fragment_loaders(self, register):
            register("single", single_fragment)
            register("three", three_fragments)
            register("mixed", fragment_and_attachment)

    try:
        plugins.pm.register(FragmentLoadersPlugin(), name="FragmentLoadersPlugin")
        loaders = get_fragment_loaders()
        assert loaders == {
            "single": single_fragment,
            "three": three_fragments,
            "mixed": fragment_and_attachment,
        }

        # Test the CLI command: the three fragments are joined into the prompt
        runner = CliRunner()
        result = runner.invoke(
            cli.cli, ["-m", "echo", "-f", "three:x"], catch_exceptions=False
        )
        assert result.exit_code == 0
        assert json.loads(result.output) == {
            "prompt": "one:x\ntwo:x\nthree:x",
            "system": "",
            "attachments": [],
            "stream": True,
            "previous": [],
        }
        # And the llm fragments loaders command:
        result2 = runner.invoke(cli.cli, ["fragments", "loaders"])
        assert result2.exit_code == 0
        expected2 = (
            "single:\n"
            "  This is the fragment documentation\n"
            "\n"
            "three:\n"
            "  Undocumented\n"
            "\n"
            "mixed:\n"
            "  Undocumented\n"
        )
        assert result2.output == expected2

        # Test the one that includes an attachment
        result3 = runner.invoke(
            cli.cli, ["-m", "echo", "-f", "mixed:x"], catch_exceptions=False
        )
        assert result3.exit_code == 0
        # This used to be a bare `result3.output.strip == ...` expression: no
        # `assert`, and `.strip` was never called, so nothing was verified. Its
        # expected value was also plain text, whereas the echo output is parsed
        # as JSON above. Assert on the JSON payload instead.
        payload3 = json.loads(result3.output)
        assert payload3["prompt"] == "one:x"
        assert payload3["system"] == ""
        # NOTE(review): only the attachment count is pinned because the exact
        # per-attachment serialization is not visible from this test - tighten
        # to a full equality check once confirmed.
        assert len(payload3["attachments"]) == 1

    finally:
        plugins.pm.unregister(name="FragmentLoadersPlugin")
        assert get_fragment_loaders() == {}

    # Let's check the database: "one:x" was used by two prompts but is expected
    # to be stored only once
    assert list(logs_db.query("select content, source from fragments")) == [
        {"content": "one:x", "source": "one"},
        {"content": "two:x", "source": "two"},
        {"content": "three:x", "source": "three"},
    ]


def test_register_tools(tmpdir, logs_db):
    """End-to-end test of tools registered through the register_tools hook.

    Covers, in order: llm.get_tools(), `llm tools list` (text and --json),
    `llm tools --functions`, running a prompt with --tool and checking it is
    logged, filtering `llm logs` with -T, continuing a conversation with
    `llm -c` and `llm chat -c`, and the --td option.

    The steps share one logs database and later assertions pin database ids,
    so the order of the steps matters.
    """

    def upper(text: str) -> str:
        """Convert text to uppercase."""
        return text.upper()

    def count_character_in_word(text: str, character: str) -> int:
        """Count the number of occurrences of a character in a word."""
        return text.count(character)

    # Deliberately has no docstring and no return annotation: the assertions
    # below expect description=None and a signature without "-> ..."
    def output_as_json(text: str):
        return {"this_is_in_json": {"nested": text}}

    class ToolsPlugin:
        __name__ = "ToolsPlugin"

        @hookimpl
        def register_tools(self, register):
            # Three registration styles: a prebuilt llm.Tool, a plain function
            # with an explicit name, and a plain function using its own name
            register(llm.Tool.function(upper))
            register(count_character_in_word, name="count_chars")
            register(output_as_json)

    try:
        plugins.pm.register(ToolsPlugin(), name="ToolsPlugin")
        tools = llm.get_tools()
        # The plugin's tools appear alongside the two default tools
        assert tools == {
            "upper": llm.Tool(
                name="upper",
                description="Convert text to uppercase.",
                input_schema={
                    "properties": {"text": {"type": "string"}},
                    "required": ["text"],
                    "type": "object",
                },
                implementation=upper,
                plugin="ToolsPlugin",
            ),
            "count_chars": llm.Tool(
                name="count_chars",
                description="Count the number of occurrences of a character in a word.",
                input_schema={
                    "properties": {
                        "text": {"type": "string"},
                        "character": {"type": "string"},
                    },
                    "required": ["text", "character"],
                    "type": "object",
                },
                implementation=count_character_in_word,
                plugin="ToolsPlugin",
            ),
            "llm_version": llm.Tool(
                name="llm_version",
                description="Return the installed version of llm",
                input_schema={"properties": {}, "type": "object"},
                implementation=llm_version,
                plugin="llm.default_plugins.default_tools",
            ),
            "output_as_json": llm.Tool(
                name="output_as_json",
                description=None,
                input_schema={
                    "properties": {"text": {"type": "string"}},
                    "required": ["text"],
                    "type": "object",
                },
                implementation=output_as_json,
                plugin="ToolsPlugin",
            ),
            "llm_time": llm.Tool(
                name="llm_time",
                description="Returns the current time, as local time and UTC",
                input_schema={"properties": {}, "type": "object"},
                implementation=llm_time,
                plugin="llm.default_plugins.default_tools",
            ),
        }

        # Test the CLI command (the expected listing is in alphabetical order)
        runner = CliRunner()
        result = runner.invoke(cli.cli, ["tools", "list"])
        assert result.exit_code == 0
        assert result.output == (
            "count_chars(text: str, character: str) -> int (plugin: ToolsPlugin)\n\n"
            "  Count the number of occurrences of a character in a word.\n\n"
            "llm_time() -> dict (plugin: llm.default_plugins.default_tools)\n\n"
            "  Returns the current time, as local time and UTC\n\n"
            "llm_version() -> str (plugin: llm.default_plugins.default_tools)\n\n"
            "  Return the installed version of llm\n\n"
            "output_as_json(text: str) (plugin: ToolsPlugin)\n\n"
            "upper(text: str) -> str (plugin: ToolsPlugin)\n\n"
            "  Convert text to uppercase.\n\n"
        )
        # And --json
        result2 = runner.invoke(cli.cli, ["tools", "list", "--json"])
        assert result2.exit_code == 0
        assert json.loads(result2.output) == {
            "tools": [
                {
                    "name": "count_chars",
                    "description": "Count the number of occurrences of a character in a word.",
                    "arguments": {
                        "properties": {
                            "text": {"type": "string"},
                            "character": {"type": "string"},
                        },
                        "required": ["text", "character"],
                        "type": "object",
                    },
                    "plugin": "ToolsPlugin",
                },
                {
                    "arguments": {
                        "properties": {},
                        "type": "object",
                    },
                    "description": "Returns the current time, as local time and UTC",
                    "name": "llm_time",
                    "plugin": "llm.default_plugins.default_tools",
                },
                {
                    "name": "llm_version",
                    "description": "Return the installed version of llm",
                    "arguments": {"properties": {}, "type": "object"},
                    "plugin": "llm.default_plugins.default_tools",
                },
                {
                    "name": "output_as_json",
                    "description": None,
                    "arguments": {
                        "properties": {"text": {"type": "string"}},
                        "required": ["text"],
                        "type": "object",
                    },
                    "plugin": "ToolsPlugin",
                },
                {
                    "name": "upper",
                    "description": "Convert text to uppercase.",
                    "arguments": {
                        "properties": {"text": {"type": "string"}},
                        "required": ["text"],
                        "type": "object",
                    },
                    "plugin": "ToolsPlugin",
                },
            ],
            "toolboxes": [],
        }

        # And test the --functions option, given both inline code and a file path
        functions_path = str(tmpdir / "functions.py")
        with open(functions_path, "w") as fp:
            fp.write("def example(s: str, i: int):\n    return s + '-' + str(i)")
        result3 = runner.invoke(
            cli.cli,
            [
                "tools",
                "--functions",
                "def reverse(s: str): return s[::-1]",
                "--functions",
                functions_path,
            ],
        )
        assert result3.exit_code == 0
        assert "reverse(s: str)" in result3.output
        assert "example(s: str, i: int)" in result3.output
        # Now run a prompt using a plugin tool and check it gets logged correctly
        result4 = runner.invoke(
            cli.cli,
            [
                "-m",
                "echo",
                "--tool",
                "upper",
                json.dumps(
                    {"tool_calls": [{"name": "upper", "arguments": {"text": "hi"}}]}
                ),
            ],
            catch_exceptions=False,
        )
        assert result4.exit_code == 0
        assert '"output": "HI"' in result4.output

        # Now check in the database
        tool_row = [row for row in logs_db["tools"].rows][0]
        assert tool_row["name"] == "upper"
        assert tool_row["plugin"] == "ToolsPlugin"

        # The llm logs command should return that, including with the -T upper option
        for args in ([], ["-T", "upper"]):
            logs_result = runner.invoke(cli.cli, ["logs"] + args)
            assert logs_result.exit_code == 0
            assert "HI" in logs_result.output
        # ... but not for -T count_chars, a tool that was never called
        logs_empty_result = runner.invoke(cli.cli, ["logs", "-T", "count_chars"])
        assert logs_empty_result.exit_code == 0
        assert "HI" not in logs_empty_result.output

        # Start with a tool, use llm -c to reuse the same tool
        result5 = runner.invoke(
            cli.cli,
            [
                "prompt",
                "-m",
                "echo",
                "--tool",
                "upper",
                json.dumps(
                    {"tool_calls": [{"name": "upper", "arguments": {"text": "one"}}]}
                ),
            ],
        )
        assert result5.exit_code == 0
        # Continue the conversation without passing --tool again
        assert (
            runner.invoke(
                cli.cli,
                [
                    "-c",
                    json.dumps(
                        {
                            "tool_calls": [
                                {"name": "upper", "arguments": {"text": "two"}}
                            ]
                        }
                    ),
                ],
            ).exit_code
            == 0
        )
        # Now do it again with llm chat -c
        assert (
            runner.invoke(
                cli.cli,
                ["chat", "-c"],
                input=(
                    json.dumps(
                        {
                            "tool_calls": [
                                {"name": "upper", "arguments": {"text": "three"}}
                            ]
                        }
                    )
                    + "\nquit\n"
                ),
                catch_exceptions=False,
            ).exit_code
            == 0
        )
        # Should have logged those three tool uses in llm logs -c -n 0
        log_rows = json.loads(
            runner.invoke(cli.cli, ["logs", "-c", "-n", "0", "--json"]).output
        )
        # Reduce each log row to (prompt, tool results as a JSON string)
        results = tuple(
            (log_row["prompt"], json.dumps(log_row["tool_results"]))
            for log_row in log_rows
        )
        # Each tool use is logged as two rows: the prompt that requested the
        # call, then an empty-prompt row carrying the tool result. The result
        # ids start at 2 because the earlier "hi" call was logged first.
        assert results == (
            ('{"tool_calls": [{"name": "upper", "arguments": {"text": "one"}}]}', "[]"),
            (
                "",
                '[{"id": 2, "tool_id": 1, "name": "upper", "output": "ONE", "tool_call_id": null, "exception": null, "attachments": []}]',
            ),
            ('{"tool_calls": [{"name": "upper", "arguments": {"text": "two"}}]}', "[]"),
            (
                "",
                '[{"id": 3, "tool_id": 1, "name": "upper", "output": "TWO", "tool_call_id": null, "exception": null, "attachments": []}]',
            ),
            (
                '{"tool_calls": [{"name": "upper", "arguments": {"text": "three"}}]}',
                "[]",
            ),
            (
                "",
                '[{"id": 4, "tool_id": 1, "name": "upper", "output": "THREE", "tool_call_id": null, "exception": null, "attachments": []}]',
            ),
        )
        # Test the --td option
        result6 = runner.invoke(
            cli.cli,
            [
                "prompt",
                "-m",
                "echo",
                "--tool",
                "output_as_json",
                json.dumps(
                    {
                        "tool_calls": [
                            {"name": "output_as_json", "arguments": {"text": "hi"}}
                        ]
                    }
                ),
                "--td",
            ],
        )
        assert result6.exit_code == 0
        # The dict returned by the tool is expected as indented JSON
        assert (
            "Tool call: output_as_json({'text': 'hi'})\n"
            "  {\n"
            '    "this_is_in_json": {\n'
            '      "nested": "hi"\n'
            "    }\n"
            "  }"
        ) in result6.output
    finally:
        # Always unregister so the plugin does not linger on the plugin manager
        plugins.pm.unregister(name="ToolsPlugin")


class Memory(llm.Toolbox):
    # Key/value toolbox used by the toolbox tests. The backing dict is created
    # on first use. The method docstrings are asserted as tool descriptions in
    # the tests, so they must not be reworded.
    _memory = None

    def _get_memory(self):
        # Create the instance's store the first time it is needed
        store = self._memory
        if store is None:
            store = {}
            self._memory = store
        return store

    def set(self, key: str, value: str):
        "Set something as a key"
        store = self._get_memory()
        store[key] = value

    def get(self, key: str):
        "Get something from a key"
        stored = self._get_memory().get(key)
        # Missing keys and falsy values both come back as an empty string
        return stored if stored else ""

    def append(self, key: str, value: str):
        "Append something as a key"
        store = self._get_memory()
        existing = store.get(key) or ""
        store[key] = existing + "\n" + value

    def keys(self):
        "Return a list of keys"
        return list(self._get_memory())


class Filesystem(llm.Toolbox):
    # Toolbox that is configured with a directory path when instantiated
    def __init__(self, path: str):
        self.path = path

    async def list_files(self):
        # async here just to confirm that works
        directory = pathlib.Path(self.path)
        return list(map(str, directory.glob("*")))


class ToolboxPlugin:
    # Plugin that registers the two toolbox classes defined above
    __name__ = "ToolboxPlugin"

    @hookimpl
    def register_tools(self, register):
        for toolbox_class in (Memory, Filesystem):
            register(toolbox_class)


def test_register_toolbox(tmpdir, logs_db):
    """End-to-end test of llm.Toolbox classes via the Python API and the CLI.

    First drives Memory and Filesystem instances directly through
    conversation.chain(), recording each call with an after_call callback.
    Then registers both classes through ToolboxPlugin and checks `llm tools`
    (JSON and text), prompts run with -T for a plain and a configured toolbox,
    the error from `llm -c` after using a configured toolbox, and finally the
    rows returned by TOOL_RESULTS_SQL (defined elsewhere in this file).
    """
    # Test the Python API
    model = llm.get_model("echo")
    memory = Memory()
    conversation = model.conversation(tools=[memory])
    accumulated = []

    def after_call(tool, tool_call, tool_result):
        # Record (tool name, call arguments, result output) for each tool call
        accumulated.append((tool.name, tool_call.arguments, tool_result.output))

    conversation.chain(
        json.dumps(
            {
                "tool_calls": [
                    {
                        "name": "Memory_set",
                        "arguments": {"key": "hello", "value": "world"},
                    }
                ]
            }
        ),
        after_call=after_call,
    ).text()
    conversation.chain(
        json.dumps(
            {"tool_calls": [{"name": "Memory_get", "arguments": {"key": "hello"}}]}
        ),
        after_call=after_call,
    ).text()
    # Memory.set() returns None, which is reported as the string "null"
    assert accumulated == [
        ("Memory_set", {"key": "hello", "value": "world"}, "null"),
        ("Memory_get", {"key": "hello"}, "world"),
    ]
    # The calls acted on the instance that was passed to the conversation
    assert memory._memory == {"hello": "world"}

    # And for the Filesystem with state
    my_dir = pathlib.Path(tmpdir / "mine")
    my_dir.mkdir()
    (my_dir / "doc.txt").write_text("hi", "utf-8")
    conversation = model.conversation(tools=[Filesystem(my_dir)])
    accumulated.clear()
    conversation.chain(
        json.dumps(
            {
                "tool_calls": [
                    {
                        "name": "Filesystem_list_files",
                    }
                ]
            }
        ),
        after_call=after_call,
    ).text()
    # No "arguments" key was sent; the recorded arguments are an empty dict and
    # the returned list is reported as a JSON string
    assert accumulated == [
        ("Filesystem_list_files", {}, json.dumps([str(my_dir / "doc.txt")]))
    ]

    # Now register them with a plugin and use it through the CLI
    try:
        plugins.pm.register(ToolboxPlugin(), name="ToolboxPlugin")
        tools = llm.get_tools()
        # Toolboxes are exposed as the class itself, not as an instance
        assert tools["Memory"] is Memory

        runner = CliRunner()
        # llm tools --json
        result = runner.invoke(cli.cli, ["tools", "--json"])
        assert result.exit_code == 0
        assert json.loads(result.output) == {
            "tools": [
                {
                    "description": "Returns the current time, as local time and UTC",
                    "name": "llm_time",
                    "plugin": "llm.default_plugins.default_tools",
                    "arguments": {
                        "properties": {},
                        "type": "object",
                    },
                },
                {
                    "name": "llm_version",
                    "description": "Return the installed version of llm",
                    "arguments": {"properties": {}, "type": "object"},
                    "plugin": "llm.default_plugins.default_tools",
                },
            ],
            "toolboxes": [
                {
                    "name": "Filesystem",
                    "tools": [
                        {
                            "name": "Filesystem_list_files",
                            "description": None,
                            "arguments": {"properties": {}, "type": "object"},
                        }
                    ],
                },
                {
                    "name": "Memory",
                    "tools": [
                        {
                            "name": "Memory_append",
                            "description": "Append something as a key",
                            "arguments": {
                                "properties": {
                                    "key": {"type": "string"},
                                    "value": {"type": "string"},
                                },
                                "required": ["key", "value"],
                                "type": "object",
                            },
                        },
                        {
                            "name": "Memory_get",
                            "description": "Get something from a key",
                            "arguments": {
                                "properties": {"key": {"type": "string"}},
                                "required": ["key"],
                                "type": "object",
                            },
                        },
                        {
                            "name": "Memory_keys",
                            "description": "Return a list of keys",
                            "arguments": {"properties": {}, "type": "object"},
                        },
                        {
                            "name": "Memory_set",
                            "description": "Set something as a key",
                            "arguments": {
                                "properties": {
                                    "key": {"type": "string"},
                                    "value": {"type": "string"},
                                },
                                "required": ["key", "value"],
                                "type": "object",
                            },
                        },
                    ],
                },
            ],
        }

        # llm tools (no JSON)
        result = runner.invoke(cli.cli, ["tools"])
        assert result.exit_code == 0
        assert result.output == (
            "llm_time() -> dict (plugin: llm.default_plugins.default_tools)\n\n"
            "  Returns the current time, as local time and UTC\n\n"
            "llm_version() -> str (plugin: llm.default_plugins.default_tools)\n\n"
            "  Return the installed version of llm\n\n"
            "Filesystem:\n\n"
            "  Filesystem_list_files()\n\n"
            "Memory:\n\n"
            "  Memory_append(key: str, value: str)\n\n"
            "    Append something as a key\n\n"
            "  Memory_get(key: str)\n\n"
            "    Get something from a key\n\n"
            "  Memory_keys()\n\n"
            "    Return a list of keys\n\n"
            "  Memory_set(key: str, value: str)\n\n"
            "    Set something as a key\n\n"
        )

        # Test the CLI running a toolbox prompt
        result3 = runner.invoke(
            cli.cli,
            [
                "prompt",
                "-T",
                "Memory",
                json.dumps(
                    {
                        "tool_calls": [
                            {
                                "name": "Memory_set",
                                "arguments": {"key": "hi", "value": "two"},
                            },
                            {"name": "Memory_get", "arguments": {"key": "hi"}},
                        ]
                    }
                ),
                "-m",
                "echo",
            ],
        )
        assert result3.exit_code == 0
        # Cut the "tool_results" array out of the output text and parse it.
        # Splitting on the first "]" works because these outputs contain none.
        tool_results = json.loads(
            "[" + result3.output.split('"tool_results": [')[1].split("]")[0] + "]"
        )
        assert tool_results == [
            {"name": "Memory_set", "output": "null", "tool_call_id": None},
            {"name": "Memory_get", "output": "two", "tool_call_id": None},
        ]

        # Test the CLI running a configured toolbox prompt
        my_dir2 = pathlib.Path(tmpdir / "mine2")
        my_dir2.mkdir()
        other_path = my_dir2 / "other.txt"
        other_path.write_text("hi", "utf-8")
        result4 = runner.invoke(
            cli.cli,
            [
                "prompt",
                "-T",
                # Constructor argument passed as JSON: Filesystem("<path>")
                "Filesystem({})".format(json.dumps(str(my_dir2))),
                json.dumps({"tool_calls": [{"name": "Filesystem_list_files"}]}),
                "-m",
                "echo",
            ],
        )
        assert result4.exit_code == 0
        # This output is itself a JSON list and so contains "]"; split on the
        # LAST "]" here rather than the first
        tool_results = json.loads(
            "[" + result4.output.split('"tool_results": [')[1].rsplit("]", 1)[0] + "]"
        )
        assert tool_results == [
            {
                "name": "Filesystem_list_files",
                "output": json.dumps([str(other_path)]),
                "tool_call_id": None,
            }
        ]

        # Should show an error if you attempt to llm -c with configured toolboxes
        result5 = runner.invoke(
            cli.cli,
            ["-c", "list them again"],
        )
        assert result5.exit_code == 1
        assert (
            "Error: Tool(s) Filesystem_list_files not found. Available tools:"
            in result5.output
        )

        # Test the logging worked
        rows = list(logs_db.query(TOOL_RESULTS_SQL))
        # JSON decode things in rows
        for row in rows:
            row["tool_calls"] = json.loads(row["tool_calls"])
            row["tool_results"] = json.loads(row["tool_results"])
        # Only the two CLI prompts appear: each is one row with the requested
        # tool calls followed by one row with the tool results. Each result
        # records the toolbox instance it ran against, including the
        # constructor arguments.
        assert rows == [
            {
                "model": "echo",
                "tool_calls": [
                    {
                        "name": "Memory_set",
                        "arguments": '{"key": "hi", "value": "two"}',
                    },
                    {"name": "Memory_get", "arguments": '{"key": "hi"}'},
                ],
                "tool_results": [],
            },
            {
                "model": "echo",
                "tool_calls": [],
                "tool_results": [
                    {
                        "name": "Memory_set",
                        "output": "null",
                        "instance": {
                            "name": "Memory",
                            "plugin": "ToolboxPlugin",
                            "arguments": "{}",
                        },
                    },
                    {
                        "name": "Memory_get",
                        "output": "two",
                        "instance": {
                            "name": "Memory",
                            "plugin": "ToolboxPlugin",
                            "arguments": "{}",
                        },
                    },
                ],
            },
            {
                "model": "echo",
                "tool_calls": [{"name": "Filesystem_list_files", "arguments": "{}"}],
                "tool_results": [],
            },
            {
                "model": "echo",
                "tool_calls": [],
                "tool_results": [
                    {
                        "name": "Filesystem_list_files",
                        "output": json.dumps([str(other_path)]),
                        "instance": {
                            "name": "Filesystem",
                            "plugin": "ToolboxPlugin",
                            "arguments": json.dumps({"path": str(my_dir2)}),
                        },
                    }
                ],
            },
        ]

    finally:
        # Always unregister so the plugin does not linger on the plugin manager
        plugins.pm.unregister(name="ToolboxPlugin")


def test_register_toolbox_fails_on_bad_class():
    """llm.get_tools() raises TypeError when a plugin registers a non-Toolbox class."""
    plugin_name = "BadToolsPlugin"

    class BadTools:
        # Deliberately a plain class, not a subclass of llm.Toolbox
        def bad(self):
            return "this is bad"

    class BadToolsPlugin:
        __name__ = "BadToolsPlugin"

        @hookimpl
        def register_tools(self, register):
            # Registering BadTools is what is expected to trigger the failure
            register(BadTools)

    try:
        plugins.pm.register(BadToolsPlugin(), name=plugin_name)
        with pytest.raises(TypeError):
            llm.get_tools()
    finally:
        # Always remove the plugin so it cannot affect other tests
        plugins.pm.unregister(name=plugin_name)


def test_toolbox_logging_async(logs_db, tmpdir):
    """Toolbox tool calls made through `llm prompt --async` are echoed and logged.

    Registers ToolboxPlugin, runs the "echo" model with the Memory toolbox and
    a Filesystem toolbox pointed at a freshly created (empty) directory, then
    checks both the tool results in the CLI output and the rows read back from
    logs_db using TOOL_RESULTS_SQL.
    """
    path = pathlib.Path(tmpdir / "path")
    path.mkdir()
    runner = CliRunner()
    try:
        plugins.pm.register(ToolboxPlugin(), name="ToolboxPlugin")

        # Run Memory and Filesystem tests --async
        result = runner.invoke(
            cli.cli,
            [
                "prompt",
                "--async",
                "-T",
                "Memory",
                "--tool",
                "Filesystem({})".format(json.dumps(str(path))),
                json.dumps(
                    {
                        "tool_calls": [
                            {
                                "name": "Memory_set",
                                "arguments": {"key": "hi", "value": "two"},
                            },
                            {"name": "Memory_get", "arguments": {"key": "hi"}},
                            {"name": "Filesystem_list_files"},
                        ]
                    }
                ),
                "-m",
                "echo",
            ],
        )
        assert result.exit_code == 0
        # Cut the "tool_results" array out of the echoed output and parse it
        tool_results = json.loads(
            "[" + result.output.split('"tool_results": [')[1].rsplit("]", 1)[0] + "]"
        )
        assert tool_results == [
            {"name": "Memory_set", "output": "null", "tool_call_id": None},
            {"name": "Memory_get", "output": "two", "tool_call_id": None},
            {"name": "Filesystem_list_files", "output": "[]", "tool_call_id": None},
        ]
    finally:
        plugins.pm.unregister(name="ToolboxPlugin")

    # Check the database
    rows = list(logs_db.query(TOOL_RESULTS_SQL))
    # JSON decode things in rows
    for row in rows:
        row["tool_calls"] = json.loads(row["tool_calls"])
        row["tool_results"] = json.loads(row["tool_results"])
    assert rows == [
        {
            "model": "echo",
            "tool_calls": [
                {"name": "Memory_set", "arguments": '{"key": "hi", "value": "two"}'},
                {"name": "Memory_get", "arguments": '{"key": "hi"}'},
                {"name": "Filesystem_list_files", "arguments": "{}"},
            ],
            "tool_results": [],
        },
        {
            "model": "echo",
            "tool_calls": [],
            "tool_results": [
                # Memory_* results belong to the Memory toolbox instance, which
                # was created without arguments (hence "{}")
                {
                    "name": "Memory_set",
                    "output": "null",
                    "instance": {
                        "name": "Memory",
                        "plugin": "ToolboxPlugin",
                        "arguments": "{}",
                    },
                },
                {
                    "name": "Memory_get",
                    "output": "two",
                    "instance": {
                        "name": "Memory",
                        "plugin": "ToolboxPlugin",
                        "arguments": "{}",
                    },
                },
                # The Filesystem instance records the path it was created with
                {
                    "name": "Filesystem_list_files",
                    "output": "[]",
                    "instance": {
                        "name": "Filesystem",
                        "plugin": "ToolboxPlugin",
                        "arguments": json.dumps({"path": str(path)}),
                    },
                },
            ],
        },
    ]


def test_plugins_command():
    """`llm plugins` prints registered plugins as JSON; --hook filters the list."""
    echo_plugin = {"name": "EchoModelPlugin", "hooks": ["register_models"]}
    mock_models_plugin = {
        "name": "MockModelsPlugin",
        "hooks": ["register_embedding_models", "register_models"],
    }
    runner = CliRunner()

    # Unfiltered listing, sorted by name so the comparison ignores output order
    listing = runner.invoke(cli.cli, ["plugins"])
    assert listing.exit_code == 0
    found = sorted(json.loads(listing.output), key=lambda plugin: plugin["name"])
    assert found == [echo_plugin, mock_models_plugin]

    # Test the --hook option
    filtered = runner.invoke(
        cli.cli, ["plugins", "--hook", "register_embedding_models"]
    )
    assert filtered.exit_code == 0
    assert json.loads(filtered.output) == [mock_models_plugin]


# Query used by the toolbox logging tests. It returns one row per row in
# `responses`, ordered by response id, with these columns:
#   model        - the response's model
#   tool_calls   - JSON array of {name, arguments} for that response ('[]' if none)
#   tool_results - JSON array of {name, output, instance} for that response
#                  ('[]' if none); `instance` is {name, plugin, arguments} from
#                  tool_instances when the result links to one, otherwise null
TOOL_RESULTS_SQL = """
-- First, create ordered subqueries for tool_calls and tool_results
with ordered_tool_calls as (
    select
        tc.response_id,
        json_group_array(
            json_object(
                'name', tc.name,
                'arguments', tc.arguments
            )
        ) as tool_calls_json
    from (
        select * from tool_calls order by id
    ) tc
    where tc.id is not null
    group by tc.response_id
),
ordered_tool_results as (
    select
        tr.response_id,
        json_group_array(
            json_object(
                'name', tr.name,
                'output', tr.output,
                'instance', case
                    when ti.id is not null then json_object(
                        'name', ti.name,
                        'plugin', ti.plugin,
                        'arguments', ti.arguments
                    )
                    else null
                end
            )
        ) as tool_results_json
    from (
        select distinct tr.*, ti.id as ti_id, ti.name as ti_name,
               ti.plugin, ti.arguments as ti_arguments
        from tool_results tr
        left join tool_instances ti on tr.instance_id = ti.id
        order by tr.id
    ) tr
    left join tool_instances ti on tr.instance_id = ti.id
    where tr.id is not null
    group by tr.response_id
)
select
    r.model,
    coalesce(otc.tool_calls_json, '[]') as tool_calls,
    coalesce(otr.tool_results_json, '[]') as tool_results
from responses r
left join ordered_tool_calls otc on r.id = otc.response_id
left join ordered_tool_results otr on r.id = otr.response_id
group by r.id, r.model
order by r.id"""


================================================
FILE: tests/test_templates.py
================================================
from click.testing import CliRunner
from importlib.metadata import version
import json
from llm import Template, Toolbox, hookimpl, user_dir
from llm.cli import cli
from llm.plugins import pm
import os
from unittest import mock
import pathlib
import pytest
import textwrap
import yaml


@pytest.mark.parametrize(
    "prompt,system,defaults,params,expected_prompt,expected_system,expected_error",
    (
        # $input is filled from the input passed to evaluate()
        ("S: $input", None, None, {}, "S: input", None, None),
        ("S: $input", "system", None, {}, "S: input", "system", None),
        ("No vars", None, None, {}, "No vars", None, None),
        # Variables with neither a param nor a default are reported as missing
        ("$one and $two", None, None, {}, None, None, "Missing variables: one, two"),
        ("$one and $two", None, None, {"one": 1, "two": 2}, "1 and 2", None, None),
        ("$one and $two", None, {"one": 1}, {"two": 2}, "1 and 2", None, None),
        # $$ is an escaped literal dollar sign
        ("$one and $$2", None, None, {"one": 1}, "1 and $2", None, None),
        # An explicit param takes precedence over a default of the same name
        (
            "$one and $two",
            None,
            {"one": 99},
            {"one": 1, "two": 2},
            "1 and 2",
            None,
            None,
        ),
    ),
)
def test_template_evaluate(
    prompt, system, defaults, params, expected_prompt, expected_system, expected_error
):
    """Template.evaluate() renders prompt/system or raises MissingVariables."""
    template = Template(name="t", prompt=prompt, system=system, defaults=defaults)
    if not expected_error:
        rendered_prompt, rendered_system = template.evaluate("input", params)
        assert rendered_prompt == expected_prompt
        assert rendered_system == expected_system
        return
    # Error case: the message is the exception's first argument
    with pytest.raises(Template.MissingVariables) as excinfo:
        template.evaluate("input", params)
    assert excinfo.value.args[0] == expected_error


def test_templates_list_no_templates_found():
    """`llm templates list` exits 0 and prints nothing when this test adds no templates."""
    outcome = CliRunner().invoke(cli, ["templates", "list"])
    assert outcome.exit_code == 0
    assert outcome.output == ""


@pytest.mark.parametrize("args", (["templates", "list"], ["templates"]))
def test_templates_list(templates_path, args):
    """Both `llm templates list` and bare `llm templates` print a one-line summary per template."""
    template_files = {
        "one.yaml": "template one",
        "two.yaml": "template two",
        "three.yaml": "template three is very long " * 4,
        "four.yaml": "'this one\n\nhas newlines in it'",
        "both.yaml": "system: summarize this\nprompt: $input",
        "sys.yaml": "system: Summarize this",
        # This file does not appear in the expected listing below
        "invalid.yaml": "system2: This is invalid",
    }
    for filename, content in template_files.items():
        (templates_path / filename).write_text(content, "utf-8")

    result = CliRunner().invoke(cli, args)
    assert result.exit_code == 0
    # Names are listed alphabetically and padded; long text is truncated with "..."
    expected_lines = [
        "both  : system: summarize this prompt: $input",
        "four  : this one has newlines in it",
        "one   : template one",
        "sys   : system: Summarize this",
        "three : template three is very long template three is very long template thre...",
        "two   : template two",
    ]
    assert result.output == "".join(line + "\n" for line in expected_lines)


@pytest.mark.parametrize(
    "args,expected,expected_error",
    (
        # The "gpt4" alias is expected to be saved as "gpt-4"
        (["-m", "gpt4", "hello"], {"model": "gpt-4", "prompt": "hello"}, None),
        # $variables in the prompt are saved as-is
        (["hello $foo"], {"prompt": "hello $foo"}, None),
        (["--system", "system"], {"system": "system"}, None),
        # Flags that cannot be combined with --save
        (["-t", "template"], None, "--save cannot be used with --template"),
        (["--continue"], None, "--save cannot be used with --continue"),
        (["--cid", "123"], None, "--save cannot be used with --cid"),
        # --conversation produces the same error message as --cid
        (["--conversation", "123"], None, "--save cannot be used with --cid"),
        # -p values are saved as template defaults
        (
            ["Say hello as $name", "-p", "name", "default-name"],
            {"prompt": "Say hello as $name", "defaults": {"name": "default-name"}},
            None,
        ),
        # Options
        (
            ["-o", "temperature", "0.5", "--system", "in french"],
            {"system": "in french", "options": {"temperature": 0.5}},
            None,
        ),
        # -x/--extract should be persisted:
        (
            ["--system", "write python", "--extract"],
            {"system": "write python", "extract": True},
            None,
        ),
        # So should schemas (and should not sort properties)
        (
            [
                "--schema",
                '{"properties": {"b": {"type": "string"}, "a": {"type": "string"}}}',
            ],
            {
                "schema_object": {
                    "properties": {"b": {"type": "string"}, "a": {"type": "string"}}
                }
            },
            None,
        ),
        # And fragments and system_fragments
        (
            ["--fragment", "f1.txt", "--system-fragment", "https://example.com/f2.txt"],
            {
                "fragments": ["f1.txt"],
                "system_fragments": ["https://example.com/f2.txt"],
            },
            None,
        ),
        # And attachments and attachment_types
        (
            ["--attachment", "a.txt", "--attachment-type", "b.txt", "text/plain"],
            {
                "attachments": ["a.txt"],
                "attachment_types": [{"type": "text/plain", "value": "b.txt"}],
            },
            None,
        ),
        # Model option using an enum: https://github.com/simonw/llm/issues/1237
        (
            ["-m", "gpt-5", "-o", "reasoning_effort", "minimal"],
            {
                "model": "gpt-5",
                "options": {"reasoning_effort": "minimal"},
            },
            None,
        ),
    ),
)
def test_templates_prompt_save(templates_path, args, expected, expected_error):
    """`llm <args> --save saved` writes saved.yaml, or exits 1 with an error.

    On success the YAML loaded from templates_path / "saved.yaml" must equal
    ``expected``; otherwise the exit code must be 1 and ``expected_error``
    must appear in the output.
    """
    assert not (templates_path / "saved.yaml").exists()
    runner = CliRunner()
    with runner.isolated_filesystem():
        # Create a file to test attachment
        pathlib.Path("a.txt").write_text("attachment", "utf-8")
        pathlib.Path("b.txt").write_text("attachment type", "utf-8")
        result = runner.invoke(cli, args + ["--save", "saved"], catch_exceptions=False)
    if not expected_error:
        assert result.exit_code == 0
        yaml_data = yaml.safe_load((templates_path / "saved.yaml").read_text("utf-8"))
        # Adjust attachment and attachment_types paths to be just the filename
        if "attachments" in yaml_data:
            yaml_data["attachments"] = [
                os.path.basename(path) for path in yaml_data["attachments"]
            ]
        for item in yaml_data.get("attachment_types", []):
            item["value"] = os.path.basename(item["value"])
        assert yaml_data == expected
    else:
        assert result.exit_code == 1
        assert expected_error in result.output


def test_templates_error_on_missing_schema(templates_path):
    """`--schema t:<name>` reports a template without a schema and an invalid template."""
    runner = CliRunner()

    def run(*cli_args):
        # Invoke the CLI, letting unexpected exceptions propagate
        return runner.invoke(cli, list(cli_args), catch_exceptions=False)

    # Save a template that only has a prompt
    run("the-prompt", "--save", "prompt_no_schema")

    # This should complain about no schema
    no_schema = run("hi", "--schema", "t:prompt_no_schema")
    assert no_schema.output == "Error: Template 'prompt_no_schema' has no schema\n"

    # And this is just an invalid template
    invalid = run("hi", "--schema", "t:bad_template")
    assert invalid.output == "Error: Invalid template: bad_template\n"


@mock.patch.dict(os.environ, {"OPENAI_API_KEY": "X"})
@pytest.mark.parametrize(
    "template,input_text,extra_args,expected_model,expected_input,expected_error,expected_options",
    (
        # Template file that is just a YAML string is used as the prompt
        (
            "'Summarize this: $input'",
            "Input text",
            [],
            "gpt-4o-mini",
            "Summarize this: Input text",
            None,
            None,
        ),
        # Template can specify the model
        (
            "prompt: 'Summarize this: $input'\nmodel: gpt-4",
            "Input text",
            [],
            "gpt-4",
            "Summarize this: Input text",
            None,
            None,
        ),
        # -m on the command line selects the model ("4" is expected to mean gpt-4)
        (
            "prompt: 'Summarize this: $input'",
            "Input text",
            ["-m", "4"],
            "gpt-4",
            "Summarize this: Input text",
            None,
            None,
        ),
        # -s system prompt should over-ride template system prompt
        pytest.param(
            "boo",
            "Input text",
            ["-s", "custom system"],
            "gpt-4o-mini",
            [
                {"role": "system", "content": "custom system"},
                {"role": "user", "content": "boo\nInput text"},
            ],
            None,
            None,
            marks=pytest.mark.httpx_mock(),
        ),
        # A template variable with no value is an error
        pytest.param(
            "prompt: 'Say $hello'",
            "Input text",
            [],
            None,
            None,
            "Error: Missing variables: hello",
            None,
            marks=pytest.mark.httpx_mock(),
        ),
        # Template generated prompt should combine with CLI prompt
        (
            "prompt: 'Say $hello'",
            "Input text",
            ["-p", "hello", "Blah"],
            "gpt-4o-mini",
            "Say Blah\nInput text",
            None,
            None,
        ),
        # No CLI prompt at all: only the template prompt is sent
        (
            "prompt: 'Say pelican'",
            "",
            [],
            "gpt-4o-mini",
            "Say pelican",
            None,
            None,
        ),
        # Template with just a system prompt
        (
            "system: 'Summarize this'",
            "Input text",
            [],
            "gpt-4o-mini",
            [
                {"content": "Summarize this", "role": "system"},
                {"content": "Input text", "role": "user"},
            ],
            None,
            None,
        ),
        # Options
        (
            "prompt: 'Summarize this: $input'\noptions:\n  temperature: 0.5",
            "Input text",
            [],
            "gpt-4o-mini",
            "Summarize this: Input text",
            None,
            {"temperature": 0.5},
        ),
        # Should be over-ridden by CLI
        (
            "prompt: 'Summarize this: $input'\noptions:\n  temperature: 0.5",
            "Input text",
            ["-o", "temperature", "0.7"],
            "gpt-4o-mini",
            "Summarize this: Input text",
            None,
            {"temperature": 0.7},
        ),
    ),
)
def test_execute_prompt_with_a_template(
    templates_path,
    mocked_openai_chat,
    template,
    input_text,
    extra_args,
    expected_model,
    expected_input,
    expected_error,
    expected_options,
):
    """Run `llm --no-stream -t template ...` and check the request that was sent.

    ``template`` is written to templates_path / "template.yaml". On success
    the JSON body of the last request recorded by ``mocked_openai_chat`` must
    contain the expected model, messages and options; on failure the exit code
    must be 1 and the stripped output must equal ``expected_error``.
    """
    (templates_path / "template.yaml").write_text(template, "utf-8")
    runner = CliRunner()
    result = runner.invoke(
        cli,
        ["--no-stream", "-t", "template"]
        + ([input_text] if input_text else [])
        + extra_args,
        catch_exceptions=False,
    )
    # A plain string is shorthand for a single user message
    if isinstance(expected_input, str):
        expected_messages = [{"role": "user", "content": expected_input}]
    else:
        expected_messages = expected_input

    if expected_error is None:
        assert result.exit_code == 0
        last_request = mocked_openai_chat.get_requests()[-1]
        expected_data = {
            "model": expected_model,
            "messages": expected_messages,
            "stream": False,
        }
        # Options are sent as extra top-level keys in the request body
        if expected_options:
            expected_data.update(expected_options)
        assert json.loads(last_request.content) == expected_data
    else:
        assert result.exit_code == 1
        assert result.output.strip() == expected_error
        # NOTE(review): presumably clears the unused mocked response so the
        # HTTP mock does not complain at teardown - confirm against the fixture
        mocked_openai_chat.reset()


@pytest.mark.parametrize(
    "template,expected",
    (
        # Plain system + prompt template
        (
            "system: system\nprompt: prompt",
            {
                "prompt": "prompt",
                "system": "system",
                "attachments": [],
                "stream": True,
                "previous": [],
            },
        ),
        # Prompt containing a fenced block, without extract
        (
            "prompt: |\n  This is\n  ```\n  code to extract\n  ```",
            {
                "prompt": "This is\n```\ncode to extract\n```",
                "system": "",
                "attachments": [],
                "stream": True,
                "previous": [],
            },
        ),
        # Now try that with extract: true
        (
            'extract: true\nprompt: |\n  {"raw": "This is\\n```\\ncode to extract\\n```"}',
            "code to extract",
        ),
    ),
)
def test_execute_prompt_from_template_url(httpx_mock, template, expected):
    """A template can be loaded from a URL passed to -t and run with the echo model."""
    template_url = "https://example.com/prompt.yaml"
    httpx_mock.add_response(
        url=template_url,
        method="GET",
        text=template,
        status_code=200,
    )
    result = CliRunner().invoke(
        cli,
        ["-t", template_url, "-m", "echo"],
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    stripped = result.output.strip()
    # Dict expectations are compared as parsed JSON, anything else as raw text
    actual = json.loads(stripped) if isinstance(expected, dict) else stripped
    assert actual == expected


def test_execute_prompt_from_template_path():
    """A template can be loaded from a filesystem path passed to -t."""
    runner = CliRunner()
    with runner.isolated_filesystem() as temp_dir:
        template_file = pathlib.Path(temp_dir, "my-template.yaml")
        template_file.write_text("system: system\nprompt: prompt", "utf-8")
        cli_args = ["-t", str(template_file), "-m", "echo"]
        result = runner.invoke(cli, cli_args, catch_exceptions=False)
        # Include the output in the failure message to ease debugging
        assert result.exit_code == 0, result.output
        echoed = json.loads(result.output)
        assert echoed == {
            "prompt": "prompt",
            "system": "system",
            "attachments": [],
            "stream": True,
            "previous": [],
        }


def test_template_respects_cli_extract_flag(
    mocked_openai_chat_returning_fenced_code, templates_path
):
    """-x on the command line applies even though the template sets no extract key."""
    template_file = templates_path / "code.yaml"
    template_file.write_text("prompt: Write code", "utf-8")
    cli_args = ["-t", "code", "-m", "gpt-4o-mini", "--key", "x", "-x"]
    result = CliRunner().invoke(cli, cli_args, catch_exceptions=False)
    assert result.exit_code == 0
    printed = result.output
    # The code fence must be gone, leaving only the code itself
    assert "```" not in printed
    assert printed.strip() == "function foo() {\n  return 'bar';\n}"


# Python source text defining a single greet() function; passed as the
# `functions` value of the Template built by the plugin template loader in
# test_tools_in_templates.
FUNCTIONS_EXAMPLE = """
def greet(name: str) -> str:
    return f"Hello, {name}!"
"""


# Toolbox used by test_tools_in_templates: templates reference it as
# Greeting("hi") and the tests call its method as the "Greeting_greet" tool.
class Greeting(Toolbox):
    def __init__(self, greeting: str):
        # Text placed before the name in every greeting
        self.greeting = greeting

    def greet(self, name: str) -> str:
        "Greet name with a greeting"
        return f"{self.greeting}, {name}!"


# Minimal plugin whose register_tools hook registers the Greeting toolbox.
class GreetingsPlugin:
    __name__ = "GreetingsPlugin"

    @hookimpl
    def register_tools(self, register):
        # Pass the class itself (not an instance) to register()
        register(Greeting)


@pytest.mark.parametrize(
    "source,expected_tool_success,expected_functions_success",
    (
        ("alias", True, True),
        ("file", True, True),
        # Loaded from URL or plugin = functions: should not work
        ("url", True, False),
        ("plugin", True, False),
    ),
)
def test_tools_in_templates(
    source, expected_tool_success, expected_functions_success, httpx_mock, tmpdir
):
    """Templates can supply tools; `functions:` only works for some sources.

    The same template (tools llm_version and Greeting("hi"), plus a functions
    block) is loaded by name, by file path, by URL or through a plugin template
    loader, depending on ``source``. Each tool is then invoked through the
    "echo" model and the output is checked for the expected text.
    """
    template_yaml = textwrap.dedent("""
    name: test
    tools:
    - llm_version
    - Greeting("hi")
    functions: |
      def demo():
          return "Demo"
    """)
    args = []

    # No-op setup/teardown hooks; only the "plugin" source replaces them
    def before():
        pass

    def after():
        pass

    if source == "alias":
        # Template stored in the user's templates directory, referenced by name
        args = ["-t", "test"]
        (user_dir() / "templates").mkdir(parents=True, exist_ok=True)
        (user_dir() / "templates" / "test.yaml").write_text(template_yaml, "utf-8")
    elif source == "file":
        # Template referenced by explicit file path
        (tmpdir / "test.yaml").write_text(template_yaml, "utf-8")
        args = ["-t", str(tmpdir / "test.yaml")]
    elif source == "url":
        # Template fetched over HTTP; reusable because the CLI runs several times
        httpx_mock.add_response(
            url="https://example.com/test.yaml",
            method="GET",
            text=template_yaml,
            status_code=200,
            is_reusable=True,
        )
        args = ["-t", "https://example.com/test.yaml"]
    elif source == "plugin":

        # Plugin registering a "tool-template" loader that builds the Template
        # in code, using FUNCTIONS_EXAMPLE for its functions
        class LoadTemplatePlugin:
            __name__ = "LoadTemplatePlugin"

            @hookimpl
            def register_template_loaders(self, register):
                register(
                    "tool-template",
                    lambda s: Template(
                        name="tool-template",
                        tools=["llm_version", 'Greeting("hi")'],
                        functions=FUNCTIONS_EXAMPLE,
                    ),
                )

        def before():
            pm.register(LoadTemplatePlugin(), name="test-tools-in-templates")

        def after():
            pm.unregister(name="test-tools-in-templates")

        args = ["-t", "tool-template:"]

    before()
    # Greeting must be registered as a tool for Greeting("hi") to resolve
    pm.register(GreetingsPlugin(), name="greetings-plugin")
    try:
        runner = CliRunner()
        # Test llm_version, then Greeting, then demo
        for tool_call, text, should_be_present in (
            ({"name": "llm_version"}, version("llm"), True),
            (
                {"name": "Greeting_greet", "arguments": {"name": "Alice"}},
                "hi, Alice",
                expected_tool_success,
            ),
            (
                {"name": "Greeting_greet", "arguments": {"name": "Bob"}},
                "hi, Bob!",
                expected_tool_success,
            ),
            ({"name": "demo"}, '"output": "Demo"', expected_functions_success),
        ):
            result = runner.invoke(
                cli,
                args
                + [
                    "-m",
                    "echo",
                    "--no-stream",
                    json.dumps({"tool_calls": [tool_call]}),
                ],
                catch_exceptions=False,
            )
            # The command itself must succeed even when the tool is unavailable
            assert result.exit_code == 0
            if should_be_present:
                assert text in result.output
            else:
                assert text not in result.output
    finally:
        after()
        pm.unregister(name="greetings-plugin")


================================================
FILE: tests/test_tools.py
================================================
import asyncio
from click.testing import CliRunner
from importlib.metadata import version
import json
import llm
from llm import cli, CancelToolCall
from llm.migrations import migrate
from llm.tools import llm_time
import os
import pytest
import sqlite_utils
import time

# Key passed to the OpenAI-backed tests below: PYTEST_OPENAI_API_KEY when it is
# set to a non-empty value, otherwise the placeholder "badkey".
API_KEY = os.environ.get("PYTEST_OPENAI_API_KEY", None) or "badkey"


@pytest.mark.vcr
def test_tool_use_basic(vcr):
    """One tool call made through model.chain() is executed and logged to the database."""

    def multiply(a: int, b: int) -> int:
        """Multiply two numbers."""
        return a * b

    chain = llm.get_model("gpt-4o-mini").chain(
        "What is 1231 * 2331?", tools=[multiply], key=API_KEY
    )
    text = "".join(chain)
    assert text == "The result of \\( 1231 \\times 2331 \\) is \\( 2,869,461 \\)."

    # Two responses: the one requesting the tool call, then the final answer
    calling_response, answering_response = chain._responses

    assert calling_response.prompt.prompt == "What is 1231 * 2331?"
    assert calling_response.prompt.tools[0].name == "multiply"

    results_sent_back = answering_response.prompt.tool_results
    assert len(results_sent_back) == 1
    assert results_sent_back[0].name == "multiply"
    assert results_sent_back[0].output == "2869461"

    # Test writing to the database
    db = sqlite_utils.Database(memory=True)
    migrate(db)
    chain.log_to_db(db)
    required_tables = {"tools", "tool_responses", "tool_calls", "tool_results"}
    assert set(db.table_names()).issuperset(required_tables)

    response_rows = list(db["responses"].rows)
    assert len(response_rows) == 2
    first_row, second_row = response_rows

    tool_rows = list(db["tools"].rows)
    assert len(tool_rows) == 1
    tool_row = tool_rows[0]
    assert tool_row["name"] == "multiply"
    assert tool_row["description"] == "Multiply two numbers."
    assert tool_row["plugin"] is None

    result_rows = list(db["tool_results"].rows)
    call_rows = list(db["tool_calls"].rows)

    # The call is attached to the first response
    assert len(call_rows) == 1
    call_row = call_rows[0]
    assert call_row["response_id"] == first_row["id"]
    assert call_row["name"] == "multiply"
    assert call_row["arguments"] == '{"a": 1231, "b": 2331}'

    # The result is attached to the second response and linked to the call
    assert len(result_rows) == 1
    result_row = result_rows[0]
    assert result_row["response_id"] == second_row["id"]
    assert result_row["output"] == "2869461"
    assert result_row["tool_call_id"] == call_row["tool_call_id"]


@pytest.mark.vcr
def test_tool_use_chain_of_two_calls(vcr):
    """A chain can make two successive tool calls before producing its answer."""

    def lookup_population(country: str) -> int:
        "Returns the current population of the specified fictional country"
        return 123124

    def can_have_dragons(population: int) -> bool:
        "Returns True if the specified population can have dragons, False otherwise"
        return population > 10000

    chain = llm.get_model("gpt-4o-mini").chain(
        "Can the country of Crumpet have dragons? Answer with only YES or NO",
        tools=[lookup_population, can_have_dragons],
        stream=False,
        key=API_KEY,
    )

    assert chain.text() == "YES"
    steps = chain._responses
    assert len(steps) == 3

    population_step, dragons_step, answer_step = steps
    # Step 1 asks for the population and has no earlier tool results
    assert population_step.tool_calls()[0].arguments == {"country": "Crumpet"}
    assert population_step.prompt.tool_results == []
    # Step 2 receives the population and asks about dragons
    assert dragons_step.prompt.tool_results[0].output == "123124"
    assert dragons_step.tool_calls()[0].arguments == {"population": 123124}
    # Step 3 receives the boolean result and makes no further calls
    assert answer_step.prompt.tool_results[0].output == "true"
    assert answer_step.tool_calls() == []


def test_tool_use_async_tool_function():
    """An `async def` tool can be used with the synchronous model.chain()."""

    async def hello():
        return "world"

    tool_prompt = json.dumps({"tool_calls": [{"name": "hello"}]})
    text = llm.get_model("echo").chain(tool_prompt, tools=[hello]).text()

    # That's two JSON objects separated by '\n}{\n'
    pieces = text.split("\n}{\n")
    assert len(pieces) == 2
    first_piece, second_piece = pieces
    # Restore the brace that the split removed from each half
    parsed = [json.loads(first_piece + "}"), json.loads("{" + second_piece)]

    initial_echo = {
        "prompt": "",
        "system": "",
        "attachments": [],
        "stream": True,
        "previous": [],
    }
    followup_echo = {
        "prompt": "",
        "system": "",
        "attachments": [],
        "stream": True,
        "previous": [{"prompt": '{"tool_calls": [{"name": "hello"}]}'}],
        "tool_results": [{"name": "hello", "output": "world", "tool_call_id": None}],
    }
    assert parsed == [initial_echo, followup_echo]


@pytest.mark.asyncio
async def test_async_tools_run_tools_in_parallel():
    """Two async tools requested together start at nearly the same time."""
    # (tool name, nanoseconds since test start) appended as each tool begins
    started = []
    t0 = time.monotonic_ns()

    async def hello():
        started.append(("hello", time.monotonic_ns() - t0))
        await asyncio.sleep(0.2)
        return "world"

    async def hello2():
        started.append(("hello2", time.monotonic_ns() - t0))
        await asyncio.sleep(0.2)
        return "world2"

    tool_prompt = json.dumps({"tool_calls": [{"name": "hello"}, {"name": "hello2"}]})
    chain = llm.get_async_model("echo").chain(tool_prompt, tools=[hello, hello2])
    text = await chain.text()

    # That's two JSON objects separated by '\n}{\n'
    pieces = text.split("\n}{\n")
    assert len(pieces) == 2
    first_piece, second_piece = pieces
    parsed = [json.loads(first_piece + "}"), json.loads("{" + second_piece)]

    initial_echo = {
        "prompt": "",
        "system": "",
        "attachments": [],
        "stream": True,
        "previous": [],
    }
    followup_echo = {
        "prompt": "",
        "system": "",
        "attachments": [],
        "stream": True,
        "previous": [
            {"prompt": '{"tool_calls": [{"name": "hello"}, {"name": "hello2"}]}'}
        ],
        "tool_results": [
            {"name": "hello", "output": "world", "tool_call_id": None},
            {"name": "hello2", "output": "world2", "tool_call_id": None},
        ],
    }
    assert parsed == [initial_echo, followup_echo]

    first_start_ns = started[0][1]
    second_start_ns = started[1][1]
    # They should have run in parallel so it should be less than 0.02s difference
    max_gap_ns = 100_000_000 * 0.2
    assert second_start_ns - first_start_ns < max_gap_ns


@pytest.mark.asyncio
async def test_async_toolbox():
    """An async Toolbox method runs in an async chain and prepare_async() is called."""

    class Tools(llm.Toolbox):
        def __init__(self):
            self.prepared = False

        async def go(self):
            await asyncio.sleep(0)
            return "This was async"

        async def prepare_async(self):
            await asyncio.sleep(0)
            self.prepared = True

    toolbox = Tools()
    # Nothing has prepared the toolbox yet
    assert toolbox.prepared is False

    tool_prompt = json.dumps({"tool_calls": [{"name": "Tools_go"}]})
    chain = llm.get_async_model("echo").chain(tool_prompt, tools=[toolbox])
    text = await chain.text()

    assert '"output": "This was async"' in text
    # Running the chain must have awaited prepare_async()
    assert toolbox.prepared is True


def test_toolbox_add_tool():
    """A function added with Toolbox.add_tool() is callable as a tool, and prepare() runs."""

    class Tools(llm.Toolbox):
        def __init__(self):
            self.prepared = False

        def original(self):
            return "Original method"

        def prepare(self):
            self.prepared = True

    def new_method():
        return "New method"

    echo_model = llm.get_model("echo")
    toolbox = Tools()
    toolbox.add_tool(new_method)
    # Adding a tool alone does not prepare the toolbox
    assert not toolbox.prepared

    tool_prompt = json.dumps({"tool_calls": [{"name": "new_method"}]})
    text = echo_model.chain(tool_prompt, tools=[toolbox]).text()

    assert '"output": "New method"' in text
    assert toolbox.prepared


def test_toolbox_add_tool_with_pass_self():
    """add_tool(..., pass_self=True) gives the added function access to the toolbox instance."""

    class Tools(llm.Toolbox):
        def __init__(self, hotdog):
            self.hotdog = hotdog

        def original(self):
            return "Original method"

    def new_method(self):
        # Reads state from the toolbox it is attached to
        return self.hotdog

    echo_model = llm.get_model("echo")
    toolbox = Tools("doghot")
    toolbox.add_tool(new_method, pass_self=True)

    tool_prompt = json.dumps({"tool_calls": [{"name": "new_method"}]})
    text = echo_model.chain(tool_prompt, tools=[toolbox]).text()

    assert '"output": "doghot"' in text


@pytest.mark.vcr
def test_conversation_with_tools(vcr):
    """Tools given to a conversation stay available across successive chains."""
    import llm

    def add(a: int, b: int) -> int:
        return a + b

    def multiply(a: int, b: int) -> int:
        return a * b

    conversation = llm.get_model("echo").conversation(tools=[add, multiply])

    # First turn: 5324 * 23233 == 123692492
    multiply_request = json.dumps(
        {"tool_calls": [{"name": "multiply", "arguments": {"a": 5324, "b": 23233}}]}
    )
    first_reply = conversation.chain(multiply_request).text()
    assert "123692492" in first_reply

    # Second turn on the same conversation: 841758375 + 123123 == 841881498
    add_request = json.dumps(
        {"tool_calls": [{"name": "add", "arguments": {"a": 841758375, "b": 123123}}]}
    )
    second_reply = conversation.chain(add_request).text()
    assert "841881498" in second_reply


def test_default_tool_llm_version():
    """The llm_version tool, invoked via the CLI, reports the installed llm version."""
    tool_request = json.dumps({"tool_calls": [{"name": "llm_version"}]})
    cli_args = ["-m", "echo", "-T", "llm_version", tool_request]

    result = CliRunner().invoke(cli.cli, cli_args)

    assert result.exit_code == 0
    expected_fragment = '"output": "{}"'.format(version("llm"))
    assert expected_fragment in result.output


def test_cli_tools_with_options():
    """Combining -o options with -T tools must not crash the CLI.

    Regression test for https://github.com/simonw/llm/issues/1233
    """
    tool_request = json.dumps({"tool_calls": [{"name": "llm_version"}]})
    cli_args = [
        "-m",
        "mock",
        "-o",
        "max_tokens",
        "10",
        "-T",
        "llm_version",
        tool_request,
    ]
    runner = CliRunner()
    # catch_exceptions=False lets any exception surface directly in the test
    result = runner.invoke(cli.cli, cli_args, catch_exceptions=False)
    # Only the exit status is checked; the output is not inspected
    assert result.exit_code == 0


def test_functions_tool_locals():
    """Passing ``my_locals = locals`` to --functions must leave the CLI exit code at 0.

    See https://github.com/simonw/llm/issues/1107
    """
    tool_request = json.dumps({"tool_calls": [{"name": "locals"}]})
    cli_args = [
        "-m",
        "echo",
        "--functions",
        "my_locals = locals",
        "-T",
        "llm_version",
        tool_request,
    ]
    result = CliRunner().invoke(cli.cli, cli_args)
    assert result.exit_code == 0


def test_default_tool_llm_time():
    """Exercise the llm_time tool through the CLI and by calling it directly."""
    tool_request = json.dumps({"tool_calls": [{"name": "llm_time"}]})
    result = CliRunner().invoke(
        cli.cli, ["-m", "echo", "-T", "llm_time", tool_request]
    )
    assert result.exit_code == 0
    assert "timezone_offset" in result.output

    # Direct call: the returned mapping must expose exactly these keys
    expected_keys = {
        "timezone_offset",
        "utc_time_iso",
        "local_time",
        "local_timezone",
        "utc_time",
        "is_dst",
    }
    info = llm_time()
    assert set(info.keys()) == expected_keys


def test_incorrect_tool_usage():
    """Requesting a tool that was never registered yields an error message in the output."""

    def simple(name: str):
        return name

    # "bad_tool" is deliberately not among the tools passed to the chain
    request = json.dumps({"tool_calls": [{"name": "bad_tool"}]})
    reply = llm.get_model("echo").chain(request, tools=[simple]).text()
    assert 'Error: tool \\"bad_tool\\" does not exist' in reply


def test_tool_returning_attachment():
    """A tool may return an llm.ToolOutput carrying attachments alongside its text.

    The echoed chain output is expected to include both the attachment's
    MIME type and the textual output of the tool.
    """
    model = llm.get_model("echo")

    # The tool returns a ToolOutput wrapping the attachment, not a bare
    # Attachment, so the return annotation names ToolOutput.
    def return_attachment() -> llm.ToolOutput:
        return llm.ToolOutput(
            "Output",
            attachments=[
                llm.Attachment(
                    content=b"This is a test attachment",
                    type="image/png",
                )
            ],
        )

    chain_response = model.chain(
        json.dumps({"tool_calls": [{"name": "return_attachment"}]}),
        tools=[return_attachment],
    )
    output = chain_response.text()
    assert '"type": "image/png"' in output
    assert '"output": "Output"' in output


@pytest.mark.asyncio
async def test_async_tool_returning_attachment():
    """Async variant: a coroutine tool may return an llm.ToolOutput with attachments.

    The echoed chain output is expected to include both the attachment's
    MIME type and the textual output of the tool.
    """
    model = llm.get_async_model("echo")

    # The tool returns a ToolOutput wrapping the attachment, not a bare
    # Attachment, so the return annotation names ToolOutput.
    async def return_attachment() -> llm.ToolOutput:
        return llm.ToolOutput(
            "Output",
            attachments=[
                llm.Attachment(
                    content=b"This is a test attachment",
                    type="image/png",
                )
            ],
        )

    chain_response = model.chain(
        json.dumps({"tool_calls": [{"name": "return_attachment"}]}),
        tools=[return_attachment],
    )
    output = await chain_response.text()
    assert '"type": "image/png"' in output
    assert '"output": "Output"' in output


def test_tool_conversation_settings():
    """before_call/after_call hooks set on a conversation fire for every chain."""
    before_collected = []
    after_collected = []

    def before(*args):
        before_collected.append(args)

    def after(*args):
        after_collected.append(args)

    conversation = llm.get_model("echo").conversation(
        tools=[llm_time], before_call=before, after_call=after
    )

    # Two separate chains, one tool call each
    for _ in range(2):
        conversation.chain(json.dumps({"tool_calls": [{"name": "llm_time"}]})).text()

    assert len(before_collected) == 2
    assert len(after_collected) == 2


@pytest.mark.asyncio
async def test_tool_conversation_settings_async():
    """Async variant: coroutine before_call/after_call hooks fire for every chain."""
    before_collected = []
    after_collected = []

    async def before(*args):
        before_collected.append(args)

    async def after(*args):
        after_collected.append(args)

    conversation = llm.get_async_model("echo").conversation(
        tools=[llm_time], before_call=before, after_call=after
    )

    # Two separate chains, one tool call each, awaited in sequence
    for _ in range(2):
        request = json.dumps({"tool_calls": [{"name": "llm_time"}]})
        await conversation.chain(request).text()

    assert len(before_collected) == 2
    assert len(after_collected) == 2


# Source passed to --functions: a single tool that always raises with the given message
ERROR_FUNCTION = """
def trigger_error(msg: str):
    raise Exception(msg)
"""


@pytest.mark.parametrize("async_", (False, True))
def test_tool_errors(async_):
    """An exception raised inside a tool is reported in the output and in the logs.

    See https://github.com/simonw/llm/issues/1107
    """
    runner = CliRunner()

    tool_request = json.dumps(
        {"tool_calls": [{"name": "trigger_error", "arguments": {"msg": "Error!"}}]}
    )
    cli_args = ["-m", "echo", "--functions", ERROR_FUNCTION, tool_request]
    if async_:
        cli_args.append("--async")

    result = runner.invoke(cli.cli, cli_args)
    assert result.exit_code == 0
    assert '"output": "Error: Error!"' in result.output

    # llm logs --json output
    log_json_result = runner.invoke(cli.cli, ["logs", "--json", "-c"])
    assert log_json_result.exit_code == 0
    log_data = json.loads(log_json_result.output)
    assert len(log_data) == 2
    first_tool_result = log_data[1]["tool_results"][0]
    assert first_tool_result["exception"] == "Exception: Error!"

    # llm logs -c output
    log_text_result = runner.invoke(cli.cli, ["logs", "-c"])
    assert log_text_result.exit_code == 0
    expected_markdown = (
        "- **trigger_error**: `None`<br>\n"
        "    Error: Error!<br>\n"
        "    **Error**: Exception: Error!\n"
    )
    assert expected_markdown in log_text_result.output


def test_chain_sync_cancel_only_first_of_two():
    """Cancelling one tool call from before_call must not stop the other from running."""
    model = llm.get_model("echo")

    def t1() -> str:
        return "ran1"

    def t2() -> str:
        return "ran2"

    def before(tool, tool_call):
        # Veto t1 only; any other tool falls through and is allowed
        if tool.name == "t1":
            raise CancelToolCall("skip1")

    payload = json.dumps({"tool_calls": [{"name": "t1"}, {"name": "t2"}]})
    chain = model.chain(payload, tools=[t1, t2], before_call=before)
    chain.text()

    # The second response's prompt carries one result per requested call
    results = chain._responses[1].prompt.tool_results
    assert len(results) == 2
    cancelled, executed = results

    # first cancelled ...
    assert cancelled.name == "t1"
    assert cancelled.output == "Cancelled: skip1"
    assert isinstance(cancelled.exception, CancelToolCall)

    # ... second executed
    assert executed.name == "t2"
    assert executed.output == "ran2"
    assert executed.exception is None


# Async equivalent of test_chain_sync_cancel_only_first_of_two
@pytest.mark.asyncio
async def test_chain_async_cancel_only_first_of_two():
    """Async chain: cancelling t1 from before_call still lets t2 run."""
    async_model = llm.get_async_model("echo")

    # One sync tool and one async tool, mixed on purpose
    def t1() -> str:
        return "ran1"

    async def t2() -> str:
        return "ran2"

    async def before(tool, tool_call):
        # Veto t1 only; any other tool falls through and is allowed
        if tool.name == "t1":
            raise CancelToolCall("skip1")

    payload = json.dumps({"tool_calls": [{"name": "t1"}, {"name": "t2"}]})
    chain = async_model.chain(payload, tools=[t1, t2], before_call=before)
    await chain.text()

    # The second response's prompt carries one result per requested call
    results = chain._responses[1].prompt.tool_results
    assert len(results) == 2
    cancelled, executed = results

    assert cancelled.name == "t1"
    assert cancelled.output == "Cancelled: skip1"
    assert isinstance(cancelled.exception, CancelToolCall)

    assert executed.name == "t2"
    assert executed.output == "ran2"
    assert executed.exception is None


================================================
FILE: tests/test_tools_streaming.py
================================================
import llm
from llm.tools import llm_version
import os
import pytest

# Key handed to the model in the tests below; "badkey" is the fallback when
# PYTEST_OPENAI_API_KEY is unset or empty.
API_KEY = os.environ.get("PYTEST_OPENAI_API_KEY") or "badkey"


# Streaming variant "a": the cassette has arguments="" followed by arguments="{}"
@pytest.mark.vcr(record_mode="none")
def test_tools_streaming_variant_a():
    chain = llm.get_model("gpt-4.1-mini").chain(
        "What is the current llm version?", tools=[llm_version], key=API_KEY
    )
    # Iterating the chain yields text chunks; join them into the full reply
    streamed = "".join(chain)
    assert streamed == "The current version of *llm* is **0.fixed-version**."


# Streaming variant "b": arguments="{}" is the first partial stream received
@pytest.mark.vcr(record_mode="none")
def test_tools_streaming_variant_b():
    chain = llm.get_model("gpt-4.1-mini").chain(
        "What is the current llm version?", tools=[llm_version], key=API_KEY
    )
    # Iterating the chain yields text chunks; join them into the full reply
    streamed = "".join(chain)
    assert streamed == "The current version of *llm* is **0.fixed-version**."


# Streaming variant "c"
@pytest.mark.vcr(record_mode="none")
def test_tools_streaming_variant_c():
    chain = llm.get_model("gpt-4.1-mini").chain(
        "What is the current llm version?", tools=[llm_version], key=API_KEY
    )
    # Iterating the chain yields text chunks; join them into the full reply
    streamed = "".join(chain)
    expected = "The installed version of LLM on this system is 0.fixed-version."
    assert streamed == expected


================================================
FILE: tests/test_utils.py
================================================
import json
import pytest
from llm.utils import (
    extract_fenced_code_block,
    instantiate_from_spec,
    maybe_fenced_code,
    schema_dsl,
    simplify_usage_dict,
    truncate_string,
    monotonic_ulid,
)
from llm import get_key, Toolbox


@pytest.mark.parametrize(
    "input_data,expected_output",
    [
        # Zero-valued counters are dropped; only the non-zero one survives
        (
            {
                "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0},
                "completion_tokens_details": {
                    "reasoning_tokens": 0,
                    "audio_tokens": 1,
                    "accepted_prediction_tokens": 0,
                    "rejected_prediction_tokens": 0,
                },
            },
            {"completion_tokens_details": {"audio_tokens": 1}},
        ),
        # Nothing to remove: input comes back unchanged
        (
            {
                "details": {"tokens": 5, "audio_tokens": 2},
                "more_details": {"accepted_tokens": 3},
            },
            {
                "details": {"tokens": 5, "audio_tokens": 2},
                "more_details": {"accepted_tokens": 3},
            },
        ),
        # Everything is zero or empty: result is an empty dict
        ({"details": {"tokens": 0, "audio_tokens": 0}, "more_details": {}}, {}),
        ({"level1": {"level2": {"value": 0, "another_value": {}}}}, {}),
        # Deeper nesting with a mix of kept and dropped entries
        (
            {
                "level1": {"level2": {"value": 0, "another_value": 1}},
                "level3": {"empty_dict": {}, "valid_token": 10},
            },
            {"level1": {"level2": {"another_value": 1}}, "level3": {"valid_token": 10}},
        ),
    ],
)
def test_simplify_usage_dict(input_data, expected_output):
    """simplify_usage_dict() returns the expected reduced dict for each case."""
    # This utility function is used by at least one plugin - llm-openai-plugin
    simplified = simplify_usage_dict(input_data)
    assert simplified == expected_output


@pytest.mark.parametrize(
    "input,last,expected",
    [
        # No fence at all
        ["This is a sample text without any code blocks.", False, None],
        # Three-backtick fences, without and with a language tag
        [
            "Here is some text.\n\n```\ndef foo():\n    return 'bar'\n```\n\nMore text.",
            False,
            "def foo():\n    return 'bar'\n",
        ],
        [
            "Here is some text.\n\n```python\ndef foo():\n    return 'bar'\n```\n\nMore text.",
            False,
            "def foo():\n    return 'bar'\n",
        ],
        # Four-backtick fences, without and with a language tag
        [
            "Here is some text.\n\n````\ndef foo():\n    return 'bar'\n````\n\nMore text.",
            False,
            "def foo():\n    return 'bar'\n",
        ],
        [
            "Here is some text.\n\n````javascript\nfunction foo() {\n    return 'bar';\n}\n````\n\nMore text.",
            False,
            "function foo() {\n    return 'bar';\n}\n",
        ],
        # Opening and closing fences of different lengths do not match
        [
            "Here is some text.\n\n```python\ndef foo():\n    return 'bar'\n````\n\nMore text.",
            False,
            None,
        ],
        # Two blocks: last=False picks the first one
        [
            "First code block:\n\n```python\ndef foo():\n    return 'bar'\n```\n\n"
            "Second code block:\n\n```javascript\nfunction foo() {\n    return 'bar';\n}\n```",
            False,
            "def foo():\n    return 'bar'\n",
        ],
        # Two blocks: last=True picks the second one
        [
            "First code block:\n\n```python\ndef foo():\n    return 'bar'\n```\n\n"
            "Second code block:\n\n```javascript\nfunction foo() {\n    return 'bar';\n}\n```",
            True,
            "function foo() {\n    return 'bar';\n}\n",
        ],
        [
            "First code block:\n\n```python\ndef foo():\n    return 'bar'\n```\n\n"
            # This one has trailing whitespace after the second code block:
            # https://github.com/simonw/llm/pull/718#issuecomment-2613177036
            "Second code block:\n\n```javascript\nfunction foo() {\n    return 'bar';\n}\n``` ",
            True,
            "function foo() {\n    return 'bar';\n}\n",
        ],
        # Single backticks inside the block are left alone
        [
            "Here is some text.\n\n```python\ndef foo():\n    return `bar`\n```\n\nMore text.",
            False,
            "def foo():\n    return `bar`\n",
        ],
    ],
)
def test_extract_fenced_code_block(input, last, expected):
    """extract_fenced_code_block() returns the expected block, or None, for each case."""
    assert extract_fenced_code_block(input, last=last) == expected


@pytest.mark.parametrize(
    "schema, expected",
    [
        # 1. Plain comma-separated names: every field defaults to string
        (
            "name, bio",
            {
                "type": "object",
                "properties": {"name": {"type": "string"}, "bio": {"type": "string"}},
                "required": ["name", "bio"],
            },
        ),
        # 2. Comma-separated names with explicit types
        (
            "name, age int, balance float, active bool",
            {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer"},
                    "balance": {"type": "number"},
                    "active": {"type": "boolean"},
                },
                "required": ["name", "age", "balance", "active"],
            },
        ),
        # 3. Comma-separated names with descriptions after a colon
        (
            "name: full name, age int: years old",
            {
                "type": "object",
                "properties": {
                    "name": {"type": "string", "description": "full name"},
                    "age": {"type": "integer", "description": "years old"},
                },
                "required": ["name", "age"],
            },
        ),
        # 4. One field per line
        (
            """
        name
        bio
        age int
        """,
            {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "bio": {"type": "string"},
                    "age": {"type": "integer"},
                },
                "required": ["name", "bio", "age"],
            },
        ),
        # 5. One field per line, so descriptions may themselves contain commas
        (
            """
        name: the person's name
        age int: their age in years, must be positive
        bio: a short bio, no more than three sentences
        """,
            {
                "type": "object",
                "properties": {
                    "name": {"type": "string", "description": "the person's name"},
                    "age": {
                        "type": "integer",
                        "description": "their age in years, must be positive",
                    },
                    "bio": {
                        "type": "string",
                        "description": "a short bio, no more than three sentences",
                    },
                },
                "required": ["name", "age", "bio"],
            },
        ),
        # 6. Empty input produces an object schema with no properties
        ("", {"type": "object", "properties": {}, "required": []}),
        # 7. "str" spelled out explicitly
        (
            "name str, description str",
            {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "description": {"type": "string"},
                },
                "required": ["name", "description"],
            },
        ),
        # 8. Surplus whitespace around names, types and descriptions
        (
            "  name  ,  age   int  :  person's age  ",
            {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer", "description": "person's age"},
                },
                "required": ["name", "age"],
            },
        ),
    ],
)
def test_schema_dsl(schema, expected):
    """schema_dsl() turns each DSL string into the expected JSON-schema dict."""
    assert schema_dsl(schema) == expected


def test_schema_dsl_multi():
    """With multi=True the per-item schema is wrapped in an "items" array property."""
    item_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer", "description": "The age"},
        },
        "required": ["name", "age"],
    }
    expected = {
        "type": "object",
        "properties": {"items": {"type": "array", "items": item_schema}},
        "required": ["items"],
    }
    assert schema_dsl("name, age int: The age", multi=True) == expected


@pytest.mark.parametrize(
    "text, max_length, normalize_whitespace, keep_end, expected",
    [
        # Basic truncation tests
        ("Hello, world!", 100, False, False, "Hello, world!"),
        ("Hello, world!", 5, False, False, "He..."),
        ("Hello, world!", 7, False, False, "Hell..."),
        ("1234567890", 7, False, False, "1234..."),
        # Empty and None inputs come back as-is
        ("", 10, False, False, ""),
        (None, 10, False, False, None),
        # Normalize whitespace tests
        ("Hello   world!", 100, True, False, "Hello world!"),
        ("Hello \n\t world!", 100, True, False, "Hello world!"),
        ("Hello   world!", 5, True, False, "He..."),
        # Keep end tests
        ("Hello, world!", 10, False, True, "He... d!"),
        # Combinations of parameters
        ("Hello   world!", 10, True, True, "He... d!"),
        # Note: After normalization, "Hello world!" is exactly 12 chars, so no truncation
        ("Hello \n\t world!", 12, True, True, "Hello world!"),
        # Edge cases
        ("12345", 5, False, False, "12345"),
        ("123456", 5, False, False, "12..."),
        ("12345", 5, False, True, "12345"),  # Unchanged for exact fit
        # Very long string
        ("A" * 200, 10, False, False, "AAAAAAA..."),
        ("A" * 200, 10, False, True, "AA... AA"),  # keep_end with adequate length
        # Exact boundary cases
        ("123456789", 9, False, False, "123456789"),  # Exact fit
        ("1234567890", 9, False, False, "123456..."),  # Simple truncation
        ("123456789", 9, False, True, "123456789"),  # Exact fit with keep_end
        # Minimum sensible length tests for keep_end
        (
            "1234567890",
            8,
            False,
            True,
            "12345...",
        ),  # Too small for keep_end, use regular
        ("1234567890", 9, False, True, "12... 90"),  # Just enough for keep_end
    ],
)
def test_truncate_string(text, max_length, normalize_whitespace, keep_end, expected):
    """Test the truncate_string function with various inputs and parameters.

    Each row is (text, max_length, normalize_whitespace, keep_end, expected).
    Every row is unique: listing the same parameter set twice would only run
    an identical check a second time.
    """
    result = truncate_string(
        text=text,
        max_length=max_length,
        normalize_whitespace=normalize_whitespace,
        keep_end=keep_end,
    )
    assert result == expected


@pytest.mark.parametrize(
    "text, max_length, keep_end, prefix_len, expected_full",
    [
        # String already fits: returned unchanged
        ("0123456789", 10, True, None, "0123456789"),
        # Enough room for head, separator and tail
        ("012345678901234", 14, True, 4, "0123... 1234"),
        # Different cutoffs
        ("abcdefghijklmnopqrstuvwxyz", 10, True, 2, "ab... yz"),
        ("abcdefghijklmnopqrstuvwxyz", 12, True, 3, "abc... xyz"),
        # Below the minimum length: plain truncation is expected instead
        ("abcdefghijklmnopqrstuvwxyz", 8, True, None, "abcde..."),
    ],
)
def test_test_truncate_string_keep_end(
    text, max_length, keep_end, prefix_len, expected_full
):
    """Check the exact output of keep_end truncation and, where it applies, its shape."""
    result = truncate_string(text=text, max_length=max_length, keep_end=keep_end)
    assert result == expected_full

    # The head/tail checks only make sense when keep_end truncation happened
    was_truncated = len(text) > max_length
    if prefix_len is None or not was_truncated or max_length < 9:
        return

    head, tail = text[:prefix_len], text[-prefix_len:]
    assert result[:prefix_len] == head
    assert result[-prefix_len:] == tail
    assert "... " in result


@pytest.mark.parametrize(
    "content,expected_fenced",
    [
        # Case 1: Contains many angle brackets (>10)
        (
            "<div><p>Test</p><span>Test</span><a>Test</a><b>Test</b><i>Test</i><u>Test</u>",
            True,
        ),
        # Case 2: Short content with few angle brackets
        ("<p>Just a paragraph</p>", False),
        # Case 3: Many short lines (>3 lines, 90% under 120 chars)
        ("line1\nline2\nline3\nline4\nline5", True),
        # Case 4: Many long lines (>3 lines, <90% under 120 chars)
        ("x" * 130 + "\n" + "x" * 130 + "\n" + "x" * 130 + "\n" + "x" * 50, False),
        # Case 5: Mixed case (many angle brackets and short lines)
        ("<div>\n<p>Line 1</p>\n<p>Line 2</p>\n<p>Line 3</p>\n</div>", True),
        # Case 6: Mixed case with few lines
        ("<div><p>Only two</p></div>", False),
        # Case 7: Empty string
        ("", False),
        # Case 8: Content with existing backticks (should use more backticks)
        ("```\ndef test():\n    pass\n```", True),
    ],
)
def test_maybe_fenced_code(content: str, expected_fenced: bool):
    """maybe_fenced_code() either wraps the content in a fence or returns it untouched."""
    result = maybe_fenced_code(content)

    if not expected_fenced:
        # Not code-like: the content must come back unchanged
        assert result == content
        return

    # Code-like: the content must be wrapped in a fenced block
    trimmed = result.strip()
    assert result != content
    assert trimmed.startswith("```")
    assert trimmed.endswith("```")
    assert content.strip() in result


@pytest.mark.parametrize(
    "content,backtick_count",
    [
        # No backticks in the content: a 3-backtick fence is used
        ("def test():\n    pass", 3),
        # Content already has 3 backticks: fence grows to 4
        ("```\ndef test():\n    pass\n```", 4),
        # Content already has 4 backticks: fence grows to 5
        ("````\ndef test():\n    pass\n````", 5),
    ],
)
def test_backtick_count_adjustment(content: str, backtick_count: int):
    """The fence chosen by maybe_fenced_code() is longer than any backtick run in the content."""
    # Eleven angle brackets force the content to be treated as code
    result = maybe_fenced_code(content + "<" * 11)

    fence = "`" * backtick_count
    assert result.startswith("\n" + fence + "\n")
    assert result.endswith("\n" + fence)


class Files:
    """Sample class with one optional ``dir`` argument, used as an instantiate_from_spec() target."""

    def __init__(self, dir="."):
        # Stored verbatim so tests can compare it with what the spec supplied
        self.dir = dir


class ValueFlag:
    """Sample class with two optional arguments, used as an instantiate_from_spec() target."""

    def __init__(self, value=None, flag=False):
        # Both arguments are stored verbatim under the same names
        self.value, self.flag = value, flag


@pytest.mark.parametrize(
    "spec, expected_cls, expected_attrs",
    [
        # Bare name and empty call both use the constructor default
        ("Files", Files, {"dir": "."}),
        ("Files()", Files, {"dir": "."}),
        # Positional string, JSON object and keyword forms
        ('Files("tmp")', Files, {"dir": "tmp"}),
        ('Files({"dir": "/tmp"})', Files, {"dir": "/tmp"}),
        ('Files(dir="/data")', Files, {"dir": "/data"}),
        (
            'ValueFlag({"value": 123, "flag": true})',
            ValueFlag,
            {"value": 123, "flag": True},
        ),
        # JSON-style true/false literals in keyword form
        ("ValueFlag(flag=true)", ValueFlag, {"flag": True}),
        ("ValueFlag(value=123, flag=false)", ValueFlag, {"value": 123, "flag": False}),
    ],
)
def test_instantiate_valid(spec, expected_cls, expected_attrs):
    """Each valid spec yields an instance of the right class with the expected attributes."""
    registry = {"Files": Files, "ValueFlag": ValueFlag}
    obj = instantiate_from_spec(registry, spec)

    assert isinstance(obj, expected_cls)
    # Only the attributes named in expected_attrs are compared
    actual_attrs = {name: getattr(obj, name) for name in expected_attrs}
    assert actual_attrs == expected_attrs


@pytest.mark.parametrize(
    "spec",
    [
        'Files({"dir":})',
        "Files(",
        "Files(dir=)",
        'Files({"dir": [})',
        "Files(.)",
        "Files(this is invalid)",
        "ValueFlag(value=123, flag=falseTypo)",
    ],
)
def test_instantiate_invalid(spec):
    """Each malformed spec makes instantiate_from_spec() raise ValueError."""
    registry = {"Files": Files, "ValueFlag": ValueFlag}
    with pytest.raises(ValueError):
        instantiate_from_spec(registry, spec)


def test_get_key(user_path, monkeypatch):
    """Check how get_key() resolves input, alias and env against keys.json and the environment."""
    monkeypatch.setenv("ENV", "from-env")
    keys_file = user_path / "keys.json"
    keys_file.write_text(json.dumps({"testkey": "TEST"}), "utf-8")

    # A stored key is found through either alias= or input=
    assert get_key(alias="testkey") == "TEST"
    assert get_key(input="testkey") == "TEST"

    # An unknown alias falls back to the environment variable, else None
    assert get_key(alias="missing", env="ENV") == "from-env"
    assert get_key(alias="missing") is None

    # found key should over-ride env
    assert get_key(input="testkey", env="ENV") == "TEST"

    # explicit key should over-ride alias
    assert get_key(input="explicit", alias="testkey") == "explicit"
    assert get_key(input="explicit", alias="testkey", env="ENV") == "explicit"


def test_monotonic_ulids():
    """1000 ULIDs generated back to back are already in sorted order."""
    generated = []
    for _ in range(1000):
        generated.append(monotonic_ulid())
    assert generated == sorted(generated)


def test_toolbox_config_capture():
    """Test that Toolbox captures __init__ parameters in _config"""

    # Single positional arg
    class Tool1(Toolbox):
        def __init__(self, value):
            pass

    single = Tool1(42)
    assert single._config == {"value": 42}

    # Multiple positional args
    class Tool2(Toolbox):
        def __init__(self, a, b, c):
            pass

    triple = Tool2(1, 2, 3)
    assert triple._config == {"a": 1, "b": 2, "c": 3}

    # Keyword args with defaults: defaults are recorded when nothing is passed
    class Tool3(Toolbox):
        def __init__(self, name="default", count=10):
            pass

    assert Tool3()._config == {"name": "default", "count": 10}
    overridden = Tool3(name="custom", count=20)
    assert overridden._config == {"name": "custom", "count": 20}

    # Mixed required and optional args
    class Tool4(Toolbox):
        def __init__(self, required, optional="default"):
            pass

    assert Tool4("hello")._config == {"required": "hello", "optional": "default"}
    expected_custom = {"required": "world", "optional": "custom"}
    assert Tool4("world", optional="custom")._config == expected_custom

    # *args / **kwargs values are excluded from the captured config
    class Tool5(Toolbox):
        def __init__(self, regular, *args, **kwargs):
            pass

    with_extras = Tool5("test", 1, 2, extra="value")
    assert with_extras._config == {"regular": "test"}

    # No __init__ defined at all
    class Tool6(Toolbox):
        pass

    assert Tool6()._config == {}