[
  {
    "path": ".devcontainer/devcontainer.json",
    "content": "{\n  \"name\": \"dottxt-ai\",\n  \"image\": \"mcr.microsoft.com/devcontainers/python:3.12\",\n  \"runArgs\": [\n    \"--device=nvidia.com/gpu=all\"\n  ],\n  \"hostRequirements\": {\n    \"gpu\": \"optional\"\n  },\n  \"features\": {\n    \"ghcr.io/devcontainers/features/conda:1\": {},\n    \"ghcr.io/devcontainers/features/nvidia-cuda:1\": {\n      \"installCudnn\": true,\n      \"installToolkit\": true,\n      \"cudaVersion\": \"12.4\"\n    },\n    \"ghcr.io/devcontainers/features/rust:1\": {}\n  }\n}\n"
  },
  {
    "path": ".editorconfig",
    "content": "# EditorConfig is awesome: https://EditorConfig.org\n\n# top-most EditorConfig file\nroot = true\n\n[*]\nindent_style = space\nindent_size = 4\nend_of_line = lf\ncharset = utf-8\ntrim_trailing_whitespace = true\ninsert_final_newline = true\n\n[*.yaml]\nindent_size = 2\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.yml",
    "content": "# Issue template inspired by NumPy's excellent template:\n# https://github.com/numpy/numpy/edit/main/.github/ISSUE_TEMPLATE/bug-report.yml\nname: 🐞 Bug report\ndescription: Create a bug report to help us reproduce and fix it.\ntitle: \"<Please write a descriptive title>\"\nlabels: [\"bug\"]\n\nbody:\n  - type: markdown\n    attributes:\n      value: >-\n        Thank you for taking the time to file a bug report. First, carefully read\n        the following before everything else:\n\n          - Does your issue only arise in a library that uses Outlines? If so,\n            submit your issue to this library's issue tracker.\n          - Did you check the issue tracker for open and closed issues that may be\n            related to your bug?\n\n  - type: textarea\n    attributes:\n      label: \"Describe the issue as clearly as possible:\"\n    validations:\n      required: true\n\n  - type: textarea\n    attributes:\n      label: \"Steps/code to reproduce the bug:\"\n      description: >\n        A short code example that reproduces the problem/missing feature. 
It\n        should be self-contained, i.e., can be copy-pasted into the Python\n        interpreter or run as-is via `python myproblem.py`.\n      placeholder: |\n        import outlines\n\n        << your code here >>\n      render: python\n    validations:\n      required: true\n\n  - type: textarea\n    attributes:\n      label: \"Expected result:\"\n      description: >\n        Please describe what you expect the above example to output.\n      placeholder: |\n        << the expected result here >>\n      render: shell\n    validations:\n      required: true\n\n  - type: textarea\n    attributes:\n      label: \"Error message:\"\n      description: >\n        Please include the full error message, if any.\n      placeholder: |\n        << Full traceback starting from `Traceback: ...` >>\n      render: shell\n\n  - type: textarea\n    attributes:\n      label: \"Outlines/Python version information:\"\n      description: |\n          Please run the following code and paste the output here.\n          python -c \"from outlines import _version; print(_version.__version__)\";\n          python -c \"import sys; print('Python', sys.version)\";\n          pip freeze;\n      value: |\n          Version information\n          <details>\n          ```\n          (command output here)\n          ```\n          </details>\n    validations:\n      required: true\n\n  - type: textarea\n    attributes:\n      label: \"Context for the issue:\"\n      description: |\n        Please explain how this issue affects your work or why it should be prioritized.\n      placeholder: |\n        << your explanation here >>\n    validations:\n      required: false\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/config.yml",
    "content": "contact_links:\n  - name: 🤔 Questions & Help\n    url: https://github.com/dottxt-ai/outlines/discussions/new\n    about: \"If you have a question about how to use Outlines, please start a discussion.\"\n"
  },
  {
    "path": ".github/PULL_REQUEST_TEMPLATE/pull_request_template.md",
    "content": "# 🚧 Thank you for opening a PR!\n\nA few important guidelines and requirements before we can merge your PR:\n\n- [ ] We should be able to understand what the PR does from its title only;\n- [ ] There is a high-level description of the changes;\n- [ ] *If the PR adds a new feature*, there is already an [issue][issues] discussing it;\n- [ ] There are links to *all* the relevant issues, discussions and PRs;\n- [ ] The branch is rebased on the latest `main` commit;\n- [ ] **Commit messages** follow these [guidelines][git-guidelines];\n- [ ] One commit per logical change;\n- [ ] The code respects the current **naming conventions**;\n- [ ] Docstrings follow the [NumPy style guide][docstring-guidelines];\n- [ ] `pre-commit` is installed and configured on your machine, and you ran it before opening the PR;\n- [ ] There are tests covering the changes;\n- [ ] The documentation is up to date.\n\nConsider opening a **Draft PR** if your work is still in progress but you would\nlike some feedback from other contributors.\n\n[issues]: https://github.com/dottxt-ai/outlines/issues\n[git-guidelines]: https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html\n[docstring-guidelines]: https://numpydoc.readthedocs.io/en/latest/format.html\n"
  },
  {
    "path": ".github/scripts/build_sdist_and_wheel.sh",
    "content": "#!/bin/bash\n\n# Build sdist and wheel\npython -m pip install -U pip\npython -m pip install build\npython -m build\n\n# Check sdist install and imports\nmkdir -p test-sdist\ncd test-sdist\npython -m venv venv-sdist\nvenv-sdist/bin/python -m pip install ../dist/outlines-*.tar.gz\nvenv-sdist/bin/python -c \"import outlines\"\ncd ..\n\n# Check wheel install and imports\nmkdir -p test-wheel\ncd test-wheel\npython -m venv venv-wheel\nvenv-wheel/bin/python -m pip install ../dist/outlines-*.whl\nvenv-wheel/bin/python -c \"import outlines\"\ncd ..\n"
  },
  {
    "path": ".github/workflows/build_documentation.yml",
    "content": "name: Build the documentation\n\non:\n  pull_request:\n    types: [opened, synchronize, reopened, closed]\n    branches: [main]\n  workflow_dispatch:\n\npermissions:\n  contents: write\n  pull-requests: write\n\njobs:\n  build:\n    name: Build and Deploy Documentation Preview\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n\n      - uses: actions/setup-python@v4\n        with:\n          python-version: \"3.10\"\n\n      - name: Install dependencies\n        if: github.event.action != 'closed'\n        run: pip install -r requirements-doc.txt\n\n      - name: Build the documentation\n        if: github.event.action != 'closed'\n        env:\n          GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }}\n          PR_NUMBER: ${{ github.event.pull_request.number }}\n        run: |\n          sed -i \"1i site_url: https://dottxt-ai.github.io/outlines/pr-preview/pr-${PR_NUMBER}/\" mkdocs.yml\n          mkdocs build\n\n      - name: Deploy to PR preview\n        if: github.event_name == 'pull_request'\n        uses: rossjrw/pr-preview-action@v1\n        with:\n          source-dir: site/\n          preview-branch: gh-pages\n          umbrella-dir: pr-preview\n          comment: false\n\n      - name: Comment PR with preview link\n        if: github.event_name == 'pull_request' && github.event.action != 'closed'\n        uses: actions/github-script@v7\n        with:\n          script: |\n            const prNumber = context.issue.number;\n            const previewUrl = `https://dottxt-ai.github.io/outlines/pr-preview/pr-${prNumber}/`;\n\n            // Find existing preview comment\n            const comments = await github.rest.issues.listComments({\n              issue_number: prNumber,\n              owner: context.repo.owner,\n              repo: context.repo.repo,\n            });\n\n            const botComment = comments.data.find(comment =>\n              comment.user.type === 'Bot' &&\n              
comment.body.includes('Documentation preview')\n            );\n\n            const commentBody = `📚 **Documentation preview**: ${previewUrl}\\n\\n*Preview updates automatically with each commit.*`;\n\n            // Update existing comment or create new one\n            if (botComment) {\n              await github.rest.issues.updateComment({\n                comment_id: botComment.id,\n                owner: context.repo.owner,\n                repo: context.repo.repo,\n                body: commentBody\n              });\n            } else {\n              await github.rest.issues.createComment({\n                issue_number: prNumber,\n                owner: context.repo.owner,\n                repo: context.repo.repo,\n                body: commentBody\n              });\n            }\n"
  },
  {
    "path": ".github/workflows/publish_documentation.yml",
    "content": "name: Publish the documentation\n\non:\n  workflow_dispatch:\n  push:\n    branches:\n      - main\n  release:\n    types:\n      - created\n\npermissions:\n  contents: write\n\njobs:\n  deploy:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n        with:\n          fetch-depth: 0\n      - uses: actions/setup-python@v4\n        with:\n          python-version: 3.x\n      - run: echo \"cache_id=$(date --utc '+%V')\" >> $GITHUB_ENV\n      - uses: actions/cache@v3\n        with:\n          key: mkdocs-material-${{ env.cache_id }}\n          path: .cache\n          restore-keys: |\n            mkdocs-material-\n      - run: pip install -r requirements-doc.txt\n      - run: mkdocs build\n\n      - name: Set up Git\n        run: |\n          git config user.name ${{ github.actor }}\n          git config user.email ${{ github.actor }}@users.noreply.github.com\n\n      - name: Publish Tag as latest\n        env:\n          GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }}\n        if: github.event_name == 'release'\n        run: |\n          mike deploy --push --update-aliases ${{ github.ref_name }} latest\n          mike set-default --push latest\n\n      - name: Publish main as unstable\n        env:\n          GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }}\n        if: github.event_name == 'push'\n        run: |\n          mike deploy --push --update-aliases ${{ github.ref_name }} unstable\n"
  },
  {
    "path": ".github/workflows/release_pypi.yaml",
    "content": "name: Release PyPi\n\non:\n  release:\n    types:\n      - created\njobs:\n  release-job:\n    name: Build and publish on PyPi\n    runs-on: ubuntu-latest\n    steps:\n    - name: Checkout\n      uses: actions/checkout@v2\n    - name: Set up Python\n      uses: actions/setup-python@v2\n      with:\n        python-version: \"3.10\"\n    - name: Build SDist and Wheel\n      run: ./.github/scripts/build_sdist_and_wheel.sh\n    - name: Check that the package version matches the Release name\n      run: |\n        grep -Rq \"^Version: ${GITHUB_REF:10}$\" outlines.egg-info/PKG-INFO\n    - name: Publish to PyPi\n      uses: pypa/gh-action-pypi-publish@v1.4.2\n      with:\n        user: __token__\n        password: ${{ secrets.PYPI_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/tests.yml",
    "content": "name: Tests\n\non:\n  pull_request:\n    branches: [main,v1.0]\n  push:\n    branches: [main]\n\njobs:\n  style:\n    name: Check the code style\n    runs-on: ubuntu-latest\n    steps:\n    - uses: actions/checkout@v3\n    - uses: actions/setup-python@v4\n      with:\n        python-version: \"3.13\"\n    - uses: pre-commit/action@v3.0.0\n\n  tests:\n    name: Run the tests\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        python-version: [\"3.10\", \"3.13\"]\n    steps:\n    - uses: actions/checkout@v3\n    - name: Set up Python ${{ matrix.python-version }}\n      uses: actions/setup-python@v4\n      with:\n        python-version: ${{ matrix.python-version }}\n        cache: 'pip'\n        cache-dependency-path: 'pyproject.toml'\n    - name: Free disk space\n      run: |\n        set -eux\n        sudo rm -rf /usr/share/dotnet || true\n        sudo rm -rf /opt/ghc || true\n        sudo rm -rf /usr/local/lib/android || true\n        sudo apt-get clean\n        df -h\n    - name: Install Ollama\n      run: |\n        curl -fsSL https://ollama.com/install.sh | sh\n        ollama --version\n        ollama pull tinyllama\n    - name: Set up test environment\n      run: |\n        python -m pip install --upgrade pip\n        pip install uv\n        uv sync --no-group test-gpu --extra test\n    - name: cache HuggingFace models\n      uses: actions/cache@v4\n      with:\n        path: ~/.cache/huggingface\n        key: hf-${{ runner.os }}-${{ hashFiles('**/pyproject.toml') }}\n        restore-keys: |\n          hf-${{ runner.os }}-\n    - name: Create matrix id\n      id: matrix-id\n      env:\n        MATRIX_CONTEXT: ${{ toJson(matrix) }}\n      run: |\n        echo $MATRIX_CONTEXT\n        export MATRIX_ID=`echo $MATRIX_CONTEXT | md5sum | cut -c 1-32`\n        echo $MATRIX_ID\n        echo \"::set-output name=id::$MATRIX_ID\"\n    - name: Run tests\n      run: |\n        rm -f .coverage*\n        uv run coverage erase\n        uv run 
python -m coverage run --branch --source=outlines --parallel-mode -m pytest -x -m 'not api_call'\n    - name: Upload coverage data\n      uses: actions/upload-artifact@v4\n      with:\n        name: coverage-data-${{ matrix.python-version }}\n        path: .coverage.*\n        if-no-files-found: ignore\n        include-hidden-files: true\n\n  coverage:\n    name: Combine & check coverage.\n    needs: tests\n    runs-on: ubuntu-latest\n\n    steps:\n      - uses: actions/checkout@v3\n        with:\n          fetch-depth: 0\n\n      - uses: actions/setup-python@v4\n        with:\n          cache: pip\n          python-version: \"3.11\"\n\n      - name: Set up environment\n        run: |\n          pip install --upgrade \"coverage[toml]>=5.1\" diff-cover\n\n      - uses: actions/download-artifact@v4\n        with:\n          pattern: coverage-data-*\n          merge-multiple: true\n\n      - name: Combine coverage & fail if it's <100%.\n        run: |\n          python -m coverage combine\n          python -m coverage html --skip-covered --skip-empty\n          python -m coverage xml\n          python -m coverage report --fail-under=100 || (python -m coverage report && exit 1)\n\n      - name: Upload HTML report if check failed.\n        uses: actions/upload-artifact@v4\n        with:\n          name: html-report\n          path: htmlcov\n          overwrite: true\n        if: ${{ failure() }}\n\n  build-wheel:\n    name: Build Wheel and Test SDist\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v3\n      - name: Build SDist and Wheel\n        run: ./.github/scripts/build_sdist_and_wheel.sh\n"
  },
  {
    "path": ".github/workflows/tests_api_models.yml",
    "content": "name: API Models Tests\n\non:\n  workflow_dispatch:\n\njobs:\n  tests:\n    name: Run API Models Tests\n    runs-on: ubuntu-latest\n    env:\n      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}\n      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}\n      GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}\n      DOTTXT_API_KEY: ${{ secrets.DOTTXT_API_KEY }}\n    strategy:\n      fail-fast: false\n      matrix:\n        python-version: [\"3.10\"]\n\n    steps:\n    - uses: actions/checkout@v3\n      with:\n        ref: ${{ github.ref }}\n\n    - name: Set up Python ${{ matrix.python-version }}\n      uses: actions/setup-python@v4\n      with:\n        python-version: ${{ matrix.python-version }}\n        cache: 'pip'\n        cache-dependency-path: 'pyproject.toml'\n\n    - name: Free disk space\n      run: |\n        set -eux\n        sudo rm -rf /usr/share/dotnet || true\n        sudo rm -rf /opt/ghc || true\n        sudo rm -rf /usr/local/lib/android || true\n        sudo apt-get clean\n        df -h\n\n    - name: Install Ollama\n      run: |\n        curl -fsSL https://ollama.com/install.sh | sh\n        ollama --version\n        ollama pull tinyllama\n\n    - name: Set up test environment\n      run: |\n        python -m pip install --upgrade pip\n        pip install uv\n        uv sync --no-group test-gpu --extra test\n\n    - name: cache HuggingFace models\n      uses: actions/cache@v4\n      with:\n        path: ~/.cache/huggingface\n        key: hf-${{ runner.os }}-${{ hashFiles('**/pyproject.toml') }}\n        restore-keys: |\n          hf-${{ runner.os }}-\n\n    - name: Create matrix id\n      id: matrix-id\n      env:\n        MATRIX_CONTEXT: ${{ toJson(matrix) }}\n      run: |\n        echo $MATRIX_CONTEXT\n        export MATRIX_ID=`echo $MATRIX_CONTEXT | md5sum | cut -c 1-32`\n        echo $MATRIX_ID\n        echo \"::set-output name=id::$MATRIX_ID\"\n\n    - name: Run tests\n      run: |\n        uv run pytest -m 'api_call' 
--ignore=tests/models/test_dottxt.py\n      env:\n        COVERAGE_FILE: .coverage.${{ steps.matrix-id.outputs.id }}\n"
  },
  {
    "path": ".gitignore",
    "content": "__pycache__\n.benchmarks\n.cache\n.coverage\n.direnv\n.env\n.idea\n.pytest_cache\n.python-version\n.venv\n*_version.py\n*.egg-info\n*.gguf\nbenchmarks/results\nbuild\ndocs/build\nlogs\n.worktrees/\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "repos:\n- repo: https://github.com/pre-commit/pre-commit-hooks\n  rev: v5.0.0\n  hooks:\n    -   id: check-merge-conflict\n    -   id: debug-statements\n    -   id: end-of-file-fixer\n    -   id: trailing-whitespace\n- repo: https://github.com/pre-commit/mirrors-mypy\n  rev: v1.14.1\n  hooks:\n    - id: mypy\n      args: [--allow-redefinition]\n      exclude: ^examples/\n      additional_dependencies: [types-tqdm, types-Pillow]\n- repo: https://github.com/astral-sh/ruff-pre-commit\n  rev: v0.9.1\n  hooks:\n    - id: ruff\n      args: [\"--config=pyproject.toml\"]\n"
  },
  {
    "path": ".pydocstyle",
    "content": "[pydocstyle]\nconvention = numpy\n"
  },
  {
    "path": ".readthedocs.yaml",
    "content": "version: 2\n\npython:\n  version: \"3.8\"\n  install:\n      - method: pip\n        path: .\n        extra_requirements:\n          - rtd\n      - requirements: requirements-doc.txt\n\nsphinx:\n  builder: html\n  configuration: docs/source/conf.py\n  fail_on_warning: true\n"
  },
  {
    "path": ".vscode/settings.json",
    "content": "{\n    \"python.testing.pytestArgs\": [\n        \"tests\"\n    ],\n    \"python.testing.unittestEnabled\": false,\n    \"python.testing.pytestEnabled\": true\n}\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright 2023- The Outlines developers\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "<div align=\"center\" style=\"margin-bottom: 1em;\">\n\n<img src=\"./docs/assets/images/logo-light-mode.svg#gh-light-mode-only\" alt=\"Outlines Logo\" width=300></img>\n<img src=\"./docs/assets/images/logo-dark-mode.svg#gh-dark-mode-only\" alt=\"Outlines Logo\" width=300></img>\n\n\n 🗒️ *Structured outputs for LLMs* 🗒️\n\nMade with ❤👷️ by the team at [.txt](https://dottxt.co)\n<br>Trusted by NVIDIA, Cohere, HuggingFace, vLLM, etc.\n\n<!-- Project Badges -->\n[![PyPI Version][pypi-version-badge]][pypi]\n[![Downloads][downloads-badge]][pypistats]\n[![Stars][stars-badge]][stars]\n\n<!-- Community Badges -->\n[![Discord][discord-badge]][discord]\n[![Blog][dottxt-blog-badge]][dottxt-blog]\n[![Twitter][twitter-badge]][twitter]\n\n</div>\n\n## 🚀 Building the future of structured generation\n\nWe're working with select partners to develop new interfaces to structured generation.\n\nNeed XML, FHIR, custom schemas or grammars? Let's talk.\n\nAudit your schema: share one schema, we show you what breaks under generation, the constraints that fix it, and compliance rates before and after. 
Sign up [here](https://h1xbpbfsf0w.typeform.com/to/rtFUraA2?typeform).\n\n## Table of Contents\n\n- [Why Outlines?](#why-outlines)\n- [Quickstart](#quickstart)\n- [Real-World Examples](#real-world-examples)\n  - [🙋‍♂️ Customer Support Triage](#customer-support-triage)\n  - [📦 E-commerce Product Categorization](#e-commerce-product-categorization)\n  - [📊 Parse Event Details with Incomplete Data](#parse-event-details-with-incomplete-data)\n  - [🗂️ Categorize Documents into Predefined Types](#categorize-documents-into-predefined-types)\n  - [📅 Schedule a Meeting with Function Calling](#schedule-a-meeting-with-function-calling)\n  - [📝 Dynamically Generate Prompts with Re-usable Templates](#dynamically-generate-prompts-with-re-usable-templates)\n- [They Use Outlines](#they-use-outlines)\n- [Model Integrations](#model-integrations)\n- [Core Features](#core-features)\n- [Other Features](#other-features)\n- [About .txt](#about-txt)\n- [Community](#community)\n\n<div align=\"center\"><img src=\"./docs/assets/images/install.png\" width=300></img></div>\n\n## Why Outlines?\n\nLLMs are powerful but their outputs are unpredictable. Most solutions attempt to fix bad outputs after generation using parsing, regex, or fragile code that breaks easily.\n\nOutlines guarantees structured outputs during generation — directly from any LLM.\n\n- **Works with any model** - Same code runs across OpenAI, Ollama, vLLM, and more\n- **Simple integration** - Just pass your desired output type: `model(prompt, output_type)`\n- **Guaranteed valid structure** - No more parsing headaches or broken JSON\n- **Provider independence** - Switch models without changing code\n\n\n### The Outlines Philosophy\n\n<div align=\"center\"><img src=\"./docs/assets/images/use_philosophy.png\" width=300></img></div>\n\nOutlines follows a simple pattern that mirrors Python's own type system. 
Simply specify the desired output type, and Outlines will ensure your data matches that structure exactly:\n\n- For a yes/no response, use `Literal[\"Yes\", \"No\"]`\n- For numerical values, use `int`\n- For complex objects, define a structure with a [Pydantic model](https://docs.pydantic.dev/latest/)\n\n## Quickstart\n\nGetting started with outlines is simple:\n\n### 1. Install outlines\n\n``` shell\npip install outlines\n```\n\n### 2. Connect to your preferred model\n\n``` python\nimport outlines\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\n\n\nMODEL_NAME = \"microsoft/Phi-3-mini-4k-instruct\"\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map=\"auto\"),\n    AutoTokenizer.from_pretrained(MODEL_NAME)\n)\n```\n\n### 3. Start with simple structured outputs\n\n``` python\nfrom typing import Literal\nfrom pydantic import BaseModel\n\n\n# Simple classification\nsentiment = model(\n    \"Analyze: 'This product completely changed my life!'\",\n    Literal[\"Positive\", \"Negative\", \"Neutral\"]\n)\nprint(sentiment)  # \"Positive\"\n\n# Extract specific types\ntemperature = model(\"What's the boiling point of water in Celsius?\", int)\nprint(temperature)  # 100\n```\n\n### 4. 
Create complex structures\n\n``` python\nfrom pydantic import BaseModel\nfrom enum import Enum\n\nclass Rating(Enum):\n    poor = 1\n    fair = 2\n    good = 3\n    excellent = 4\n\nclass ProductReview(BaseModel):\n    rating: Rating\n    pros: list[str]\n    cons: list[str]\n    summary: str\n\nreview = model(\n    \"Review: The XPS 13 has great battery life and a stunning display, but it runs hot and the webcam is poor quality.\",\n    ProductReview,\n    max_new_tokens=200,\n)\n\nreview = ProductReview.model_validate_json(review)\nprint(f\"Rating: {review.rating.name}\")  # \"Rating: good\"\nprint(f\"Pros: {review.pros}\")           # \"Pros: ['great battery life', 'stunning display']\"\nprint(f\"Summary: {review.summary}\")     # \"Summary: Good laptop with great display but thermal issues\"\n```\n\n## Real-world examples\n\nHere are production-ready examples showing how Outlines solves common problems:\n\n<details id=\"customer-support-triage\"><summary><b>🙋‍♂️ Customer Support Triage</b>\n<br>This example shows how to convert a free-form customer email into a structured service ticket. 
By parsing attributes like priority, category, and escalation flags, the code enables automated routing and handling of support issues.\n</summary>\n\n``` python\nimport outlines\nfrom enum import Enum\nfrom pydantic import BaseModel\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\nfrom typing import List\n\n\nMODEL_NAME = \"microsoft/Phi-3-mini-4k-instruct\"\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map=\"auto\"),\n    AutoTokenizer.from_pretrained(MODEL_NAME)\n)\n\n\ndef alert_manager(ticket):\n    print(\"Alert!\", ticket)\n\n\nclass TicketPriority(str, Enum):\n    low = \"low\"\n    medium = \"medium\"\n    high = \"high\"\n    urgent = \"urgent\"\n\nclass ServiceTicket(BaseModel):\n    priority: TicketPriority\n    category: str\n    requires_manager: bool\n    summary: str\n    action_items: List[str]\n\n\ncustomer_email = \"\"\"\nSubject: URGENT - Cannot access my account after payment\n\nI paid for the premium plan 3 hours ago and still can't access any features.\nI've tried logging out and back in multiple times. 
This is unacceptable as I\nhave a client presentation in an hour and need the analytics dashboard.\nPlease fix this immediately or refund my payment.\n\"\"\"\n\nprompt = f\"\"\"\n<|im_start|>user\nAnalyze this customer email:\n\n{customer_email}\n<|im_end|>\n<|im_start|>assistant\n\"\"\"\n\nticket = model(\n    prompt,\n    ServiceTicket,\n    max_new_tokens=500\n)\n\n# Use structured data to route the ticket\nticket = ServiceTicket.model_validate_json(ticket)\nif ticket.priority == \"urgent\" or ticket.requires_manager:\n    alert_manager(ticket)\n```\n</details>\n\n<details id=\"e-commerce-product-categorization\"><summary><b>📦 E-commerce product categorization</b>\n<br>This use case demonstrates how outlines can transform product descriptions into structured categorization data (e.g., main category, sub-category, and attributes) to streamline tasks such as inventory management. Each product description is processed automatically, reducing manual categorization overhead.\n</summary>\n\n```python\nimport outlines\nfrom pydantic import BaseModel\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\nfrom typing import List, Optional\n\n\nMODEL_NAME = \"microsoft/Phi-3-mini-4k-instruct\"\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map=\"auto\"),\n    AutoTokenizer.from_pretrained(MODEL_NAME)\n)\n\n\ndef update_inventory(product, category, sub_category):\n    print(f\"Updated {product.split(',')[0]} in category {category}/{sub_category}\")\n\n\nclass ProductCategory(BaseModel):\n    main_category: str\n    sub_category: str\n    attributes: List[str]\n    brand_match: Optional[str]\n\n# Process product descriptions in batches\nproduct_descriptions = [\n    \"Apple iPhone 15 Pro Max 256GB Titanium, 6.7-inch Super Retina XDR display with ProMotion\",\n    \"Organic Cotton T-Shirt, Men's Medium, Navy Blue, 100% Sustainable Materials\",\n    \"KitchenAid Stand Mixer, 5 Quart, Red, 10-Speed Settings with Dough 
Hook Attachment\"\n]\n\ntemplate = outlines.Template.from_string(\"\"\"\n<|im_start|>user\nCategorize this product:\n\n{{ description }}\n<|im_end|>\n<|im_start|>assistant\n\"\"\")\n\n# Get structured categorization for all products\ncategories = model(\n    [template(description=desc) for desc in product_descriptions],\n    ProductCategory,\n    max_new_tokens=200\n)\n\n# Use categorization for inventory management\ncategories = [\n    ProductCategory.model_validate_json(category) for category in categories\n]\nfor product, category in zip(product_descriptions, categories):\n    update_inventory(product, category.main_category, category.sub_category)\n```\n</details>\n\n<details id=\"parse-event-details-with-incomplete-data\"><summary><b>📊 Parse event details with incomplete data</b>\n<br>This example uses outlines to parse event descriptions into structured information (like event name, date, location, type, and topics), even handling cases where the data is incomplete. It leverages union types to return either structured event data or a fallback “I don’t know” answer, ensuring robust extraction in varying scenarios.\n</summary>\n\n```python\nimport outlines\nfrom typing import Union, List, Literal\nfrom pydantic import BaseModel\nfrom enum import Enum\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\n\n\nMODEL_NAME = \"microsoft/Phi-3-mini-4k-instruct\"\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map=\"auto\"),\n    AutoTokenizer.from_pretrained(MODEL_NAME)\n)\n\nclass EventType(str, Enum):\n    conference = \"conference\"\n    webinar = \"webinar\"\n    workshop = \"workshop\"\n    meetup = \"meetup\"\n    other = \"other\"\n\n\nclass EventInfo(BaseModel):\n    \"\"\"Structured information about a tech event\"\"\"\n    name: str\n    date: str\n    location: str\n    event_type: EventType\n    topics: List[str]\n    registration_required: bool\n\n# Create a union type that can either be a 
structured EventInfo or \"I don't know\"\nEventResponse = Union[EventInfo, Literal[\"I don't know\"]]\n\n# Sample event descriptions\nevent_descriptions = [\n    # Complete information\n    \"\"\"\n    Join us for DevCon 2023, the premier developer conference happening on November 15-17, 2023\n    at the San Francisco Convention Center. Topics include AI/ML, cloud infrastructure, and web3.\n    Registration is required.\n    \"\"\",\n\n    # Insufficient information\n    \"\"\"\n    Tech event next week. More details coming soon!\n    \"\"\"\n]\n\n# Process events\nresults = []\nfor description in event_descriptions:\n    prompt = f\"\"\"\n<|im_start|>system\nYou are a helpful assistant\n<|im_end|>\n<|im_start|>user\nExtract structured information about this tech event:\n\n{description}\n\nIf there is enough information, return a JSON object with the following fields:\n\n- name: The name of the event\n- date: The date where the event is taking place\n- location: Where the event is taking place\n- event_type: either 'conference', 'webinar', 'workshop', 'meetup' or 'other'\n- topics: a list of topics of the conference\n- registration_required: a boolean that indicates whether registration is required\n\nIf the information available does not allow you to fill this JSON, and only then, answer 'I don't know'.\n<|im_end|>\n<|im_start|>assistant\n\"\"\"\n    # Union type allows the model to return structured data or \"I don't know\"\n    result = model(prompt, EventResponse, max_new_tokens=200)\n    results.append(result)\n\n# Display results\nfor i, result in enumerate(results):\n    print(f\"Event {i+1}:\")\n    if isinstance(result, str):\n        print(f\"  {result}\")\n    else:\n        # It's an EventInfo object\n        print(f\"  Name: {result.name}\")\n        print(f\"  Type: {result.event_type}\")\n        print(f\"  Date: {result.date}\")\n        print(f\"  Topics: {', '.join(result.topics)}\")\n    print()\n\n# Use structured data in downstream 
processing\nstructured_count = sum(1 for r in results if isinstance(r, EventInfo))\nprint(f\"Successfully extracted data for {structured_count} of {len(results)} events\")\n```\n</details>\n\n<details id=\"categorize-documents-into-predefined-types\"><summary><b>🗂️ Categorize documents into predefined types</b>\n<br>In this case, outlines classifies documents into predefined categories (e.g., “Financial Report,” “Legal Contract”) using a literal type specification. The resulting classifications are displayed in both a table format and through a category distribution summary, illustrating how structured outputs can simplify content management.\n</summary>\n\n```python\nimport outlines\nfrom typing import Literal, List\nimport pandas as pd\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\n\n\nMODEL_NAME = \"microsoft/Phi-3-mini-4k-instruct\"\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map=\"auto\"),\n    AutoTokenizer.from_pretrained(MODEL_NAME)\n)\n\n\n# Define classification categories using Literal\nDocumentCategory = Literal[\n    \"Financial Report\",\n    \"Legal Contract\",\n    \"Technical Documentation\",\n    \"Marketing Material\",\n    \"Personal Correspondence\"\n]\n\n# Sample documents to classify\ndocuments = [\n    \"Q3 Financial Summary: Revenue increased by 15% year-over-year to $12.4M. EBITDA margin improved to 23% compared to 19% in Q3 last year. Operating expenses...\",\n\n    \"This agreement is made between Party A and Party B, hereinafter referred to as 'the Parties', on this day of...\",\n\n    \"The API accepts POST requests with JSON payloads. Required parameters include 'user_id' and 'transaction_type'. 
The endpoint returns a 200 status code on success.\"\n]\n\ntemplate = outlines.Template.from_string(\"\"\"\n<|im_start|>user\nClassify the following document into exactly one category among the following categories:\n- Financial Report\n- Legal Contract\n- Technical Documentation\n- Marketing Material\n- Personal Correspondence\n\nDocument:\n{{ document }}\n<|im_end|>\n<|im_start|>assistant\n\"\"\")\n\n# Classify documents\ndef classify_documents(texts: List[str]) -> List[DocumentCategory]:\n    results = []\n\n    for text in texts:\n        prompt = template(document=text)\n        # The model must return one of the predefined categories\n        category = model(prompt, DocumentCategory, max_new_tokens=200)\n        results.append(category)\n\n    return results\n\n# Perform classification\nclassifications = classify_documents(documents)\n\n# Create a simple results table\nresults_df = pd.DataFrame({\n    \"Document\": [doc[:50] + \"...\" for doc in documents],\n    \"Classification\": classifications\n})\n\nprint(results_df)\n\n# Count documents by category\ncategory_counts = pd.Series(classifications).value_counts()\nprint(\"\\nCategory Distribution:\")\nprint(category_counts)\n```\n</details>\n\n<details>\n<summary id=\"schedule-a-meeting-with-function-calling\"><b>📅 Schedule a meeting from requests with Function Calling</b>\n<br>This example demonstrates how outlines can interpret a natural language meeting request and translate it into a structured format matching a predefined function’s parameters. 
Once the meeting details are extracted (e.g., title, date, duration, attendees), they are used to automatically schedule the meeting.\n</summary>\n\n```python\nimport outlines\nimport json\nfrom typing import List, Optional\nfrom datetime import date\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\n\n\nMODEL_NAME = \"microsoft/phi-4\"\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map=\"auto\"),\n    AutoTokenizer.from_pretrained(MODEL_NAME)\n)\n\n\n# Define a function with typed parameters\ndef schedule_meeting(\n    title: str,\n    date: date,\n    duration_minutes: int,\n    attendees: List[str],\n    location: Optional[str] = None,\n    agenda_items: Optional[List[str]] = None\n):\n    \"\"\"Schedule a meeting with the specified details\"\"\"\n    # In a real app, this would create the meeting\n    meeting = {\n        \"title\": title,\n        \"date\": date,\n        \"duration_minutes\": duration_minutes,\n        \"attendees\": attendees,\n        \"location\": location,\n        \"agenda_items\": agenda_items\n    }\n    return f\"Meeting '{title}' scheduled for {date} with {len(attendees)} attendees\"\n\n# Natural language request\nuser_request = \"\"\"\nI need to set up a product roadmap review with the engineering team for next\nTuesday at 2pm. It should last 90 minutes. 
Please invite john@example.com,\nsarah@example.com, and the product team at product@example.com.\n\"\"\"\n\n# Outlines automatically infers the required structure from the function signature\nprompt = f\"\"\"\n<|im_start|>user\nExtract the meeting details from this request:\n\n{user_request}\n<|im_end|>\n<|im_start|>assistant\n\"\"\"\nmeeting_params = model(prompt, schedule_meeting, max_new_tokens=200)\n\n# The result is a dictionary matching the function parameters\nmeeting_params = json.loads(meeting_params)\nprint(meeting_params)\n\n# Call the function with the extracted parameters\nresult = schedule_meeting(**meeting_params)\nprint(result)\n# \"Meeting 'Product Roadmap Review' scheduled for 2023-10-17 with 3 attendees\"\n```\n</details>\n\n<details>\n<summary id=\"dynamically-generate-prompts-with-re-usable-templates\"><b>📝 Dynamically generate prompts with re-usable templates</b>\n<br>Using Jinja-based templates, this example shows how to generate dynamic prompts for tasks like sentiment analysis. It illustrates how to easily re-use and customize prompts—including few-shot learning strategies—for different content types while ensuring the outputs remain structured.\n</summary>\n\n```python\nimport outlines\nfrom typing import List, Literal\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\n\n\nMODEL_NAME = \"microsoft/phi-4\"\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map=\"auto\"),\n    AutoTokenizer.from_pretrained(MODEL_NAME)\n)\n\n\n# 1. Create a reusable template with Jinja syntax\nsentiment_template = outlines.Template.from_string(\"\"\"\n<|im_start|>user\nAnalyze the sentiment of the following {{ content_type }}:\n\n{{ text }}\n\nProvide your analysis as either \"Positive\", \"Negative\", or \"Neutral\".\n<|im_end|>\n<|im_start|>assistant\n\"\"\")\n\n# 2. Generate prompts with different parameters\nreview = \"This restaurant exceeded all my expectations. 
Fantastic service!\"\nprompt = sentiment_template(content_type=\"review\", text=review)\n\n# 3. Use the templated prompt with structured generation\nresult = model(prompt, Literal[\"Positive\", \"Negative\", \"Neutral\"])\nprint(result)  # \"Positive\"\n\n# Templates can also be loaded from files\nexample_template = outlines.Template.from_file(\"templates/few_shot.txt\")\n\n# Use with examples for few-shot learning\nexamples = [\n    (\"The food was cold\", \"Negative\"),\n    (\"The staff was friendly\", \"Positive\")\n]\nfew_shot_prompt = example_template(examples=examples, query=\"Service was slow\")\nprint(few_shot_prompt)\n```\n</details>\n\n## They use outlines\n\n<div align=\"center\">\n<img src=\"./docs/assets/images/readme-light.png#gh-light-mode-only\" alt=\"Users Logo\"></img>\n<img src=\"./docs/assets/images/readme-dark.png#gh-dark-mode-only\" alt=\"Users Logo\"></img>\n</div>\n\n## Model Integrations\n\n| Model type | Description | Documentation |\n|---------|-------------|:-------------:|\n| **Server Support** | vLLM and Ollama | [Server Integrations →](https://dottxt-ai.github.io/outlines/latest/features/models/) |\n| **Local Model Support** | transformers and llama.cpp | [Model Integrations →](https://dottxt-ai.github.io/outlines/latest/features/models/) |\n| **API Support** | OpenAI and Gemini | [API Integrations →](https://dottxt-ai.github.io/outlines/latest/features/models/) |\n\n## Core Features\n\n| Feature | Description | Documentation |\n|---------|-------------|:-------------:|\n| **Multiple Choices** | Constrain outputs to predefined options | [Multiple Choices Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#multiple-choices) |\n| **Function Calls** | Infer structure from function signatures | [Function Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#json-schemas) |\n| **JSON/Pydantic** | Generate outputs matching JSON schemas | [JSON Guide 
→](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#json-schemas) |\n| **Regular Expressions** | Generate text following a regex pattern | [Regex Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#regex-patterns) |\n| **Grammars** | Enforce complex output structures | [Grammar Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#context-free-grammars) |\n\n## Other Features\n\n| Feature | Description | Documentation |\n|---------|-------------|:-------------:|\n| **Prompt templates** | Separate complex prompts from code | [Template Guide →](https://dottxt-ai.github.io/outlines/latest/features/utility/template/) |\n| **Custom types** | Intuitive interface to build complex types | [Python Types Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#basic-python-types) |\n| **Applications** | Encapsulate templates and types into functions | [Application Guide →](https://dottxt-ai.github.io/outlines/latest/features/utility/application/) |\n\n## About .txt\n\n<div align=\"center\">\n<img src=\"./docs/assets/images/dottxt-light.svg#gh-light-mode-only\" alt=\"dottxt logo\" width=100></img>\n<img src=\"./docs/assets/images/dottxt-dark.svg#gh-dark-mode-only\" alt=\"dottxt logo\" width=100></img>\n</div>\n\nOutlines is developed and maintained by [.txt](https://dottxt.co), a company dedicated to making LLMs more reliable for production applications.\n\nOur focus is on advancing structured generation technology through:\n\n- 🧪 **Cutting-edge Research**: We publish our findings on [structured generation](http://blog.dottxt.co/performance-gsm8k.html)\n- 🚀 **Enterprise-grade solutions**: You can license [our enterprise-grade libraries](https://docs.dottxt.co).\n- 🧩 **Open Source Collaboration**: We believe in building in public and contributing to the community\n\nFollow us on [Twitter](https://twitter.com/dottxtai) or check out our [blog](https://blog.dottxt.co/) to stay 
updated on our latest work in making LLMs more reliable.\n\n## Community\n\n<div align=\"center\" style=\"margin-bottom: 1em;\">\n\n[![Contributors][contributors-badge]][contributors]\n[![Stars][stars-badge]][stars]\n[![Downloads][downloads-badge]][pypistats]\n[![Discord badge][discord-badge]][discord]\n\n</div>\n\n- 💡 **Have an idea?** Come chat with us on [Discord][discord]\n- 🐞 **Found a bug?** Open an [issue](https://github.com/dottxt-ai/outlines/issues)\n- 🧩  **Want to contribute?** Consult our [contribution guide](https://dottxt-ai.github.io/outlines/latest/community/contribute/).\n\n\n## Cite Outlines\n\n```\n@article{willard2023efficient,\n  title={Efficient Guided Generation for Large Language Models},\n  author={Willard, Brandon T and Louf, R{\\'e}mi},\n  journal={arXiv preprint arXiv:2307.09702},\n  year={2023}\n}\n```\n\n[contributors]: https://github.com/dottxt-ai/outlines/graphs/contributors\n[contributors-badge]: https://img.shields.io/github/contributors/dottxt-ai/outlines?style=flat-square&logo=github&logoColor=white&color=ECEFF4\n[dottxt-blog]: https://blog.dottxt.co/\n[dottxt-blog-badge]: https://img.shields.io/badge/dottxt%20blog-a6b4a3\n[dottxt-twitter]: https://twitter.com/dottxtai\n[dottxt-twitter-badge]: https://img.shields.io/twitter/follow/dottxtai?style=social\n[discord]: https://discord.gg/R9DSu34mGd\n[discord-badge]: https://img.shields.io/discord/1182316225284554793?color=ddb8ca&logo=discord&logoColor=white&style=flat-square\n[downloads-badge]: https://img.shields.io/pypi/dm/outlines?color=A6B4A3&logo=python&logoColor=white&style=flat-square\n[pypistats]: https://pypistats.org/packages/outlines\n[pypi-version-badge]: https://img.shields.io/pypi/v/outlines?style=flat-square&logoColor=white&color=ddb8ca\n[pypi]: https://pypi.org/project/outlines/\n[stars]: https://github.com/dottxt-ai/outlines/stargazers\n[stars-badge]: 
https://img.shields.io/github/stars/dottxt-ai/outlines?style=flat-square&logo=github&color=BD932F&logoColor=white\n[twitter-badge]: https://img.shields.io/twitter/follow/dottxtai?style=flat-square&logo=x&logoColor=white&color=bd932f\n[twitter]: https://x.com/dottxtai\n"
  },
  {
    "path": "docs/api_reference/index.md",
    "content": "# API Reference\n"
  },
  {
    "path": "docs/blog/index.md",
    "content": "# Blog\n"
  },
  {
    "path": "docs/community/contribute.md",
    "content": "---\ntitle: Contribute\n---\n\n## What contributions?\n\n- **Documentation** contributions are very valuable to us!\n- **Examples.** Show us what you did with Outlines :)\n- **Bug reports** with a minimal working example in the [issue tracker][issues]\n- **Bug fixes** are always a pleasure to review.\n- **New features**. Please start a new [discussion][discussions], or [come chat with us][discord] beforehand!\n\nNote that the [issue tracker][issues] is only intended for actionable items. When in doubt, open a [discussion][discussions] or [come talk to us][discord].\n\n## How to contribute?\n\n### Setup\n\nFirst, [fork the repository on GitHub](https://github.com/dottxt-ai/outlines/fork) and clone the fork locally:\n\n```shell\ngit clone git@github.com:YourUserName/outlines.git\ncd outlines\n```\n\nCreate a new virtual environment:\n\n*If you are using `uv`*:\n\n```shell\nuv venv\nsource .venv/bin/activate\nalias pip=\"uv pip\" # ... or just remember to prepend any pip command with uv in the rest of this guide\n```\n\n*If you are using `venv`*:\n\n```shell\npython -m venv .venv\nsource .venv/bin/activate\n```\n\n*If you are using `conda`*:\n\n```shell\nconda env create -f environment.yml\n```\n\nThen install the dependencies in editable mode, and install the `pre-commit` hooks:\n\n```shell\npip install -e \".[test]\"\npre-commit install\n```\nIf you own a GPU and want to run the vLLM tests you will have to run:\n\n```shell\npip install -e \".[test-gpu]\"\n```\n\ninstead.\n\nOutlines provides optional dependencies for different supported backends, which you can install with\n\n```shell\npip install \".[vllm]\"\n```\n\nA list of supported optional dependencies can be found in the [installation guide](/installation).\n\n### Using VSCode DevContainer / GitHub Codespaces\n\nIf you want a fully pre-configured development environment, you can use VSCode DevContainers or GitHub Codespaces.\n\n#### VSCode DevContainer\n\n1. 
Ensure that the [Docker](https://www.docker.com/get-started/) daemon is running on your machine.\n2. Install the [Dev Containers](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) extension in VSCode.\n3. Open the Outlines repository in VSCode. When prompted, **Reopen in Container** (or press `F1` and select \"Remote-Containers: Reopen in Container\").\n4. Run the normal setup steps. Your environment will not complain about missing system dependencies!\n\n#### GitHub Codespaces\n\n1. Navigate to the Outlines repository on GitHub.\n2. Click on the **Code** button and select the **Codespaces** tab.\n3. Click **Create codespace on main** (or another branch you are working on).\n4. GitHub will launch a pre-configured cloud development environment.\n\nYou will not have access to a GPU, but you'll be able to make basic contributions to the project on the go while using a fully featured web-based IDE.\n\n### Before pushing your code\n\nRun the tests:\n\n```shell\npytest\n```\n\nAnd run the code style checks:\n\n```shell\npre-commit run --all-files\n```\n\n### Benchmarking\n\nOutlines uses [asv](https://asv.readthedocs.io) for automated benchmark testing. 
Benchmarks are run automatically before pull requests are merged to prevent performance degradation.\n\nYou can run the benchmark test suite locally with the following command:\n\n```shell\nasv run --config benchmarks/asv.conf.json\n```\n\nCaveats:\n\n- If you're on a device with CUDA, you must add the argument `--launch-method spawn`\n- Uncommitted code will not be benchmarked, you must first commit your changes.\n\n#### Run a specific test:\n\n```shell\nasv run --config benchmarks/asv.conf.json -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm\n```\n\n#### Profile a specific test:\n\n```shell\nasv run --config benchmarks/asv.conf.json --profile -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm\n```\n\n#### Compare to `origin/main`\n\n```shell\ngit fetch origin\nasv continuous origin/main HEAD --config benchmarks/asv.conf.json\n```\n\n#### ASV PR Behavior\n\n- **View ASV Benchmark Results:** Open the workflow, view `BENCHMARK RESULTS` section.\n- Merging is blocked unless benchmarks are run for the latest commit.\n- Benchmarks fail if performance degrades by more than 10% for any individual benchmark.\n- The \"Benchmark PR\" workflow runs when it is manually dispatched; if the `run_benchmarks` label is added to the PR, it runs for every commit.\n\n### Contribute to the documentation\n\nTo work on the *documentation* you will need to install the related dependencies:\n\n```shell\npip install -r requirements-doc.txt\n```\n\nTo build the documentation and serve it locally, run the following command in the repository's root folder:\n\n```shell\nmkdocs serve\n```\n\nBy following these instructions you will be able to view the documentation locally.\nIt will be updated every time you make a change.\n\n## Open a Pull Request\n\nCreate a new branch on your fork, commit and push the changes:\n\n```shell\ngit checkout -b new-branch\ngit add .\ngit commit -m \"Changes I made\"\ngit push origin new-branch\n```\n\nThen you can [open a pull 
request][pull-requests] on GitHub. It should prompt you to do so. Every subsequent change that you make on your branch will update the pull request.\n\nDo not hesitate to open a draft PR before your contribution is ready, especially if you have questions and/or need feedback. If you need help, come tell us on [Discord][discord].\n\n[discord]: https://discord.gg/R9DSu34mGd\n[discussions]: https://github.com/dottxt-ai/outlines/discussions\n[issues]: https://github.com/dottxt-ai/outlines/issues\n[pull-requests]: https://github.com/dottxt-ai/outlines/pulls\n"
  },
  {
    "path": "docs/community/examples.md",
    "content": "# Community projects and articles\n\nPublishing examples and articles about Outlines is a meaningful way to contribute to the community. Here is a list of projects we are aware of. Drop us a line if we forgot yours!\n\n[MMSG](https://github.com/leloykun/mmsg) is a Python library for generating interleaved text and image content in a structured format you can directly pass to downstream APIs.\n\n[Multimodal Structured Generation: CVPR's 2nd MMFM Challenge Technical Report](https://arxiv.org/abs/2406.11403) shows that Structured Generation can outperform finetuning, and maybe even multimodality, in document-image understanding tasks as part of CVPR's 2nd MMFM Challenge.\n\n[Chess LLM Arena](https://huggingface.co/spaces/mlabonne/chessllm) is a HuggingFace Space where you can make LLMs compete in a chess match.\n\n[LLM Data Gen](https://huggingface.co/spaces/lhoestq/LLM_DataGen) is a HuggingFace Space that generates synthetic dataset files in JSONLines format.\n\n[Fast, High-Fidelity LLM Decoding with Regex Constraints](https://vivien000.github.io/blog/journal/llm-decoding-with-regex-constraints.html) presents an efficient alternative to Outlines's structured generation.\n\n[gigax](https://github.com/GigaxGames/gigax) is an Open-Source library that allows you to create real-time LLM-powered NPCs for video games.\n\n[Improving Prompt Consistency with Structured Generations](https://huggingface.co/blog/evaluation-structured-outputs) shows how structured generation can improve consistency of evaluation runs by reducing sensitivity to changes in prompt format.\n\n[AskNews](https://asknews.app) is a news curation service processing 300k news articles per day in a structured way, with Outlines.\n"
  },
  {
    "path": "docs/community/feedback.md",
    "content": "---\ntitle: Feedback\n---\n\n# Feedback\n\nIf Outlines has been helpful to you, let us know on [Discord][discord] or give us a shoutout on [Twitter][twitter]! It's always heartwarming ❤️\n\n\n<head>\n  <!-- From Marvin AI's documentation -->\n  <!-- Their library is also awesome -->\n  <!-- https://www.askmarvin.ai/ -->\n  <style>\n    .tweet-masonry {\n      column-count: 2;\n      column-gap: 20px;\n      padding: 20px;\n    }\n\n    .twitter-tweet {\n      display: inline-block;\n      width: 100%;\n      margin-bottom: 20px;\n      margin-top: 0px !important;\n      break-inside: avoid;\n    }\n\n    @media (max-width: 600px) {\n      .tweet-masonry {\n        column-count: 1;\n      }\n    }\n  </style>\n</head>\n<body>\n\n<div class=\"tweet-masonry\">\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">I am once again reminding you that structured extraction using LLMs is going to transform every single industry in the next 10 years <a href=\"https://t.co/xQ3tcWnrZ8\">https://t.co/xQ3tcWnrZ8</a></p>&mdash; Sam Hogan (@0xSamHogan) <a href=\"https://twitter.com/0xSamHogan/status/1780637917737816323?ref_src=twsrc%5Etfw\">April 17, 2024</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">outline&#39;s growth is insane, using is an understatement! <a href=\"https://t.co/rHCNWhZdCs\">https://t.co/rHCNWhZdCs</a></p>&mdash; jason liu (@jxnlco) <a href=\"https://twitter.com/jxnlco/status/1780618454040797554?ref_src=twsrc%5Etfw\">April 17, 2024</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">Outlines is an amazing lib and more popular than <a href=\"https://twitter.com/remilouf?ref_src=twsrc%5Etfw\">@remilouf</a>’s modesty will admit. 
<a href=\"https://t.co/DfHbMPIlX1\">https://t.co/DfHbMPIlX1</a> <a href=\"https://t.co/mDHIWJrD0C\">https://t.co/mDHIWJrD0C</a></p>&mdash; Delip Rao e/σ (@deliprao) <a href=\"https://twitter.com/deliprao/status/1780780217180598377?ref_src=twsrc%5Etfw\">April 18, 2024</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">Impressive implementation of a true regex / json / grammar guided text generation <a href=\"https://t.co/RX5RVYaVIx\">pic.twitter.com/RX5RVYaVIx</a></p>&mdash; Rohan Paul (@rohanpaul_ai) <a href=\"https://twitter.com/rohanpaul_ai/status/1741099984299135403?ref_src=twsrc%5Etfw\">December 30, 2023</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">Most underrated Github Repo in AI + LLM JSON guided Generation: <a href=\"https://t.co/lSB8KIet1H\">https://t.co/lSB8KIet1H</a></p>&mdash; 🎙Jean-Louis Queguiner (@JiliJeanlouis) <a href=\"https://twitter.com/JiliJeanlouis/status/1736857292581093706?ref_src=twsrc%5Etfw\">December 18, 2023</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">Nice and useful. 
<a href=\"https://t.co/LX72AE0lgt\">https://t.co/LX72AE0lgt</a></p>&mdash; Dan Roy (@roydanroy) <a href=\"https://twitter.com/roydanroy/status/1691556956941525458?ref_src=twsrc%5Etfw\">August 15, 2023</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">HUGE dub for open source AI <a href=\"https://t.co/bYKuiEUZ1j\">https://t.co/bYKuiEUZ1j</a></p>&mdash; kenneth 🖇 (@k3nnethfrancis) <a href=\"https://twitter.com/k3nnethfrancis/status/1691304781732843521?ref_src=twsrc%5Etfw\">August 15, 2023</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">This is amazing - glad to see more outp guidance modules! <br><br>Will try this out soon I&#39;m wondering how they translate from regex automatons to token boundaries<br><br>Also why Open Source will succeed. Even today I don&#39;t see any guided output functionality from the big providers. <a href=\"https://t.co/Ity2H25Klf\">https://t.co/Ity2H25Klf</a></p>&mdash; Hrishi (@hrishioa) <a href=\"https://twitter.com/hrishioa/status/1691181499671080960?ref_src=twsrc%5Etfw\">August 14, 2023</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">Outlines - a library to help LLM developers guide text generation in a fast and reliable way.<br><br>&quot;Provides generation methods that guarantee that the output will match a regular expressions, or follow a JSON schema.&quot;<br><br>Need to check this out. 
Reliable JSON output is a common use… <a href=\"https://t.co/Bkbh8vKogN\">pic.twitter.com/Bkbh8vKogN</a></p>&mdash; elvis (@omarsar0) <a href=\"https://twitter.com/omarsar0/status/1691179888214966273?ref_src=twsrc%5Etfw\">August 14, 2023</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">Woah this is cool! Makes open source models more usable.<br><br>Give any LLM Function Call capability (and more) with Outlines: <a href=\"https://t.co/PtPykR5ZGR\">https://t.co/PtPykR5ZGR</a> <a href=\"https://t.co/RRQjWHnIxv\">https://t.co/RRQjWHnIxv</a> <a href=\"https://t.co/BwNnH8SMwv\">pic.twitter.com/BwNnH8SMwv</a></p>&mdash; Yohei (@yoheinakajima) <a href=\"https://twitter.com/yoheinakajima/status/1691231912466223104?ref_src=twsrc%5Etfw\">August 14, 2023</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">This is awesome! Being able to guarantee the output&#39;s structure unblocks so many applications. This is a great milestone and a fundamental building block for more advanced AI apps. <a href=\"https://t.co/WdwMOc7hE8\">https://t.co/WdwMOc7hE8</a></p>&mdash; Guilherme Castro (@skastr052) <a href=\"https://twitter.com/skastr052/status/1691239359494619136?ref_src=twsrc%5Etfw\">August 15, 2023</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">Juggling with the unpredictable outputs of ChatGPT API lately while building my product. 😓 <br><br>Tried prompt engineering to channel its wisdom into a neat JSON, but it&#39;s like asking a cat to fetch. 
🐱<br><br>Luckily, stumbled upon &quot;Outlines&quot; – looks like a promising way to tame the LLM… <a href=\"https://t.co/oYQ6q8exAS\">pic.twitter.com/oYQ6q8exAS</a></p>&mdash; Charlie (@14435635Sun) <a href=\"https://twitter.com/14435635Sun/status/1691439342689095680?ref_src=twsrc%5Etfw\">August 15, 2023</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n\n<blockquote class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">A complex system of LLM input-outputs interacting with non-LLM agents and models benefits immeasurably from structured outputs. The outlines package saves so much time, <a href=\"https://t.co/NhVQ6NpKDR\">https://t.co/NhVQ6NpKDR</a></p>&mdash; Amir Sani (@amirsani) <a href=\"https://twitter.com/amirsani/status/1728734266568376433?ref_src=twsrc%5Etfw\">November 26, 2023</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>\n</div>\n</body>\n</html>\n\n# Let us know!\n\nWe highly value the insights of our users, and we would love to hear from you. If you are using Outlines for your projects and would like to share your experience with us, let's connect:\n\n- What are you building with it?\n- What do you like about it?\n- What challenges are you facing?\n- What do you think could be improved?\n\nTo schedule an appointment follow [this link](https://cal.com/dottxt/outlines). This is exclusively intended to share your experience, please go on [Discord][discord] or [GitHub](https://github.com/dottxt-ai/outlines/discussions) for support.\n\n[discord]: https://discord.gg/UppQmhEpe8\n[twitter]: https://twitter.com/dottxtai\n"
  },
  {
    "path": "docs/community/index.md",
    "content": "# Community\n\nOutlines exists for a community of users who believe software doesn't need to be complicated. Who share the same passion for Large Language Models but don't want to compromise on robustness. Together, we are bringing these powerful models back to the world of software.\n\n## Connect on Discord\n\nThe Outlines community lives on our Discord server. There you can ask questions, share ideas or just chat with people like you. Don't be a stranger and [join us][discord].\n\n[discord]: https://discord.gg/UppQmhEpe8\n"
  },
  {
    "path": "docs/community/versioning.md",
    "content": "---\ntitle: Versioning Guide\n---\n\n# Versioning Guide\n\n\nThe Outlines project follows a structured versioning scheme designed to provide clarity and minimize risk for downstream dependents.\n\nEach part of the version number (`major.minor.patch`) conveys information about the nature and impact of the changes included in the release.\n\n- **Major Releases** include compatibility-breaking changes to core interfaces, such as `LogitsProcessor`s and `Guides`.\n- **Minor Releases** introduce changes of substance to internal or unexposed functionality. These changes are well tested and intended to maintain compatibility with existing use of core interfaces.\n- **Patch Releases** address bug fixes and incorporate low-risk changes to improve stability and performance.\n\n!!! note \"Breaking Changes\"\n\n    Outlines v1.0 introduced several breaking changes to the core interface. See [the migration guide](/user_guide/migration) for more details.\n\n## Releases\n\nReleases along with release notes can be found on the [Outlines Releases GitHub Page](https://github.com/dottxt-ai/outlines/releases).\n\n## Version Pinning Recommendations\n\nHere are our recommendations for managing dependencies on the Outlines package:\n\n**Small, Risk-Tolerant Projects:** Pin to a specific major version.\n\n**Large, Conservative Projects:** Pin to a specific minor version.\n"
  },
  {
    "path": "docs/core_concepts.md",
    "content": "---\ntitle: Core concepts\n---\n\n# Core concepts\n\nComing soon. This will document various concepts at a high level, so users can understand Outlines before diving into specific implementations.\n\n1. Constrained decoding, tokens, and the basics of logit biasing\n2. Different ways to define output structure (regex, JSON schema, Pydantic models, context-free grammars)\n3. How finite state machines are used to guarantee output structure\n4. `Generator`, `Application`, `Template`\n5. Prompt engineering vs. structured generation\n"
  },
  {
    "path": "docs/examples/chain_of_density.md",
    "content": "# Summarize documents using Chain of Density prompting\n\nA good summary should be informative, concise and clear. While large language models are generally good at summarizing documents, their summaries tend to be long and contain redundant information; their information density tends to be on the lower end. This is where [Chain of Density](https://arxiv.org/abs/2309.04269), a new prompting technique, comes in. In this example we will show how one can implement Chain of Density with a few lines of code using Outlines, leveraging both Outlines' prompt templating and its structured generation capabilities.\n\nThe article we will try to summarize is the first three paragraphs of the [Alan Turing page on Wikipedia](https://en.wikipedia.org/wiki/Alan_Turing):\n\n```python\narticle = \"\"\"\nAlan Mathison Turing OBE FRS (/ˈtjʊərɪŋ/; 23 June 1912 – 7 June 1954) was an English mathematician, computer scientist, logician, cryptanalyst, philosopher and theoretical biologist.[5] Turing was highly influential in the development of theoretical computer science, providing a formalisation of the concepts of algorithm and computation with the Turing machine, which can be considered a model of a general-purpose computer.[6][7][8] He is widely considered to be the father of theoretical computer science and artificial intelligence.[9]\n\nBorn in Maida Vale, London, Turing was raised in southern England. He graduated at King's College, Cambridge, with a degree in mathematics. Whilst he was a fellow at Cambridge, he published a proof demonstrating that some purely mathematical yes–no questions can never be answered by computation. He defined a Turing machine and proved that the halting problem for Turing machines is undecidable. In 1938, he obtained his PhD from the Department of Mathematics at Princeton University. 
During the Second World War, Turing worked for the Government Code and Cypher School at Bletchley Park, Britain's codebreaking centre that produced Ultra intelligence. For a time he led Hut 8, the section that was responsible for German naval cryptanalysis. Here, he devised a number of techniques for speeding the breaking of German ciphers, including improvements to the pre-war Polish bomba method, an electromechanical machine that could find settings for the Enigma machine. Turing played a crucial role in cracking intercepted coded messages that enabled the Allies to defeat the Axis powers in many crucial engagements, including the Battle of the Atlantic.[10][11]\n\nAfter the war, Turing worked at the National Physical Laboratory, where he designed the Automatic Computing Engine, one of the first designs for a stored-program computer. In 1948, Turing joined Max Newman's Computing Machine Laboratory at the Victoria University of Manchester, where he helped develop the Manchester computers[12] and became interested in mathematical biology. He wrote a paper on the chemical basis of morphogenesis[1] and predicted oscillating chemical reactions such as the Belousov–Zhabotinsky reaction, first observed in the 1960s. Despite these accomplishments, Turing was never fully recognised in Britain during his lifetime because much of his work was covered by the Official Secrets Act.[13]\n\"\"\"\n```\n\n## How Chain Of Density works\n\nChain Of Density starts with asking the model to generate a first long and non-specific summary. Then it asks the model to generate 4 extra summaries by proceeding in the following way:\n\n1. Identify 1-3 entities missing in the previous summary;\n2. Add all entities marked as missing in the previous step, while not dropping entities;\n3. Make the summary more concise;\n\nThe prompt also asks the model to return a list of JSON objects that contain the missing entities and the new summary. 
This is where structured generation will come in handy :) The paper provides the prompt and an example:\n\n![Figure 2 in the paper](./images/chain_of_density.png)\n\nWe can now implement the prompt provided in the paper. We stored the prompt template in a text file, and we can load it using the `Template` class:\n\n```python\nfrom outlines import Template\n\nchain_of_density = Template.from_file(\"prompt_templates/chain_of_density.txt\")\n```\n\n??? Note\n\n    Note that we modified the prompt slightly so it returns a JSON object that contains the summaries, instead of a list of summaries.\n\n\n## Outlines implementation\n\nWe will use Outlines' JSON-structured generation to ensure that the model's output is consistent with the format specified in the prompt. We start with defining the JSON objects that the model is asked to return using Pydantic. We define one JSON object that contains a list of `Summary` objects, each of which contains the missing entities and the new summary:\n\n```python\nfrom pydantic import BaseModel, conlist\n\nclass Summary(BaseModel):\n    missing_entities: str\n    denser_summary: str\n\nclass Summaries(BaseModel):\n    summaries: conlist(Summary, max_length=5, min_length=5)\n```\n\nWe now generate the prompt by passing the article we want to summarize to the prompt template previously loaded. 
We load a quantized version of Mistral-7B using the AutoAWQ library, and then use the `Summaries` schema to generate the summaries with structured generation:\n\n```python\nimport outlines\nimport transformers\n\nMODEL_NAME = \"TheBloke/Mistral-7B-OpenOrca-AWQ\"\n\nmodel = outlines.from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME),\n    transformers.AutoTokenizer.from_pretrained(MODEL_NAME)\n)\nprompt = chain_of_density(article=article)\nresult = model(prompt, Summaries, max_new_tokens=2000)\n```\n\nWe can now check the results:\n\n```python\nprint(result)\n# {'summaries': [\n#     {\n#       'missing_entities': 'English mathematician, cryptanalyst, philosopher',\n#       'denser_summary': 'Alan Mathison Turing was an English mathematician, cryptanalyst, philosopher.'\n#     },\n#     {\n#       'missing_entities': '',\n#       'denser_summary': \"Alan Mathison Turing was an English mathematician who was a crucial figure in WW2's Bletchley Park codebreaking centre and designed one of the first computers.\"\n#     },\n#     {\n#       'missing_entities': 'cryptanalyst, studied, biology, father',\n#       'denser_summary': 'Alan Mathison Turing was an English cryptanalyst, studied theoretical computer science, and contributed to mathematical biology.'\n#     },\n#     {\n#       'missing_entities': 'biology, morphogenesis, chemical',\n#       'denser_summary': 'Alan Mathison Turing was an English cryptanalyst, studied theoretical computer science, and predicted chemical reactions in morphogenesis.\n#     '},\n#     {\n#       'missing_entities': '',\n#       'denser_summary': 'Alan Mathison Turing was an English cryptanalyst, developed computer science, and made strides in mathematical biology research.'\n#       }\n# ]}\n```\n\nNot bad, considering we used a smallish model to generate the summary! Chain of Density seems to be a very effective prompting technique to generate dense summaries, even with small quantized models. 
Its implementation in Outlines is also very short.\n\nNote that this is the first article I tried and it worked out of the box. Try it out on other articles, and please share the results on Twitter, or by opening [a new discussion](https://github.com/dottxt-ai/outlines/discussions/categories/show-and-tell) on the Outlines repository!\n"
  },
  {
    "path": "docs/examples/chain_of_thought.md",
    "content": "# Chain of thought\n\n\nChain of thought is a prompting technique introduced in the paper [\"Chain-of-Thought Prompting Elicits Reasoning in Large Language Models\"](https://arxiv.org/abs/2201.11903) where through prompting the authors generate a series of intermediate reasoning steps which improves the ability of LLMs to perform complex reasoning.\n\nIn this guide, we use [outlines](https://dottxt-ai.github.io/outlines/) to apply chain of thought through structured output.\n\nWe use [llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library. Outlines supports llama-cpp-python, but we need to install it ourselves:\n\n```shell\npip install llama-cpp-python\n```\n\nTo create an outlines `LlamaCpp` model, you first need to create a `Llama` object from the `llama-cpp-python` library. Then you can create the outlines model by calling `outlines.from_llamacpp` with the `Llama` object instance as argument. To create the `Llama` object, you need to provide the model weights by passing the name of the repository on the HuggingFace Hub, and the filenames or glob pattern (it will automatically download the weights from the hub):\n\n```python\nimport llama_cpp\nimport outlines\n\nllm = llama_cpp.Llama(\n    \"NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF\",\n    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(\n        \"NousResearch/Hermes-2-Pro-Llama-3-8B\"\n    ),\n    n_gpu_layers=-1,\n    flash_attn=True,\n    n_ctx=8192,\n    verbose=False\n)\nmodel = outlines.from_llamacpp(llm)\n```\n\n??? 
note \"(Optional) Store the model weights in a custom folder\"\n\n    By default the model weights are downloaded to the hub cache but if we want to store the weights in a custom folder, we pull a quantized GGUF model [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF) by [NousResearch](https://nousresearch.com/) from [HuggingFace](https://huggingface.co/):\n\n    ```shell\n    wget https://hf.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf\n    ```\n\n    We initialize the model:\n\n    ```python\n    from llama_cpp import Llama\n\n    llm = Llama(\"/path/to/model/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf\", ...)\n    ```\n\n## Chain of thought\n\nWe first define our Pydantic class for a reasoning step:\n\n```python\nfrom pydantic import BaseModel, Field\n\nclass Reasoning_Step(BaseModel):\n    reasoning_step: str = Field(..., description=\"Reasoning step\")\n```\n\nWe then define the Pydantic class for reasoning which will consist of a list of reasoning steps and a conclusion, and we get its JSON schema:\n\n```python\nfrom typing import List\n\nclass Reasoning(BaseModel):\n    reasoning: List[Reasoning_Step] = Field(..., description=\"List of reasoning steps\")\n    conclusion: str = Field(..., description=\"Conclusion\")\n\njson_schema = Reasoning.model_json_schema()\n```\n\nWe then need to adapt our prompt to the [Hermes prompt format for JSON schema](https://github.com/NousResearch/Hermes-Function-Calling?tab=readme-ov-file#prompt-format-for-json-mode--structured-outputs):\n\n```python\nfrom outlines import Template\n\ngenerate_hermes_prompt = Template.from_string(\n    \"\"\"\n    <|im_start|>system\n    You are a world class AI model who answers questions in JSON\n    Here's the json schema you must adhere to:\n    <schema>\n    {{ json_schema }}\n    </schema>\n    <|im_end|>\n    <|im_start|>user\n    {{ user_prompt }}\n    <|im_end|>\n    <|im_start|>assistant\n    
<schema>\n    \"\"\"\n)\n```\n\nFor a given user prompt:\n\n```python\nuser_prompt = \"9.11 and 9.9 -- which is bigger?\"\n```\n\nWe can use `outlines.Generator` with the Pydantic class we previously defined, and call the generator with the Hermes prompt:\n\n```python\ngenerator = outlines.Generator(model, Reasoning)\nprompt = generate_hermes_prompt(json_schema=json_schema, user_prompt=user_prompt)\nresponse = generator(prompt, max_tokens=1024, temperature=0, seed=42)\n```\n\nWe obtain a series of intermediate reasoning steps as well as the conclusion:\n\n```python\nimport json\n\njson_response = json.loads(response)\n\nprint(json_response[\"reasoning\"])\nprint(json_response[\"conclusion\"])\n# [{'reasoning_step': 'Both 9.11 and 9.9 are decimal numbers.'},\n#  {'reasoning_step': 'When comparing decimal numbers, we look at the numbers after the decimal point.'},\n#  {'reasoning_step': 'In this case, 9.11 has the number 1 after the decimal point, while 9.9 has the number 9.'},\n#  {'reasoning_step': 'Since 1 is greater than 9, 9.11 is greater than 9.9.'}]\n# '9.11 is bigger.'\n```\n\nWe notice that the 4th reasoning step is wrong (\"Since 1 is greater than 9, 9.11 is greater than 9.9.\"), so we should probably give the model some examples for this particular task.\n\nThis example was originally contributed by [Alonso Silva](https://github.com/alonsosilvaallende).\n"
  },
  {
    "path": "docs/examples/classification.md",
    "content": "# Classification\n\nClassification is a classic problem in NLP and finds many applications: spam detection, sentiment analysis, triaging of incoming requests, etc. We will use the example of a company that wants to sort support requests between those that require immediate attention (`URGENT`), and those that can wait a little (`STANDARD`). You could easily extend the example by adding new labels.\n\n\nThis tutorial shows how one can implement multi-label classification using Outlines.\n\nAs always, we start with initializing the model. Since we are GPU poor we will be using a quantized version of Mistral-7B-v0.1:\n\n```python\nimport outlines\nimport transformers\n\nMODEL_NAME = \"TheBloke/Mistral-7B-OpenOrca-AWQ\"\n\nmodel = outlines.from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME),\n    transformers.AutoTokenizer.from_pretrained(MODEL_NAME)\n)\n```\n\nWe will use a prompt template stored in a text file:\n\n```python\nfrom outlines import Template\n\ncustomer_support = Template.from_file(\"prompt_templates/classification.txt\")\n```\n\n## Choosing between multiple choices\n\nOutlines provides a convenient way to do multi-label classification, passing a Literal type hint to the `outlines.Generator` object:\n\n```python\nfrom typing import Literal\nimport outlines\n\ngenerator = outlines.Generator(model, Literal[\"URGENT\", \"STANDARD\"])\n\n```\nOutlines supports batched requests, so we will pass two requests to the model:\n\n```python\nrequests = [\n    \"My hair is one fire! Please help me!!!\",\n    \"Just wanted to say hi\"\n]\n\nprompts = [customer_support(request=request) for request in requests]\n```\n\nWe can now ask the model to classify the requests:\n\n```python\nlabels = generator(prompts)\nprint(labels)\n# ['URGENT', 'STANDARD']\n```\n\n## Using JSON-structured generation\n\nAnother (convoluted) way to do multi-label classification is to use JSON-structured generation in Outlines. 
We first need to define our Pydantic schema that contains the labels:\n\n```python\nfrom enum import Enum\nfrom pydantic import BaseModel\n\n\nclass Label(str, Enum):\n    urgent = \"URGENT\"\n    standard = \"STANDARD\"\n\n\nclass Classification(BaseModel):\n    label: Label\n```\n\nWe can then create a generator with the Pydantic model we just defined and call it:\n\n```python\ngenerator = outlines.Generator(model, Classification)\nlabels = generator(prompts)\nprint(labels)\n# ['{\"label\":\"URGENT\"}', '{ \"label\": \"STANDARD\" }']\n```\n"
  },
  {
    "path": "docs/examples/dating_profiles.md",
    "content": "# Generate a synthetic dating profile from a description\n\nIn this example we will see how we can use Outlines to generate synthetic data for a dating application. This example was originally contributed by [Vibhor Kumar](https://github.com/veezbo).\n\n```python\nimport json\nfrom dataclasses import dataclass\nfrom enum import Enum\n\nimport torch\nimport transformers\nfrom pydantic import BaseModel, conlist, constr\n\nimport outlines\n```\n\n## Defining the profile with Pydantic\n\nHere a dating profile will consist of a biography, a job, a list of interests and two question-answer pairs. The questions are written in advance by the team, and the users are asked to provide an answer:\n\n```python\nclass QuestionChoice(str, Enum):\n    A = \"The key to my heart is\"\n    B = \"The first item on my bucket list is\"\n    C = \"Perks of dating me\"\n    D = \"Message me if you also love\"\n    E = \"People would describe me as\"\n    F = \"I can beat you in a game of\"\n\n@dataclass\nclass QuestionAnswer:\n    question: QuestionChoice\n    answer: str\n```\n\nUsers need to provide a short biography, with a minimum of 10 and a maximum of 300 characters. The application also limits job descriptions to 50 characters. In addition to the question-answer pairs, the user is required to provide a list of between 1 and 5 interests:\n\n```python\nclass DatingProfile(BaseModel):\n    bio: constr(str, min_length=10, max_length=300)\n    job: constr(str, max_length=50)\n    interests: conlist(str, min_length=1, max_length=5)  # type: ignore\n    qna1: QuestionAnswer\n    qna2: QuestionAnswer\n```\n\n## Prompt template and examples\n\nWe will ask the model to generate profiles from a high-level description:\n\n```python\n@dataclass\nclass Example:\n    description: str\n    profile: DatingProfile\n```\n\nWe will use Outlines' prompt templating abilities to generate the prompt for us. 
This helps clearly separate the general prompting logic from what is specific to an example.\n\n```python\nfrom outlines import Template\n\ndating_profile_prompt = Template.from_string(\n    \"\"\"\n    You are a world-renowned matchmaker who understands the modern dating\n    market. Your job is to generate dating app profiles for male clients\n    interested in women based on a provided description. The profiles should be\n    authentic, show off their strengths, and maximize their likelihood of\n    getting matches on dating apps.  Here are some examples of past clients that\n    you have successfully created profiles for:\n\n    {% for example in examples %}\n    Description:\n    {{ example.description }}\n    Profile:\n    {{ example.profile }}\n    {% endfor %}\n\n    Here is the new client who you need to create a profile for:\n    Description: {{ description }}\n    Profile:\n    \"\"\"\n)\n```\n\nWe will provide the model with several few-shot examples:\n\n```python\nsamples: list[Example] = [\n    Example(\n        description=\"I'm an author and former professional soccer player living in Seattle who publishes popular fiction books. A typical day for me starts by hanging out with my cat, drinking a coffee, and reading as much as I can in a few hours. Then, I'll prepare a quick smoothie before starting to write for a few hours, take a break with soccer or running a few miles, and finally meet friends for dinner at a new, hip restaurant in the evening. Sometimes we go axe-throwing afterwards, or play poker, or watch a comedy show, or visit a dive bar. On my vacations, I travel extensively to countries South America, Europe, and Asia, with the goal of visiting them all!\",\n        profile=DatingProfile(\n            bio=\"Adventurer, dreamer, author, and soccer enthusiast. Life’s too short to waste time so I make the most of each day by exploring new places and playing with my friends on the pitch. 
What’s your favorite way to get out and have fun?\",\n            job=\"Famous Soccer Player -> Famous Author\",\n            interests=[\"Soccer\", \"Travel\", \"Friends\", \"Books\", \"Fluffy Animals\"],\n            qna1=QuestionAnswer(\n                question=QuestionChoice.B, answer=\"swim in all seven oceans!\"\n            ),\n            qna2=QuestionAnswer(\n                question=QuestionChoice.E,\n                answer=\"fun-loving, adventurous, and a little bit crazy\",\n            ),\n        ),\n    ),\n    Example(\n        description=\"I run my company and build houses for a living. I'm a big fan of the outdoors and love to go hiking, camping, and fishing. I don't like video games, but do like to watch movies. My love language is home-cooked food, and I'm looking for someone who isn't afraid to get their hands dirty.\",\n        profile=DatingProfile(\n            bio=\"If you're looking for a Montana man who loves to get outdoors and hunt, and who's in-tune with his masculinity then I'm your guy!\",\n            job=\"House Construction Manager / Entrepreneur\",\n            interests=[\"Hunting\", \"Hiking\", \"The outdoors\", \"Home-cooked food\"],\n            qna1=QuestionAnswer(question=QuestionChoice.A, answer=\"food made at home\"),\n            qna2=QuestionAnswer(\n                question=QuestionChoice.C,\n                answer=\"having a man in your life who can fix anything\",\n            ),\n        ),\n    ),\n    Example(\n        description=\"I run my own Youtube channel with 10M subscribers. I love working with kids, and my audience skews pretty young too. In my free time, I play Fortnite and Roblox. I'm looking for someone who is also a gamer and likes to have fun. I'm learning Japanese in my free time as well as how to cook.\",\n        profile=DatingProfile(\n            bio=\"Easy on the eyes (find me on Youtube!) and great with kids. 
What more do you need?\",\n            job=\"Youtuber 10M+ subscribers\",\n            interests=[\"Kids\", \"Gaming\", \"Japanese\"],\n            qna1=QuestionAnswer(question=QuestionChoice.D, answer=\"anime and gaming!\"),\n            qna2=QuestionAnswer(question=QuestionChoice.F, answer=\"Fortnite, gg ez\"),\n        ),\n    ),\n]\n```\n\n## Load the model\n\nWe will use Mosaic's MPT-7B model (requires 13GB of GPU memory) which can fit on a single GPU with a reasonable context window. We initialize it with Outlines:\n\n```python\nMODEL_NAME = \"mosaicml/mpt-7b-8k-instruct\"\n\nconfig = transformers.AutoConfig.from_pretrained(\n    MODEL_NAME, trust_remote_code=True\n)\nconfig.init_device = \"meta\"\nmodel_kwargs = {\n    \"config\": config,\n    \"trust_remote_code\": True,\n    \"torch_dtype\": torch.bfloat16,\n    \"device_map\": \"cuda\",\n}\ntf_model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_kwargs)\ntf_tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)\nmodel = outlines.from_transformers(tf_model, tokenizer=tf_tokenizer)\n```\n\n## JSON-structured generation of profiles\n\nWe will now generate a dating profile from a textual description of oneself:\n\n``` python\nnew_description = \"\"\"I'm a laid-back lawyer who spends a lot of his free-time\ngaming. I work in a corporate office, but ended up here after the start-up  I\ncofounded got acquired, so still play ping pong with my cool coworkers every\nday.  I have a bar at home where I make cocktails, which is great for\nentertaining  friends. I secretly like to wear suits and get a new one tailored\nevery few  months. I also like weddings because I get to wear those suits, and\nit's  a good excuse for a date. 
I watch the latest series because I'm paying,\nwith my hard-earned money, for every streaming service.\"\"\"\n\nprompt = dating_profile_prompt(description=new_description, examples=samples)\nprofile = model(prompt, DatingProfile)\nparsed_profile = DatingProfile.model_validate_json(json.loads(profile))\n```\n\n## Results\n\nHere are a couple of results:\n\n```json\n{\n    \"bio\": \"\"\"I'm an ambitious lawyer with a casual and fashionable style. I love\n    games and sports, but my true passion is preparing refreshing cocktails at\n    home and dressing to the nines at weddings. I'm currently looking for a woman\n    to show a good time to and get a kiss on the opulent suit I just had made.\n    Send resume to this inbox.\"\"\",\n    \"job\": \"Lawyer\",\n    \"interests\":\n    [\n        \"Stylish guys\",\n        \"Gaming\",\n        \"Ping pong\",\n        \"Cocktails\",\n        \"Weddings\"\n    ],\n    \"qna1\":\n    {\n        \"question\": \"The first item on my bucket list is\",\n        \"answer\": \"be married and have a family.\"\n    },\n    \"qna2\":\n    {\n        \"question\": \"People would describe me as\",\n        \"answer\": \"charming, stylish, and funny.\"\n    }\n}\n```\n\n```json\n{\n    \"bio\": \"\"\"I’m a sexy lawyer with time on my hands. I love to game and\n    play ping pong, but the real reason you should swipe to the right\n    is because I look great in a suit. Who doesn’t love a man in a\n    suit? Just saying. 
Send me a message if you think it’s time to take\n    your dating life to the next level.\"\"\",\n    \"job\": \"Lawyer\",\n    \"interests\":\n    [\n        \"Gaming\",\n        \"Ping Pong\",\n        \"Tailored Suits\",\n        \"Weddings\",\n        \"Streaming Services\"\n    ],\n    \"qna1\":\n    {\n        \"question\": \"The first item on my bucket list is\",\n        \"answer\": \"simulate space but stay alive for as long as possible\"\n    },\n    \"qna2\":\n    {\n        \"question\": \"People would describe me as\",\n        \"answer\": \"easy-going, a little nerdy but with a mature essence\"\n    }\n}\n```\n"
  },
  {
    "path": "docs/examples/deploy-using-bentoml.md",
    "content": "# Run Outlines using BentoML\n\n[BentoML](https://github.com/bentoml/BentoML) is an open-source model serving library for building performant and scalable AI applications with Python. It comes with tools that you need for serving optimization, model packaging, and production deployment.\n\nIn this guide, we will show you how to use BentoML to run programs written with Outlines on GPU locally and in [BentoCloud](https://www.bentoml.com/), an AI Inference Platform for enterprise AI teams. The example source code in this guide is also available in the [examples/bentoml/](https://github.com/dottxt-ai/outlines/blob/main/examples/bentoml/) directory.\n\n## Import a model\n\nFirst we need to download an LLM (Mistral-7B-v0.1 in this example and you can use any other LLM) and import the model into BentoML's [Model Store](https://docs.bentoml.com/en/latest/guides/model-store.html). Let's install BentoML and other dependencies from PyPi (preferably in a virtual environment):\n\n```shell\npip install -r requirements.txt\n```\n\nThen save the code snippet below as `import_model.py` and run `python import_model.py`.\n\n**Note**: You need to accept related conditions on [Hugging Face](https://huggingface.co/mistralai/Mistral-7B-v0.1) first to gain access to Mistral-7B-v0.1.\n\n```python\nimport bentoml\n\nMODEL_ID = \"mistralai/Mistral-7B-v0.1\"\nBENTO_MODEL_TAG = MODEL_ID.lower().replace(\"/\", \"--\")\n\ndef import_model(model_id, bento_model_tag):\n\n    import torch\n    from transformers import AutoModelForCausalLM, AutoTokenizer\n\n    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n    model = AutoModelForCausalLM.from_pretrained(\n        MODEL_ID,\n        torch_dtype=torch.float16,\n        low_cpu_mem_usage=True,\n    )\n\n    with bentoml.models.create(bento_model_tag) as bento_model_ref:\n        tokenizer.save_pretrained(bento_model_ref.path)\n        model.save_pretrained(bento_model_ref.path)\n\n\nif __name__ == \"__main__\":\n    
import_model(MODEL_ID, BENTO_MODEL_TAG)\n```\n\nYou can verify the download is successful by running:\n\n```shell\n$ bentoml models list\n\nTag                                          Module  Size        Creation Time\nmistralai--mistral-7b-v0.1:m7lmf5ac2cmubnnz          13.49 GiB   2024-04-25 06:52:39\n```\n\n## Define a BentoML Service\n\nAs the model is ready, we can define a [BentoML Service](https://docs.bentoml.com/en/latest/guides/services.html) to wrap the capabilities of the model.\n\nWe will run the JSON-structured generation example [in the README](https://github.com/dottxt-ai/outlines?tab=readme-ov-file#efficient-json-generation-following-a-json-schema), with the following schema:\n\n```python\nDEFAULT_SCHEMA = \"\"\"{\n    \"title\": \"Character\",\n    \"type\": \"object\",\n    \"properties\": {\n        \"name\": {\n            \"title\": \"Name\",\n            \"maxLength\": 10,\n            \"type\": \"string\"\n        },\n        \"age\": {\n            \"title\": \"Age\",\n            \"type\": \"integer\"\n        },\n        \"armor\": {\"$ref\": \"#/definitions/Armor\"},\n        \"weapon\": {\"$ref\": \"#/definitions/Weapon\"},\n        \"strength\": {\n            \"title\": \"Strength\",\n            \"type\": \"integer\"\n        }\n    },\n    \"required\": [\"name\", \"age\", \"armor\", \"weapon\", \"strength\"],\n    \"definitions\": {\n        \"Armor\": {\n            \"title\": \"Armor\",\n            \"description\": \"An enumeration.\",\n            \"enum\": [\"leather\", \"chainmail\", \"plate\"],\n            \"type\": \"string\"\n        },\n        \"Weapon\": {\n            \"title\": \"Weapon\",\n            \"description\": \"An enumeration.\",\n            \"enum\": [\"sword\", \"axe\", \"mace\", \"spear\", \"bow\", \"crossbow\"],\n            \"type\": \"string\"\n        }\n    }\n}\"\"\"\n```\n\nFirst, we need to define a BentoML service by decorating an ordinary class (`Outlines` here) with `@bentoml.service` 
decorator. We pass to this decorator some configuration and GPU on which we want this service to run in BentoCloud (here an L4 with 24GB memory):\n\n```python\nimport typing as t\nimport bentoml\n\nfrom import_model import BENTO_MODEL_TAG\n\n@bentoml.service(\n    traffic={\n        \"timeout\": 300,\n    },\n    resources={\n        \"gpu\": 1,\n        \"gpu_type\": \"nvidia-l4\",\n    },\n)\nclass Outlines:\n\n    bento_model_ref = bentoml.models.get(BENTO_MODEL_TAG)\n\n    def __init__(self) -> None:\n        import outlines\n        import torch\n        from transformers import AutoModelForCausalLM, AutoTokenizer\n\n        # Load tokenizer and model from the BentoML model reference path\n        hf_tokenizer = AutoTokenizer.from_pretrained(self.bento_model_ref.path)\n        hf_model = AutoModelForCausalLM.from_pretrained(\n            self.bento_model_ref.path,\n            torch_dtype=torch.float16,\n            low_cpu_mem_usage=True,\n            device_map=\"cuda\"\n        )\n\n        # Then use the loaded model with Outlines\n        self.model = outlines.from_transformers(hf_model, hf_tokenizer)\n\n    ...\n```\n\nWe then need to define an HTTP endpoint using `@bentoml.api` to decorate the method `generate` of `Outlines` class:\n\n```python\n    ...\n\n    @bentoml.api\n    async def generate(\n        self,\n        prompt: str = \"Give me a character description.\",\n        json_schema: t.Optional[str] = DEFAULT_SCHEMA,\n    ) -> t.Dict[str, t.Any]:\n        import json\n        import outlines\n        from outlines.types import JsonSchema\n\n        generator = outlines.Generator(self.model, JsonSchema(json_schema))\n        character = generator(prompt)\n\n        return json.loads(character)\n```\n\nHere `@bentoml.api` decorator defines `generate` as an HTTP endpoint that accepts a JSON request body with two fields: `prompt` and `json_schema` (optional, which allows HTTP clients to provide their own JSON schema). 
The type hints in the function signature will be used to validate incoming JSON requests. You can define as many HTTP endpoints as you want by using `@bentoml.api` to decorate other methods of `Outlines` class.\n\nNow you can save the above code to `service.py` (or use [this implementation](https://github.com/dottxt-ai/outlines/blob/main/examples/bentoml/)), and run the code using the BentoML CLI.\n\n## Run locally for testing and debugging\n\nThen you can run a server locally by:\n\n```shell\nbentoml serve .\n```\n\nThe server is now active at <http://localhost:3000>. You can interact with it using the Swagger UI or in other different ways:\n\n<details>\n\n<summary>CURL</summary>\n\n```shell\ncurl -X 'POST' \\\n  'http://localhost:3000/generate' \\\n  -H 'accept: application/json' \\\n  -H 'Content-Type: application/json' \\\n  -d '{\n  \"prompt\": \"Give me a character description.\"\n}'\n```\n\n</details>\n\n<details>\n\n<summary>Python client</summary>\n\n```python\nimport bentoml\n\nwith bentoml.SyncHTTPClient(\"http://localhost:3000\") as client:\n    response = client.generate(\n        prompt=\"Give me a character description\"\n    )\n    print(response)\n```\n\n</details>\n\nExpected output:\n\n```shell\n{\n  \"name\": \"Aura\",\n  \"age\": 15,\n  \"armor\": \"plate\",\n  \"weapon\": \"sword\",\n  \"strength\": 20\n}\n```\n\n## Deploy to BentoCloud\n\nAfter the Service is ready, you can deploy it to [BentoCloud](https://docs.bentoml.com/en/latest/bentocloud/get-started.html) for better management and scalability. 
[Sign up](https://cloud.bentoml.com/signup) if you haven't got a BentoCloud account.\n\nMake sure you have [logged in to BentoCloud](https://docs.bentoml.com/en/latest/bentocloud/how-tos/manage-access-token.html), then run the following command to deploy it.\n\n```shell\nbentoml deploy .\n```\n\nOnce the application is up and running on BentoCloud, you can access it via the exposed URL.\n\n**Note**: For custom deployment in your own infrastructure, use [BentoML to generate an OCI-compliant image](https://docs.bentoml.com/en/latest/guides/containerization.html).\n"
  },
  {
    "path": "docs/examples/deploy-using-cerebrium.md",
    "content": "# Run Outlines using Cerebrium\n\n[Cerebrium](https://www.cerebrium.ai/) is a serverless AI infrastructure platform that makes it easier for companies to build and deploy AI-based applications. They offer serverless GPUs with low cold start times with over 12 varieties of GPU chips that auto scale and you only pay for the compute you use.\n\nIn this guide we will show you how you can use Cerebrium to run programs written with Outlines on GPUs in the cloud.\n\n# Setup Cerebrium\n\nFirst, we install Cerebrium and login to get authenticated.\n\n```shell\npip install cerebrium\ncerebrium login\n```\n\nThen let us create our first project\n\n```shell\ncerebrium init outlines-project\n```\n\n## Setup Environment and Hardware\n\nYou set up your environment and hardware in the cerebrium.toml file that was created using the init function above.\n\n```toml\n[cerebrium.deployment]\ndocker_base_image_url = \"nvidia/cuda:12.1.1-runtime-ubuntu22.04\"\n\n[cerebrium.hardware]\ncpu = 2\nmemory = 14.0\ngpu = \"AMPERE A10\"\ngpu_count = 1\nprovider = \"aws\"\nregion = \"us-east-1\"\n\n[cerebrium.dependencies.pip]\noutlines = \"==1.0.0\"\ntransformers = \"==4.38.2\"\ndatasets = \"==2.18.0\"\naccelerate = \"==0.27.2\"\n```\n\n## Setup inference\n\nRunning code in Cerebrium is like writing normal python with no special syntax. 
In a `main.py` file specify the following:\n\n```python\nimport outlines\nimport transformers\nfrom outlines.types import JsonSchema\n\n\nmodel = outlines.from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    transformers.AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\nschema = \"\"\"{\n    \"title\": \"Character\",\n    \"type\": \"object\",\n    \"properties\": {\n        \"name\": {\n            \"title\": \"Name\",\n            \"maxLength\": 10,\n            \"type\": \"string\"\n        },\n        \"age\": {\n            \"title\": \"Age\",\n            \"type\": \"integer\"\n        },\n        \"armor\": {\"$ref\": \"#/definitions/Armor\"},\n        \"weapon\": {\"$ref\": \"#/definitions/Weapon\"},\n        \"strength\": {\n            \"title\": \"Strength\",\n            \"type\": \"integer\"\n        }\n    },\n    \"required\": [\"name\", \"age\", \"armor\", \"weapon\", \"strength\"],\n    \"definitions\": {\n        \"Armor\": {\n            \"title\": \"Armor\",\n            \"description\": \"An enumeration.\",\n            \"enum\": [\"leather\", \"chainmail\", \"plate\"],\n            \"type\": \"string\"\n        },\n        \"Weapon\": {\n            \"title\": \"Weapon\",\n            \"description\": \"An enumeration.\",\n            \"enum\": [\"sword\", \"axe\", \"mace\", \"spear\", \"bow\", \"crossbow\"],\n            \"type\": \"string\"\n        }\n    }\n}\"\"\"\n\ngenerator = outlines.Generator(model, JsonSchema(schema))\n```\n\nOn first deploy, it will download the model and store it on disk therefore for subsequent calls it will load the model from disk.\n\nEvery function in Cerebrium is callable through an API endpoint. 
Code at the topmost layer (i.e., not in a function) is instantiated only when the container is spun up the first time, so for subsequent calls it will simply run the code defined in the function you call.\n\nTo deploy an API that creates a new character when called with a prompt you can add the following code to `main.py`:\n\n```python\ndef generate(\n    prompt: str = \"Amiri, a 53 year old warrior woman with a sword and leather armor.\",\n):\n\n    character = generator(\n        f\"<s>[INST]Give me a character description. Describe {prompt}.[/INST]\"\n    )\n\n    return character\n```\n\n\n## Run on the cloud\n\n```shell\ncerebrium deploy\n```\n\nYou will see your application deploy, install pip packages and download the model. Once completed it will output a CURL request you can use to call your endpoint. Just remember to end\nthe URL with the function you would like to call - in this case /generate. You should see your response returned!\n"
  },
  {
    "path": "docs/examples/deploy-using-modal.md",
    "content": "# Run Outlines using Modal\n\n[Modal](https://modal.com/) is a serverless platform that allows you to easily run code on the cloud, including GPUs. It can come very handy for those of us who don't have a monster GPU at home and want to be able to quickly and easily provision, configure and orchestrate cloud infrastructure.\n\nIn this guide we will show you how you can use Modal to run programs written with Outlines on GPU in the cloud.\n\n## Requirements\n\nWe recommend installing `modal` and `outlines` in a virtual environment. You can create one with:\n\n```shell\npython -m venv venv\nsource venv/bin/activate\n```\n\nThen install the required packages:\n\n```shell\npip install modal outlines\n```\n\n## Build the image\n\nFirst we need to define our container image. If you need to access a gated model, you will need to provide an [access token](https://huggingface.co/settings/tokens). See the `.env` call below for how to provide a HuggingFace token.\n\nSetting a token is best done by setting an environment variable `HF_TOKEN` with your token. If you do not wish to do this, we provide a commented-out line in the code to set the token directly in the code.\n\n```python\nfrom modal import Image, App, gpu\nimport os\n\n# This creates a modal App object. 
Here we set the name to \"outlines-app\".\n# There are other optional parameters like modal secrets, schedules, etc.\n# See the documentation here: https://modal.com/docs/reference/modal.App\napp = App(name=\"outlines-app\")\n\n# Specify a language model to use.\n# Another good model to use is \"NousResearch/Hermes-2-Pro-Mistral-7B\"\nlanguage_model = \"mistral-community/Mistral-7B-v0.2\"\n\n# Please set an environment variable HF_TOKEN with your Hugging Face API token.\n# The code below (the .env({...}) part) will copy the token from your local\n# environment to the container.\n# More info on Image here: https://modal.com/docs/reference/modal.Image\noutlines_image = Image.debian_slim(python_version=\"3.11\").pip_install(\n    \"outlines\",\n    \"transformers\",\n    \"datasets\",\n    \"accelerate\",\n    \"sentencepiece\",\n).env({\n    # This will pull in your HF_TOKEN environment variable if you have one.\n    'HF_TOKEN':os.environ['HF_TOKEN']\n\n    # To set the token directly in the code, uncomment the line below and replace\n    # 'YOUR_TOKEN' with the HuggingFace access token.\n    # 'HF_TOKEN':'YOUR_TOKEN'\n})\n```\n\n## Setting the container up\n\nWhen running longer Modal apps, it's recommended to download your language model when the container starts, rather than when the function is called. This will cache the model for future runs.\n\n```python\n# This function imports the model from Hugging Face. The modal container\n# will call this function when it starts up. 
This is useful for\n# downloading models, setting up environment variables, etc.\ndef import_model():\n    import outlines\n    import transformers\n\n    outlines.from_transformers(\n        transformers.AutoModelForCausalLM.from_pretrained(language_model),\n        transformers.AutoTokenizer.from_pretrained(language_model)\n    )\n\n# This line tells the container to run the import_model function when it starts.\noutlines_image = outlines_image.run_function(import_model)\n```\n\n## Define a schema\n\nWe will run the JSON-structured generation example [in the README](https://github.com/dottxt-ai/outlines?tab=readme-ov-file#efficient-json-generation-following-a-json-schema), with the following schema:\n\n```python\n# Specify a schema for the character description. In this case,\n# we want to generate a character with a name, age, armor, weapon, and strength.\nschema = \"\"\"{\n    \"title\": \"Character\",\n    \"type\": \"object\",\n    \"properties\": {\n        \"name\": {\n            \"title\": \"Name\",\n            \"maxLength\": 10,\n            \"type\": \"string\"\n        },\n        \"age\": {\n            \"title\": \"Age\",\n            \"type\": \"integer\"\n        },\n        \"armor\": {\"$ref\": \"#/definitions/Armor\"},\n        \"weapon\": {\"$ref\": \"#/definitions/Weapon\"},\n        \"strength\": {\n            \"title\": \"Strength\",\n            \"type\": \"integer\"\n        }\n    },\n    \"required\": [\"name\", \"age\", \"armor\", \"weapon\", \"strength\"],\n    \"definitions\": {\n        \"Armor\": {\n            \"title\": \"Armor\",\n            \"description\": \"An enumeration.\",\n            \"enum\": [\"leather\", \"chainmail\", \"plate\"],\n            \"type\": \"string\"\n        },\n        \"Weapon\": {\n            \"title\": \"Weapon\",\n            \"description\": \"An enumeration.\",\n            \"enum\": [\"sword\", \"axe\", \"mace\", \"spear\", \"bow\", \"crossbow\"],\n            \"type\": \"string\"\n        
}\n    }\n}\"\"\"\n```\n\nTo make the inference work on Modal we need to wrap the corresponding function in a `@app.function` decorator. We pass to this decorator the image and GPU on which we want this function to run.\n\nLet's choose an A100 with 80GB memory. Valid GPUs can be found [here](https://modal.com/docs/reference/modal.gpu).\n\n```python\n# Define a function that uses the image we chose, and specify the GPU\n# and memory we want to use.\n@app.function(image=outlines_image, gpu=gpu.A100(size='80GB'))\ndef generate(\n    prompt: str = \"Amiri, a 53 year old warrior woman with a sword and leather armor.\",\n):\n    # Remember, this function is being executed in the container,\n    # so we need to import the necessary libraries here. You should\n    # do this with any other libraries you might need.\n    import outlines\n    import transformers\n    from outlines.types import JsonSchema\n\n    # Load the model into memory. The import_model function above\n    # should have already downloaded the model, so this call\n    # only loads the model into GPU memory.\n    model = outlines.from_transformers(\n        transformers.AutoModelForCausalLM.from_pretrained(language_model, device_map=\"cuda\"),\n        transformers.AutoTokenizer.from_pretrained(language_model)\n    )\n\n    # Generate a character description based on the prompt.\n    # We build a generator constrained by the JSON schema -- we provide the\n    # - model: the model we loaded above\n    # - schema: the JSON schema we defined above\n    generator = outlines.Generator(model, JsonSchema(schema))\n\n    # Make sure you wrap your prompt in instruction tags ([INST] and [/INST])\n    # to indicate that the prompt is an instruction. Instruction tags can vary\n    # by models, so make sure to check the model's documentation.\n    character = generator(\n        f\"<s>[INST]Give me a character description. 
Describe {prompt}.[/INST]\"\n    )\n\n    # Print out the generated character.\n    print(character)\n```\n\nWe then need to define a `local_entrypoint` to call our function `generate` remotely.\n\n```python\n@app.local_entrypoint()\ndef main(\n    prompt: str = \"Amiri, a 53 year old warrior woman with a sword and leather armor.\",\n):\n    # We use the \"generate\" function defined above -- note too that we are calling\n    # .remote() on the function. This tells modal to run the function in our cloud\n    # machine. If you want to run the function locally, you can call .local() instead,\n    # though this will require additional setup.\n    generate.remote(prompt)\n```\n\nHere `@app.local_entrypoint()` decorator defines `main` as the function to start from locally when using the Modal CLI. You can save above code to `example.py` (or use [this implementation](https://github.com/dottxt-ai/outlines/blob/main/examples/modal_example.py)). Let's now see how to run the code on the cloud using the Modal CLI.\n\n## Run on the cloud\n\nFirst install the Modal client from PyPi, if you have not already:\n\n```shell\npip install modal\n```\n\nYou then need to obtain a token from Modal. Run the following command:\n\n```shell\nmodal setup\n```\n\nOnce that is set you can run inference on the cloud using:\n\n```shell\nmodal run example.py\n```\n\nYou should see the Modal app initialize, and soon after see the result of the `print` function in your terminal. That's it!\n"
  },
  {
    "path": "docs/examples/earnings-reports.md",
    "content": "# Extracting financial data from earnings reports\n\nA common task in finance is to extract financial data from earnings reports. Earnings reports are infamously poorly formatted, as the SEC does not have requirements for producing machine-readable documents.\n\nEarnings reports are often provided as HTML documents, which can be difficult to parse. Investors often use complicated parsing systems or manual review to extract data. Entire companies are built around automating this task.\n\nThis cookbook is a proof of concept about how we can use LLMs to extract financial data directly into CSV. Comma-separated values are well-structured and can be defined by a regular expression, which Outlines can use to guide the LLM's output.\n\nThe example is a smaller subset of a full demo found [here](https://github.com/dottxt-ai/demos/tree/main/earnings-reports). The demo contains the full set of pre-processing steps needed to convert raw HTML into a structured CSV file, and tests the results across three company's 10k reports.\n\n## Setup\n\nInstall outlines and required dependencies:\n\n```shell\n# Later versions of torch can have difficulty with certain CUDA drivers.\n# We recommend using 2.4.0 for now, but you may wish to experiment with\n# other versions.\npip install outlines pandas transformers torch==2.4.0 accelerate\n```\n\n## Load the model\n\nChoose your language model. We'll use Phi-3 mini, which is small enough to run on reasonably small machines.\n\n```python\nimport outlines\nimport torch\nimport transformers\n\nmodel_name = 'microsoft/Phi-3-mini-4k-instruct'\ntf_model = transformers.AutoModelForCausalLM.from_pretrained(\n    model_name, device_map=\"cuda\", torch_dtype=torch.bfloat16\n)\ntf_tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)\nmodel = outlines.from_transformers(tf_model, tf_tokenizer)\n```\n\n## Set up the data\n\nFor brevity, we've attached the markdown version of Nvidia's 10k report. 
The [full demonstration](https://github.com/dottxt-ai/demos/tree/main/earnings-reports) processes the raw HTML version of the report to these markdown tables. Pages are filtered by whether they seem to contain income statements, and then compacted into the string you see below.\n\n```python\nincome_statement = \"\"\"\nTable of ContentsNVIDIA Corporation and SubsidiariesConsolidated Statements of Income(In millions, except per share data)\n\n|  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |\n| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |\n|  | | | Year Ended | | | | | | | | | | | | | | |\n|  | | | Jan 28, 2024 | | |  | | | Jan 29, 2023 | | |  | | | Jan 30, 2022 | | |\n| Revenue | | | $ | 60,922 |  |  | | | $ | 26,974 |  |  | | | $ | 26,914 |  |\n| Cost of revenue | | | 16,621 | |  |  | | | 11,618 | |  |  | | | 9,439 | |  |\n| Gross profit | | | 44,301 | |  |  | | | 15,356 | |  |  | | | 17,475 | |  |\n| Operating expenses | | |  | | |  | | |  | | |  | | |  | | |\n| Research and development | | | 8,675 | |  |  | | | 7,339 | |  |  | | | 5,268 | |  |\n| Sales, general and administrative | | | 2,654 | |  |  | | | 2,440 | |  |  | | | 2,166 | |  |\n| Acquisition termination cost | | |  | |  |  | | | 1,353 | |  |  | | |  | |  |\n| Total operating expenses | | | 11,329 | |  |  | | | 11,132 | |  |  | | | 7,434 | |  |\n| Operating income | | | 32,972 | |  |  | | | 4,224 | |  |  | | | 10,041 | |  |\n| Interest income | | | 866 | |  |  | | | 267 | |  |  | | | 29 | |  |\n| Interest expense | | | (257) | |  |  | | | (262) | |  |  | | | (236) | |  |\n| Other, net | | | 237 | |  |  | | | (48) | |  |  | | | 107 | |  |\n| Other income (expense), net | | | 846 | |  |  | | | (43) | |  |  | | | (100) | |  |\n| Income before income tax | | | 33,818 | |  |  | | | 4,181 | |  |  | | | 9,941 | |  |\n| Income tax expense (benefit) | | | 4,058 | |  |  | | | (187) | |  |  | | | 189 | |  |\n| Net income | | | $ | 29,760 |  |  
| | | $ | 4,368 |  |  | | | $ | 9,752 |  |\n|  | | |  | | |  | | |  | | |  | | |  | | |\n| Net income per share: | | |  | | |  | | |  | | |  | | |  | | |\n| Basic | | | $ | 12\\.05 |  |  | | | $ | 1\\.76 |  |  | | | $ | 3\\.91 |  |\n| Diluted | | | $ | 11\\.93 |  |  | | | $ | 1\\.74 |  |  | | | $ | 3\\.85 |  |\n|  | | |  | | |  | | |  | | |  | | |  | | |\n| Weighted average shares used in per share computation: | | |  | | |  | | |  | | |  | | |  | | |\n| Basic | | | 2,469 | |  |  | | | 2,487 | |  |  | | | 2,496 | |  |\n| Diluted | | | 2,494 | |  |  | | | 2,507 | |  |  | | | 2,535 | |  |\n\"\"\"\n```\n\nThe markdown tables extracted from the earnings reports can vary widely in row names, column counts, data types, etc. The advantage of LLMs here is that we can define the data we want in terms of the data types, and the LLM will output the data in the desired format.\n\nFor comparison, here is how the income statement looks in the original HTML:\n\n![Nvidia income statement](./images/nvidia-income.png)\n\n## Define the data we want\n\nOutlines is often used for JSON output, but it can also be used for CSV. We know the columns we want to extract, and we know the data types of the columns. Year for example is always a four-digit number, revenue is a number with commas, and so on.\n\nWe can define a regex pattern for each column type:\n\n```python\n# Define the column type regex patterns\ncolumn_types = {\n    # Year is always a four-digit number\n    \"year\": r\"\\d{4}\",\n\n    # Revenue, operating income, and net income are always numbers with commas.\n    # This regex permits integers that may begin with a minus sign, and may have\n    # commas separating the thousands, millions, etc.\n    \"integer_comma\": r\"((-?\\d+),?\\d+|(-?\\d+))\",\n    # Number is currently not used, but it represents a number with up to two decimal places.\n    \"number\": r\"(-?\\d+(?:\\.\\d{1,2})?)\",\n}\n```\n\nNext, let's choose the columns we want to extract. 
We want\n\n- Year, always a four-digit number\n- Revenue, a number with commas\n- Operating income, a number with commas\n- Net income, a number with commas\n\n```python\n# Define the columns to extract, and their data types.\ncolumns_to_extract = {\n    \"year\": \"year\",\n    \"revenue\": \"integer_comma\",\n    \"operating_income\": \"integer_comma\",\n    \"net_income\": \"integer_comma\",\n}\n```\n\nYou can modify `column_types` to match the data types of the columns you want to extract.  Adding a new financial metric to extract is as simple as adding a new key/value pair to `columns_to_extract`:\n\n```python\ncolumns_to_extract[\"diluted_earnings_per_share\"] = \"number\"\n```\n\nAdditional columns are not well tested for accuracy, so use with caution.\n\n## Create the regex describing the data we want\n\n\n```python\n# Create the header line. This is the requested column names\n# separated by commas, i.e. \"year,revenue,...\"\nheader = \",\".join(columns_to_extract.keys())\n\n# Create the data capture patterns. These are the regex patterns\n# that will be used to capture the data in each column\ndata_patterns = [column_types[dtype] for dtype in columns_to_extract.values()]\ndata_line = \",\".join(data_patterns)\n\n# Our final regex pattern.\nmax_rows = 3 # We expect 3 rows of data, firms usually report 3 years of income statements\ncsv_regex = f\"{header}(\\n{data_line}){{,{max_rows}}}\\n\\n\"\n\nprint(csv_regex)\n```\n\nwhich gives us\n\n```\nyear,revenue,operating_income,net_income,diluted_earnings_per_share(\n\\d{4},((-?\\d+),?\\d+|(-?\\d+)),((-?\\d+),?\\d+|(-?\\d+)),((-?\\d+),?\\d+|(-?\\d+)),(-?\\d+(?:\\.\\d{1,2})?)){,3}\n```\n\nPretty hairy, right? Thankfully, we have a simple function to construct this regex for you. The regex defines a header line, followed by a data line that repeats for each row of data we want to extract. 
Passing the regex to `outlines.Generator` will produce a function that will __always__ produce a CSV string that is consistent with the regex.\n\n## Prompting the model\n\nOutlines does not add system or instruction tokens by default, so we need to use `transformers.AutoTokenizer` to add them for whatever model we're using.\n\n```python\nfrom transformers import AutoTokenizer\n\ntokenizer = AutoTokenizer.from_pretrained(model_name)\n\ndef add_instruction(prompt):\n    return tokenizer.apply_chat_template([{\"role\": \"user\", \"content\": prompt}], tokenize=False, add_generation_prompt=True)\n\nprint(add_instruction(\"Howdy\"))\n```\n```\n<|user|>\nHowdy<|end|>\n<|assistant|>\n```\n\nOur prompt roughly describes the task we want the model to perform, and a few pieces of information it may need to know about income statements.\n\n```python\ndef extract_financial_data_prompt(columns_to_extract, income_statement):\n    user_prompt = f\"\"\"\n    Extract annual financial data from this set of pages. Pages\n    are from a 10k filing and were chosen because they may contain\n    a comprehensive income statement. Note that selected pages may\n    be incorrectly extracted, so you should verify that you are extracting\n    from the comprehensive income statement and not some other financial\n    statement.\n\n    Create a row for each year available in the income statement with the\n    following columns: {', '.join(columns_to_extract.keys())}. Firms typically report the\n    most recent 3 years of data, but this can vary.\n\n    Each column has types: {', '.join(columns_to_extract.values())}.\n\n    # Relevant pages:\n\n    {income_statement}\n\n    # Key instructions:\n\n    1. Look ONLY at the \"Consolidated Statements of Income\" table\n    2. For operating income, look for \"Income from operations\" or \"Operating income\"\n    3. For net income, use the TOTAL net income figure, not amounts allocated to specific share classes\n    4. Use NULL for missing values\n    5. 
Operating income must be less than revenue\n    6. Net income must be less than operating income\n    7. Ignore segment breakdowns, quarterly data, or per-share amounts\n\n    # Output format:\n\n    - CSV format with headers: {','.join(columns_to_extract.keys())}\n    - Use NULL for missing values\n    - If no data are found, do not create a row.\n    - Enter two newline characters to terminate the CSV when no more data are found.\n\n    # Definitions:\n    - Revenue: Total sales of goods and services. Usually this is at the top of the\n    income statement.\n    - Operating income: Revenue minus operating expenses for the entire company. This is revenue\n    minus costs. Operating income is also called operating profit, EBIT, or income from\n    operations.\n    - Net income: Operating income minus taxes. This is the bottom line of the\n    income statement.\n    \"\"\"\n\n    return add_instruction(user_prompt)\n```\n\n## Running the model\n\nNow that we have our prompt and regular expression, we can run the model.\n\nConstruct our regex extractor function.\n\n```python\nfrom outlines.types import Regex\n\ncsv_extractor = outlines.Generator(model, Regex(csv_regex))\n```\n\nProvide the prompt to the model and run it:\n\n```python\ncsv_data = csv_extractor(\n    extract_financial_data_prompt(columns_to_extract, income_statement),\n    max_new_tokens=1024,\n)\n\nprint(csv_data)\n```\n```\nyear,revenue,operating_income,net_income\n2024,60922,32972,29760\n2023,26974,4224,4368\n2022,26914,10041,9752\n```\n\nVoila! 
We've extracted the financial data from the income statement, and it's correct upon inspection.\n\nYou can even load this into a `pandas` DataFrame for further analysis:\n\n```python\nimport pandas as pd\nfrom io import StringIO\n\ndf = pd.read_csv(StringIO(csv_data))\nprint(df)\n```\n```\n   year  revenue  operating_income  net_income\n0  2024    60922             32972       29760\n1  2023    26974              4224        4368\n2  2022    26914             10041        9752\n```\n"
  },
  {
    "path": "docs/examples/extract_event_details.md",
    "content": "This recipe demonstrates how to use the `outlines` library to extract structured event details from a text message.\nWe will extract the title, location, and start date and time from messages like the following:\n\n```plaintext\nHello Kitty, my grandmother will be here, I think it's better to postpone\nour appointment to review math lessons to next Monday at 2pm at the same\nplace, 3 avenue des tanneurs, one hour will be enough see you 😘\n```\n\nLet see how to extract the event details from the message with the MLX\nlibrary dedicated to Apple Silicon processor (M series).\n\n```python\n--8<-- \"docs/cookbook/extract_event_details.py\"\n```\n\nThe output will be:\n\n```plaintext\nToday: Saturday 16 November 2024 and it's 10:55\n```\n\nand the extracted event information will be:\n\n```json\n{\n  \"title\":\"Math Review\",\n  \"location\":\"3 avenue des tanneurs\",\n  \"start\":\"2024-11-22T14:00:00Z\"\n}\n```\n\n\nTo find out more about this use case, we recommend the project developped by [Joseph Rudoler](https://x.com/JRudoler) the [ICS Generator](https://github.com/jrudoler/ics-generator)\n"
  },
  {
    "path": "docs/examples/extract_event_details.py",
    "content": "from datetime import datetime\n\nfrom mlx_lm import load\nfrom pydantic import BaseModel, Field\n\nimport outlines\nfrom outlines import Generator, Template\n\n\n# Load the model\nmodel = outlines.from_mlxlm(*load(\"mlx-community/Hermes-3-Llama-3.1-8B-8bit\"))\n\n\n# Define the event schema using Pydantic\nclass Event(BaseModel):\n    title: str = Field(description=\"title of the event\")\n    location: str\n    start: datetime = Field(\n        default=None, description=\"date of the event if available in iso format\"\n    )\n\n# Load the prompt template from a string\nprompt_template = Template.from_string(\n    \"\"\"\n    Today's date and time are {{ now }}\n    Given a user message, extract information of the event like date and time in iso format, location and title.\n    If the given date is relative, think step by step to find the right date.\n    Here is the message:\n    {{ message }}\n    \"\"\"\n)\n\n# Get the current date and time\nnow = datetime.now().strftime(\"%A %d %B %Y and it's %H:%M\")\n\n# Sample message\nmessage = \"\"\"Hello Kitty, my grandmother will be here, I think it's better to postpone our\nappointment to review math lessons to next Friday at 2pm at the same place, 3 avenue des tanneurs, I think that one hour will be enough\nsee you 😘 \"\"\"\n\n# Create the generator\ngenerator = Generator(model, Event)\n\n# Create the prompt\nprompt = prompt_template(now=now, message=message)\n\n# Extract the event information\nevent = generator(prompt)\n\n# Print the current date and time\nprint(f\"Today: {now}\")\n\n# Print the extracted event information\nprint(event)\n"
  },
  {
    "path": "docs/examples/extraction.md",
    "content": "# Named entity extraction\n\nNamed Entity Extraction is a fundamental problem in NLP. It involves identifying and categorizing named entities within a document: people, organization, dates, places, etc. It is usually the first step in a more complex NLP worklow. Here we will use the example of a pizza restaurant that receives orders via their website and need to identify the number and types of pizzas that are being ordered.\n\nGetting LLMs to output the extracted entities in a structured format can be challenging. In this tutorial we will see how we can use Outlines' JSON-structured generation to extract entities from a document and return them in a valid JSON data structure 100% of the time.\n\nAs always, we start with initializing the model. We will be using a quantized version of Mistal-7B-v0.1 (we're GPU poor):\n\n```python\nimport transformers\nimport outlines\n\nmodel_name = \"microsoft/Phi-3-mini-4k-instruct\"\nmodel = outlines.from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map=\"cuda\"),\n    transformers.AutoTokenizer.from_pretrained(model_name),\n)\n```\n\nAnd we will be using the following prompt template:\n\n```python\nfrom outlines import Template\n\ntake_order = Template.from_string(\n    \"\"\"You are the owner of a pizza parlor. Customers \\\n    send you orders from which you need to extract:\n\n    1. The pizza that is ordered\n    2. The number of pizzas\n\n    # EXAMPLE\n\n    ORDER: I would like one Margherita pizza\n    RESULT: {\"pizza\": \"Margherita\", \"number\": 1}\n\n    # OUTPUT INSTRUCTIONS\n\n    Answer in valid JSON. 
Here are the different objects relevant for the output:\n\n    Order:\n        pizza (str): name of the pizza\n        number (int): number of pizzas\n\n    Return a valid JSON of type \"Order\"\n\n    # OUTPUT\n\n    ORDER: {{ order }}\n    RESULT: \"\"\"\n)\n```\n\nWe now define our data model using Pydantic:\n\n```python\nfrom enum import Enum\nfrom pydantic import BaseModel\n\nclass Pizza(str, Enum):\n    margherita = \"Margherita\"\n    pepperonni = \"Pepperoni\"\n    calzone = \"Calzone\"\n\nclass Order(BaseModel):\n    pizza: Pizza\n    number: int\n```\n\nWe can now define our generator and call it on several incoming orders:\n\n```python\norders = [\n    \"Hi! I would like to order two pepperonni pizzas and would like them in 30mins.\",\n    \"Is it possible to get 12 margheritas?\"\n]\nprompts = [take_order(order=order) for order in orders]\n\ngenerator = outlines.Generator(model, Order)\n\nresults = generator(prompts)\nprint(results)\n# ['{\"pizza\": \"Pepperoni\", \"number\": 2}',\n# '{\"pizza\": \"Margherita\", \"number\": 12}']\n```\n\nThere are several ways you could improve this example:\n\n- Clients may order several types of pizzas.\n- Clients may order drinks as well.\n- If the pizza place has a delivery service we need to extract the client's address and phone number\n- Clients may specify the time for which they want the pizza. We could then check against a queuing system and reply to them with the estimated delivery time.\n\nHow would you change the Pydantic model to account for these use cases?\n"
  },
  {
    "path": "docs/examples/index.md",
    "content": "# Examples\n\nThis part of the documentation provides a few cookbooks that you can browse to get acquainted with the library and get some inspiration about what you could do with structured generation. Remember that you can easily change the model that is being used!\n\n- [Classification](classification.md): Classify customer requests.\n- [Named Entity Extraction](extraction.md): Extract information from pizza orders.\n- [Dating Profiles](dating_profiles.md): Build dating profiles from descriptions using prompt templating and JSON-structured generation.\n- [Chain Of Density](chain_of_density.md): Summarize documents using chain of density prompting and JSON-structured generation.\n- [Playing Chess](models_playing_chess.md): Make Phi-3 Mini play chess against itself using regex-structured generation.\n- [SimToM](simtom.md): Improve LLMs' Theory of Mind capabilities with perspective-taking prompting and JSON-structured generation.\n- [Q&A with Citations](qa-with-citations.md): Answer questions and provide citations using JSON-structured generation.\n- [Knowledge Graph Generation](knowledge_graph_extraction.md): Generate a Knowledge Graph from unstructured text using JSON-structured generation.\n- [Structured Generation Workflow](structured_generation_workflow.md):\n- [Chain Of Thought (CoT)](chain_of_thought.md): Generate a series of intermediate reasoning steps using regex-structured generation.\n- [ReAct Agent](react_agent.md): Build an agent with open weights models using regex-structured generation.\n- [Structured Generation from PDFs](read-pdfs.md): Use Outlines with vision-language models to read PDFs and produce structured output.\n- [Earnings reports to CSV](earnings-reports.md): Extract data from earnings reports to CSV using regex-structured generation.\n- [Receipt Digitization](receipt-digitization.md): Extract information from a picture of a receipt using structured generation.\n- [Extract Events Details](extract_event_details.md):\n\nRun 
Outlines on the cloud:\n\n- [BentoML](deploy-using-bentoml.md)\n- [Cerebrium](deploy-using-cerebrium.md)\n- [Modal](deploy-using-modal.md)\n"
  },
  {
    "path": "docs/examples/knowledge_graph_extraction.md",
    "content": "# Knowledge Graph Extraction\n\nIn this guide, we use [outlines](https://dottxt-ai.github.io/outlines/) to extract a knowledge graph from unstructured text.\n\nWe will use [llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library. Outlines supports llama-cpp-python, but we need to install it ourselves:\n\n```shell\npip install llama-cpp-python\n```\n\nTo create an outlines `LlamaCpp` model, you first need to create a `Llama` object from the `llama-cpp-python` library. Then you can create the outlines model by calling `models.from_llamacpp` with the `Llama` object instance as argument. To create the `Llama` object, you need to provide the model weights by passing the name of the repository on the HuggingFace Hub, and the filenames or glob pattern (it will automatically download the weights from the hub):\n\n```python\nimport llama_cpp\nimport outlines\n\nllm = llama_cpp.Llama(\n    \"NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF\",\n    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(\n        \"NousResearch/Hermes-2-Pro-Llama-3-8B\"\n    ),\n    n_gpu_layers=-1,\n    flash_attn=True,\n    n_ctx=8192,\n    verbose=False\n)\nmodel = outlines.from_llamacpp(llm)\n```\n\n??? 
note \"(Optional) Store the model weights in a custom folder\"\n\n    By default the model weights are downloaded to the hub cache, but if we want to store the weights in a custom folder, we can pull a quantized GGUF model [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF) by [NousResearch](https://nousresearch.com/) from [HuggingFace](https://huggingface.co/):\n\n    ```shell\n    wget https://hf.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf\n    ```\n\n    We initialize the model:\n\n    ```python\n    from llama_cpp import Llama\n\n    llm = Llama(\"/path/to/model/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf\", ...)\n    ```\n\n## Knowledge Graph Extraction\n\nWe first need to define our Pydantic class for each node and each edge of the knowledge graph:\n\n```python\nfrom pydantic import BaseModel, Field\n\nclass Node(BaseModel):\n    \"\"\"Node of the Knowledge Graph\"\"\"\n\n    id: int = Field(..., description=\"Unique identifier of the node\")\n    label: str = Field(..., description=\"Label of the node\")\n    property: str = Field(..., description=\"Property of the node\")\n\n\nclass Edge(BaseModel):\n    \"\"\"Edge of the Knowledge Graph\"\"\"\n\n    source: int = Field(..., description=\"Unique source of the edge\")\n    target: int = Field(..., description=\"Unique target of the edge\")\n    label: str = Field(..., description=\"Label of the edge\")\n    property: str = Field(..., description=\"Property of the edge\")\n```\n\nWe then define the Pydantic class for the knowledge graph and get its JSON schema:\n\n```python\nfrom typing import List\n\nclass KnowledgeGraph(BaseModel):\n    \"\"\"Generated Knowledge Graph\"\"\"\n\n    nodes: List[Node] = Field(..., description=\"List of nodes of the knowledge graph\")\n    edges: List[Edge] = Field(..., description=\"List of edges of the knowledge graph\")\n\nschema = KnowledgeGraph.model_json_schema()\n```\n\nWe then need to adapt 
our prompt to the [Hermes prompt format for JSON schema](https://github.com/NousResearch/Hermes-Function-Calling?tab=readme-ov-file#prompt-format-for-json-mode--structured-outputs):\n\n```python\nfrom outlines import Template\n\ngenerate_hermes_prompt = Template.from_string(\n    \"\"\"\n    <|im_start|>system\n    You are a world class AI model who answers questions in JSON\n    Here's the json schema you must adhere to:\n    <schema>\n    {{ schema }}\n    </schema>\n    <|im_end|>\n    <|im_start|>user\n    {{ user_prompt }}\n    <|im_end|>\n    <|im_start|>assistant\n    <schema>\n    \"\"\"\n)\n```\n\nFor a given user prompt, for example:\n\n```python\nuser_prompt = \"Alice loves Bob and she hates Charlie.\"\n```\n\nWe can use `outlines.Generator` by passing the Pydantic class we previously defined, and call the generator with the Hermes prompt:\n\n```python\nfrom outlines import Generator\n\ngenerator = Generator(model, KnowledgeGraph)\nprompt = generate_hermes_prompt(schema=schema, user_prompt=user_prompt)\nresponse = generator(prompt, max_tokens=1024, temperature=0, seed=42)\n```\n\nWe obtain the nodes and edges of the knowledge graph:\n\n```python\nprint(response)\n# {\"nodes\":[{\"id\":1,\"label\":\"Alice\",\"property\":\"loves,hates\"},\n# {\"id\":2,\"label\":\"Bob\",\"property\":\"loved_by\"},\n# {\"id\":3,\"label\":\"Charlie\",\"property\":\"hated_by\"}],\n# \"edges\":[{\"source\":1,\"target\":2,\"label\":\"loves\",\"property\":\"love\"},\n# {\"source\":1,\"target\":3,\"label\":\"hates\",\"property\":\"hate\"}]}\n\n```\n\n## (Optional) Visualizing the Knowledge Graph\n\nWe can use the [Graphviz library](https://graphviz.readthedocs.io/en/stable/) to visualize the generated knowledge graph. 
For detailed installation instructions, see [here](https://graphviz.readthedocs.io/en/stable/#installation).\n\n```python\nfrom graphviz import Digraph\n\ndot = Digraph()\nfor node in response[\"nodes\"]:\n    dot.node(str(node[\"id\"]), node[\"label\"], shape='circle', width='1', height='1')\nfor edge in response[\"edges\"]:\n    dot.edge(str(edge[\"source\"]), str(edge[\"target\"]), label=edge[\"label\"])\n\ndot.render('knowledge-graph.gv', view=True)\n```\n\n![Image of the Extracted Knowledge Graph](./images/knowledge-graph-extraction.png)\n\nThis example was originally contributed by [Alonso Silva](https://github.com/alonsosilvaallende).\n"
  },
  {
    "path": "docs/examples/models_playing_chess.md",
    "content": "# Large language models playing chess\n\nIn this example we will make a Phi-3 model play chess against itself. On its own the model easily generates invalid moves, so we will give it a little help. At each step we will generate a regex that only matches valid move, and use it to help the model only generating valid moves.\n\n## The chessboard\n\nThe game will be played on a standard checkboard. We will use the `chess` [library](https://github.com/niklasf/python-chess) to track the opponents' moves, and check that the moves are valid.\n\n```python\n%pip install outlines -q\n%pip install chess -q\n%pip install transformers accelerate einops -q\n\nimport chess\n\nboard = chess.Board(\"rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1\")\n```\n\n## The opponents\n\nPhi-3 will be playing against itself:\n\n```python\nimport transformers\nimport outlines\n\nmodel_name = \"microsoft/Phi-3-mini-4k-instruct\"\nmodel = outlines.from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(model_name),\n    transformers.AutoTokenizer.from_pretrained(model_name),\n)\n```\n\n## A little help for the language model\n\nTo make sure Phi-3 generates valid chess moves we will use Outline's regex-structured generation. We define a function that takes the current state of the board and returns a regex that matches all possible legal moves:\n\n```python\nimport re\nfrom outlines.types.dsl import either, String\n\ndef legal_moves_regex(board):\n    \"\"\"Build a regex that only matches valid moves.\"\"\"\n    legal_moves = list(board.legal_moves)\n    legal_modes_str = [board.san(move) for move in legal_moves]\n    legal_modes_str = [re.sub(r\"[+#]\", \"\", move) for move in legal_modes_str]\n    regex_pattern = either(*[String(move) for move in legal_modes_str])\n    return regex_pattern\n```\n\n## Prompting the language model\n\nThe prompt corresponds to the current state of the board, so we start with:\n\n```python\nprompt = \"Let's play Chess. 
Moves: \"\n\n```\n\nWe update the prompt at each step so it reflects the state of the board after the previous move.\n\n## Let's play\n\n```python\nboard_state = \" \"\nturn_number = 0\nwhile not board.is_game_over():\n    regex_pattern = legal_moves_regex(board)\n    structured = model(prompt + board_state, regex_pattern)\n    move = board.parse_san(structured)\n\n    if turn_number % 2 == 0 :  # It's White's turn\n        board_state += board.san(move) + \" \"\n    else:\n        board_state += board.san(move) + \" \" + str(turn_number) + \".\"\n\n    turn_number += 1\n\n    board.push(move)\n\n    print(board_state)\n```\n\nInterestingly enough, Phi-3 hates capturing.\n\n```pgn\n e4 e5 1.Nf3 Ne7 3.b4 Nf5 5.Nc3 Ne7 7.Bb5 a6 9.Na4 b6 11.c3 Nec6 13.c4 a5 15.d4 Qg5 17.Nd2 Bb7 19.dxe5\n```\n\n*This example was originally authored by [@903124S](https://x.com/903124S) in [this gist](https://gist.github.com/903124/cfbefa24da95e2316e0d5e8ef8ed360d).*\n"
  },
  {
    "path": "docs/examples/prompt_templates/chain_of_density.txt",
    "content": "Article: {{ article }}\n\nYou will generate increasingly concise, entity-dense summaries of the above Article.\n\nRepeat the following 2 steps 5 times.\n\nStep 1. Identify 1-3 informative Entities (\"; \" delimited) from the Article which are missing from the previously generated summary.\nStep 2. Write a new, denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities.\n\nA Missing Entity is:\n- Relevant: to the main story.\n- Specific: descriptive yet concise (5 words or fewer).\n- Novel: not in the previous summary.\n- Faithful: present in the Article.\n- Anywhere: located anywhere in the Article.\n\nGuidelines:\n- The first summary should be long (4-5 sentences, ~80 words) yet highly non-specific, containing little information beyond the entities marked as missing. Use overly verbose language and fillers (e.g., \"this article discusses\") to reach ~80 words.\n- Make every word count: rewrite the previous summary to improve flow and make space for additional entities.\n- Make space with fusion, compression, and removal of uninformative phrases like \"the article discusses\".\n- The summaries should become highly dense and concise yet self-contained, e.g., easily understood without the Article.\n- Missing entities can appear anywhere in the new summary.\n- Never drop entities from the previous summary. If space cannot be made, add fewer new entities.\n\nRemember, use the exact same number of words for each summary.\n\nAnswer in JSON. The JSON should be a a dictionary with key \"summaries\" that contains a list (length 5) of dictionaries whose keys are \"Missing_Entities\" and \"Denser_Summary\".\n"
  },
  {
    "path": "docs/examples/prompt_templates/classification.txt",
    "content": "You are an experienced customer success manager.\n\nGiven a request from a client, you need to determine when the\nrequest is urgent using the label \"URGENT\" or when it can wait\na little with the label \"STANDARD\".\n\n# Examples\n\nRequest: \"How are you?\"\nLabel: STANDARD\n\nRequest: \"I need this fixed immediately!\"\nLabel: URGENT\n\n# TASK\n\nRequest: {{ request }}\nLabel:\n"
  },
  {
    "path": "docs/examples/prompt_templates/react_agent.txt",
    "content": "<|im_start|>system\nYou are a world class AI model who answers questions in JSON with correct Pydantic schema.\nHere's the json schema you must adhere to:\n<schema>\n{{ schema }}\n</schema>\nToday is {{ today }}\nYou run in a loop of Scratchpad, Thought, Action, Action Input, PAUSE, Observation.\nAt the end of the loop you output a Final Answer.\nUse Scratchpad to store the information from the Observation useful to answer the question\nUse Thought to describe your thoughts about the question you have been asked and reflect carefully about the Observation if it exists.\nUse Action to run one of the actions available to you.\nUse Action Input to input the arguments of the selected action - then return PAUSE.\nObservation will be the result of running those actions.\nYour available actions are:\ncalculate:\ne.g. calulate: 4**2 / 3\nRuns a calculation and returns the number - uses Python so be sure to use floating point syntax if necessary\nwikipedia:\ne.g. wikipedia: Django\nReturns a summary from searching Wikipedia\nDO NOT TRY TO GUESS THE ANSWER. Begin!\n<|im_end|>\n<|im_start|>user\n{{ question }}\n<|im_end|>\n<|im_start|>assistant\n"
  },
  {
    "path": "docs/examples/prompt_templates/simtom_prospective_taking.txt",
    "content": "<s>[INST] The following is a sequence of events about some characters, that takes place in multiple locations.\nYour job is to output only the events that the specified character, {{character}}, knows about.\n\nHere are a few rules:\n1. A character knows about all events that they do.\n2. If a character is in a certain room/location, that character knows about all other events that happens in the room. This includes other characters leaving or exiting the location, the locations of objects in that location, and whether somebody moves an object to another place.\n3. If a character leaves a location, and is NOT in that location, they no longer know about any events that happen within that location. However, they can re-enter the location.\n\nStory: {{story}}\nWhat events does {{character}} know about? Only output the events according to the above rules, do not provide an explanation. [/INST]\n"
  },
  {
    "path": "docs/examples/prompt_templates/simtom_simulation.txt",
    "content": "<s>[INST] {% for event in events %}\n{{event}}\n{% endfor %}\nYou are {{name}}.\nBased on the above information, answer the following question:\n{{question}}\nYou must choose one of the above choices, do not say there is not enough information. Answer with a single word, do not output anything else. [/INST]\n"
  },
  {
    "path": "docs/examples/qa-with-citations.md",
    "content": "# Generate Synthetic Data and Q&A with Citations\n\nThis tutorial is adapted from the [instructor-ollama notebook](https://github.com/alonsosilvaallende/Hermes-Function-Calling/blob/main/examples/instructor_ollama.ipynb). We start with a simple example to generate synthetic data and then we approach the problem of question answering by providing citations.\n\nWe will use [llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library. Outlines supports llama-cpp-python, but we need to install it ourselves:\n\n```shell\npip install llama-cpp-python\n```\n\nWe download the model weights by passing the name of the repository on the HuggingFace Hub, and the filenames (or glob pattern):\n```python\nimport llama_cpp\nimport outlines\n\nllm = llama_cpp.Llama(\n    \"NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF\",\n    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(\n        \"NousResearch/Hermes-2-Pro-Llama-3-8B\"\n    ),\n    n_gpu_layers=-1,\n    flash_attn=True,\n    n_ctx=8192,\n    verbose=False\n)\nmodel = outlines.from_llamacpp(llm)\n```\n\n??? 
note \"(Optional) Store the model weights in a custom folder\"\n\n    By default the model weights are downloaded to the hub cache, but if we want to store the weights in a custom folder, we can pull a quantized GGUF model [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF) by [NousResearch](https://nousresearch.com/) from [HuggingFace](https://huggingface.co/):\n\n    ```shell\n    wget https://hf.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf\n    ```\n\n    We initialize the model:\n\n    ```python\n    from llama_cpp import Llama\n\n    llm = Llama(\"/path/to/model/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf\", ...)\n    ```\n\n## Generate Synthetic Data\n\nWe first need to define our Pydantic class for a user:\n\n```python\nfrom pydantic import BaseModel, Field\n\nclass UserDetail(BaseModel):\n    id: int = Field(..., description=\"Unique identifier\") # so the model keeps track of the number of users\n    first_name: str\n    last_name: str\n    age: int\n```\n\nWe then define a Pydantic class for a list of users:\n\n```python\nfrom typing import List\n\nclass Users(BaseModel):\n    users: List[UserDetail]\n```\n\nWe can use `outlines.Generator` by passing the Pydantic class we just defined, and call the generator:\n\n```python\nimport json\n\ngenerator = outlines.Generator(model, Users)\nresponse = generator(\"Create 5 fake users\", max_tokens=1024, temperature=0, seed=42)\nresponse = json.loads(response)\nprint(response['users'])\n# [{'id': 1, 'first_name': 'John', 'last_name': 'Doe', 'age': 25},\n# {'id': 2, 'first_name': 'Jane', 'last_name': 'Doe', 'age': 30},\n# {'id': 3, 'first_name': 'Bob', 'last_name': 'Smith', 'age': 40},\n# {'id': 4, 'first_name': 'Alice', 'last_name': 'Smith', 'age': 35},\n# {'id': 5, 'first_name': 'John', 'last_name': 'Smith', 'age': 20}]\n```\n\n```python\nfor user in response['users']:\n    print(user['first_name'])\n    print(user['last_name'])\n    
print(user['age'])\n    print(\"#####\")\n# John\n# Doe\n# 25\n# #####\n# Jane\n# Doe\n# 30\n# #####\n# Bob\n# Smith\n# 40\n# #####\n# Alice\n# Smith\n# 35\n# #####\n# John\n# Smith\n# 20\n# #####\n```\n\n## QA with Citations\n\nWe first need to define our Pydantic class for QA with citations:\n\n```python\nfrom typing import List\nfrom pydantic import BaseModel\n\nclass QuestionAnswer(BaseModel):\n    question: str\n    answer: str\n    citations: List[str]\n\nschema = QuestionAnswer.model_json_schema()\n```\n\nWe then need to adapt our prompt to the [Hermes prompt format for JSON schema](https://github.com/NousResearch/Hermes-Function-Calling?tab=readme-ov-file#prompt-format-for-json-mode--structured-outputs):\n\n```python\nfrom outlines import Template\n\nhermes_prompt = Template.from_string(\n    \"\"\"\n    <|im_start|>system\n    You are a world class AI model who answers questions in JSON with correct and exact citations\n    extracted from the `Context`.\n    Here's the json schema you must adhere to:\n    <schema>\n    {{ schema }}\n    </schema>\n    <|im_end|>\n    <|im_start|>user\n    `Context`:\n    {{ context }}\n    `Question`:\n    {{ question }}\n    <|im_end|>\n    <|im_start|>assistant\n    \"\"\"\n)\n```\n\nWe can use `outlines.Generator` by passing the Pydantic class we previously defined, and call the generator with Hermes prompt:\n\n```python\nquestion = \"What did the author do during college?\"\ncontext = \"\"\"\nMy name is Jason Liu, and I grew up in Toronto Canada but I was born in China.\nI went to an arts high school but in university I studied Computational Mathematics and physics.\nAs part of coop I worked at many companies including Stitchfix, Facebook.\nI also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.\n\"\"\"\ngenerator = outlines.Generator(model, QuestionAnswer)\nprompt = hermes_prompt(question=question, context=context, schema=schema)\nresponse = generator(prompt, 
max_tokens=1024, temperature=0, seed=42)\nprint(response)\n# {\"question\": \"What did the author do during college?\", \"answer\": \"The author studied Computational Mathematics and physics in university and was also involved in starting the Data Science club, serving as its president for 2 years.\", \"citations\": [\"I went to an arts high school but in university I studied Computational Mathematics and physics.\", \"I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.\"]}\n```\n\nWe can do the same for a list of question-context pairs:\n\n```python\nquestion1 = \"Where was John born?\"\ncontext1 = \"\"\"\nJohn Doe is a software engineer who was born in New York, USA.\nHe studied Computer Science at the Massachusetts Institute of Technology.\nDuring his studies, he interned at Google and Microsoft.\nHe also founded the Artificial Intelligence club at his university and served as its president for three years.\n\"\"\"\n\nquestion2 = \"What did Emily study in university?\"\ncontext2 = \"\"\"\nEmily Smith is a data scientist from London, England.\nShe attended the University of Cambridge where she studied Statistics and Machine Learning.\nShe interned at IBM and Amazon during her summer breaks.\nEmily was also the head of the Women in Tech society at her university.\n\"\"\"\n\nquestion3 = \"Which companies did Robert intern at?\"\ncontext3 = \"\"\"\nRobert Johnson, originally from Sydney, Australia, is a renowned cybersecurity expert.\nHe studied Information Systems at the University of Melbourne.\nRobert interned at several cybersecurity firms including NortonLifeLock and McAfee.\nHe was also the leader of the Cybersecurity club at his university.\n\"\"\"\n\nquestion4 = \"What club did Alice start at her university?\"\ncontext4 = \"\"\"\nAlice Williams, a native of Dublin, Ireland, is a successful web developer.\nShe studied Software Engineering at Trinity College Dublin.\nAlice interned at several tech 
companies including Shopify and Squarespace.\nShe started the Web Development club at her university and was its president for two years.\n\"\"\"\n\nquestion5 = \"What did Michael study in high school?\"\ncontext5 = \"\"\"\nMichael Brown is a game developer from Tokyo, Japan.\nHe attended a specialized high school where he studied Game Design.\nHe later attended the University of Tokyo where he studied Computer Science.\nMichael interned at Sony and Nintendo during his university years.\nHe also started the Game Developers club at his university.\n\"\"\"\n\nfor question, context in [\n    (question1, context1),\n    (question2, context2),\n    (question3, context3),\n    (question4, context4),\n    (question5, context5),\n]:\n    prompt = hermes_prompt(question=question, context=context, schema=schema)\n    generator = outlines.Generator(model, QuestionAnswer)\n    response = generator(prompt, max_tokens=1024, temperature=0, seed=42)\n    response = json.loads(response)\n    print(question)\n    print(response['answer'])\n    print(response['citations'])\n    print(\"\\n\\n\")\n\n# 'Where was John born?'\n# 'John Doe was born in New York, USA.'\n# ['John Doe is a software engineer who was born in New York, USA.']\n#\n#\n# 'What did Emily study in university?'\n# 'Emily studied Statistics and Machine Learning in university.'\n# ['She attended the University of Cambridge where she studied Statistics and Machine Learning.']\n#\n#\n# 'Which companies did Robert intern at?'\n# 'Robert interned at NortonLifeLock and McAfee.'\n# ['Robert Johnson, originally from Sydney, Australia, is a renowned cybersecurity expert. He interned at several cybersecurity firms including NortonLifeLock and McAfee.']\n#\n#\n# 'What club did Alice start at her university?'\n# 'Alice started the Web Development club at her university.'\n# ['Alice Williams, a native of Dublin, Ireland, is a successful web developer. 
She started the Web Development club at her university and was its president for two years.']\n#\n#\n# 'What did Michael study in high school?'\n# 'Michael studied Game Design in high school.'\n# ['Michael Brown is a game developer from Tokyo, Japan. He attended a specialized high school where he studied Game Design.']\n```\n\nThis example was originally contributed by [Alonso Silva](https://github.com/alonsosilvaallende).\n"
  },
  {
    "path": "docs/examples/react_agent.md",
    "content": "# ReAct Agent\n\nThis example shows how to use [outlines](https://dottxt-ai.github.io/outlines/) to build your own agent with open weights local models and structured outputs. It is inspired by the blog post [A simple Python implementation of the ReAct pattern for LLMs](https://til.simonwillison.net/llms/python-react-pattern) by [Simon Willison](https://simonwillison.net/).\n\nThe ReAct pattern (for Reason+Act) is described in the paper [ReAct: Synergizing Reasoning and Acting in Language Models](https://arxiv.org/abs/2210.03629). It's a pattern where you implement additional actions that an LLM can take - searching Wikipedia or running calculations for example - and then teach it how to request the execution of those actions, and then feed their results back into the LLM.\n\nAdditionally, we give the LLM the possibility of using a scratchpad described in the paper [Show Your Work: Scratchpads for Intermediate Computation with Language Models](https://arxiv.org/abs/2112.00114) which improves the ability of LLMs to perform multi-step computations.\n\nWe use [llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library. Outlines supports llama-cpp-python, but we need to install it ourselves:\n\n```shell\npip install llama-cpp-python\n```\n\nWe download the model weights by passing the name of the repository on the HuggingFace Hub, and the filenames (or glob pattern):\n```python\nimport llama_cpp\nimport outlines\n\nllm = llama_cpp.Llama(\n    \"NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF\",\n    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(\n        \"NousResearch/Hermes-2-Pro-Llama-3-8B\"\n    ),\n    n_gpu_layers=-1,\n    flash_attn=True,\n    n_ctx=8192,\n    verbose=False\n)\nmodel = outlines.from_llamacpp(llm)\n```\n\n??? 
note \"(Optional) Store the model weights in a custom folder\"\n\n    By default the model weights are downloaded to the hub cache, but if we want to store the weights in a custom folder, we pull a quantized GGUF model [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF) by [NousResearch](https://nousresearch.com/) from [HuggingFace](https://huggingface.co/):\n\n    ```shell\n    wget https://hf.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf\n    ```\n\n    We initialize the model:\n\n    ```python\n    from llama_cpp import Llama\n\n    llm = Llama(\"/path/to/model/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf\", ...)\n    ```\n\n## Build a ReAct agent\n\nIn this example, we use two tools:\n\n- wikipedia: \\<search term\\> - searches Wikipedia and returns the snippet of the first result\n- calculate: \\<expression\\> - evaluates an expression using Python's eval() function\n\n```python\nimport httpx\n\ndef wikipedia(q):\n    return httpx.get(\"https://en.wikipedia.org/w/api.php\", params={\n        \"action\": \"query\",\n        \"list\": \"search\",\n        \"srsearch\": q,\n        \"format\": \"json\"\n    }).json()[\"query\"][\"search\"][0][\"snippet\"]\n\n\ndef calculate(numexp):\n    return eval(numexp)\n```\n\nWe define the logic of the agent through a Pydantic class. First, we want the LLM to decide only between the two previously defined tools:\n\n```python\nfrom enum import Enum\n\nclass Action(str, Enum):\n    wikipedia = \"wikipedia\"\n    calculate = \"calculate\"\n```\n\nOur agent will loop through Thought and Action. We explicitly give the Action Input field so it doesn't forget to add the arguments of the Action. 
We also add a scratchpad (optional).\n\n```python\nfrom pydantic import BaseModel, Field\n\nclass Reason_and_Act(BaseModel):\n    Scratchpad: str = Field(..., description=\"Information from the Observation useful to answer the question\")\n    Thought: str = Field(..., description=\"It describes your thoughts about the question you have been asked\")\n    Action: Action\n    Action_Input: str = Field(..., description=\"The arguments of the Action.\")\n```\n\nOur agent will reach a Final Answer. We also add a scratchpad (optional).\n\n```python\nclass Final_Answer(BaseModel):\n    Scratchpad: str = Field(..., description=\"Information from the Observation useful to answer the question\")\n    Final_Answer: str = Field(..., description=\"Answer to the question grounded on the Observation\")\n```\n\nOur agent will decide when it has reached a Final Answer and therefore to stop the loop of Thought and Action.\n\n```python\nfrom typing import Union\n\nclass Decision(BaseModel):\n    Decision: Union[Reason_and_Act, Final_Answer]\n\njson_schema = Decision.model_json_schema()\n```\n\nWe then need to adapt our prompt to the [Hermes prompt format for JSON schema](https://github.com/NousResearch/Hermes-Function-Calling?tab=readme-ov-file#prompt-format-for-json-mode--structured-outputs) and explain the agent logic. 
We can load a template from a file for that:\n\n```python\nfrom outlines import Template\n\nhermes_prompt = Template.from_file(\"prompt_templates/react_agent.txt\")\n```\n\nWe define a ChatBot class\n\n```python\nclass ChatBot:\n    def __init__(self, prompt=\"\"):\n        self.prompt = prompt\n\n    def __call__(self, user_prompt):\n        self.prompt += user_prompt\n        result = self.execute()\n        return result\n\n    def execute(self):\n        generator = outlines.Generator(model, Decision)\n        result = generator(self.prompt, max_tokens=1024, temperature=0, seed=42)\n        return result\n```\n\nWe define a query function:\n\n```python\nimport json\n\ndef query(question, max_turns=5):\n    i = 0\n    next_prompt = (\n        \"\\n<|im_start|>user\\n\" + question + \"<|im_end|>\"\n        \"\\n<|im_start|>assistant\\n\"\n    )\n    previous_actions = []\n    while i < max_turns:\n        i += 1\n        prompt = generate_hermes_prompt(\n            question=question,\n            schema=Decision.model_json_schema(),\n            today=datetime.datetime.today().strftime('%Y-%m-%d')\n        )\n        bot = ChatBot(prompt=prompt)\n        result = bot(next_prompt)\n        json_result = json.loads(result)['Decision']\n        if \"Final_Answer\" not in list(json_result.keys()):\n            scratchpad = json_result['Scratchpad'] if i == 0 else \"\"\n            thought = json_result['Thought']\n            action = json_result['Action']\n            action_input = json_result['Action_Input']\n            print(f\"\\x1b[34m Scratchpad: {scratchpad} \\x1b[0m\")\n            print(f\"\\x1b[34m Thought: {thought} \\x1b[0m\")\n            print(f\"\\x1b[36m  -- running {action}: {str(action_input)}\\x1b[0m\")\n            if action + \": \" + str(action_input) in previous_actions:\n                observation = \"You already run that action. 
**TRY A DIFFERENT ACTION INPUT.**\"\n            else:\n                if action==\"calculate\":\n                    try:\n                        observation = eval(str(action_input))\n                    except Exception as e:\n                        observation = f\"{e}\"\n                elif action==\"wikipedia\":\n                    try:\n                        observation = wikipedia(str(action_input))\n                    except Exception as e:\n                        observation = f\"{e}\"\n            print()\n            print(f\"\\x1b[33m Observation: {observation} \\x1b[0m\")\n            print()\n            previous_actions.append(action + \": \" + str(action_input))\n            next_prompt += (\n                \"\\nScratchpad: \" + scratchpad +\n                \"\\nThought: \" + thought +\n                \"\\nAction: \" + action  +\n                \"\\nAction Input: \" + action_input +\n                \"\\nObservation: \" + str(observation)\n            )\n        else:\n            scratchpad = json_result[\"Scratchpad\"]\n            final_answer = json_result[\"Final_Answer\"]\n            print(f\"\\x1b[34m Scratchpad: {scratchpad} \\x1b[0m\")\n            print(f\"\\x1b[34m Final Answer: {final_answer} \\x1b[0m\")\n            return final_answer\n    print(f\"\\nFinal Answer: I am sorry, but I am unable to answer your question. 
Please provide more information or a different question.\")\n    return \"No answer found\"\n```\n\nWe can now test our ReAct agent:\n\n```python\nprint(query(\"What's 2 to the power of 10?\"))\n# Scratchpad:\n# Thought: I need to perform a mathematical calculation to find the result of 2 to the power of 10.\n#  -- running calculate: 2**10\n#\n# Observation: 1024\n#\n# Scratchpad: 2 to the power of 10 is 1024.\n# Final Answer: 2 to the power of 10 is 1024.\n# 2 to the power of 10 is 1024.\n```\n\n```python\nprint(query(\"What does England share borders with?\"))\n# Scratchpad:\n# Thought: To answer this question, I will use the 'wikipedia' action to gather information about England's geographical location and its borders.\n#  -- running wikipedia: England borders\n#\n# Observation: Anglo-Scottish <span class=\"searchmatch\">border</span> (Scottish Gaelic: Crìochan Anglo-Albannach) is an internal <span class=\"searchmatch\">border</span> of the United Kingdom separating Scotland and <span class=\"searchmatch\">England</span> which runs for\n#\n# Scratchpad: Anglo-Scottish border (Scottish Gaelic: Crìochan Anglo-Albannach) is an internal border of the United Kingdom separating Scotland and England which runs for\n# Final Answer: England shares a border with Scotland.\n# England shares a border with Scotland.\n```\n\nAs mentioned in Simon's blog post, this is not a very robust implementation at all and there's a ton of room for improvement. But it is lovely how simple it is with a few lines of Python to make these extra capabilities available to the LLM. And now you can run it locally with an open weights LLM.\n\nThis example was originally contributed by [Alonso Silva](https://github.com/alonsosilvaallende).\n"
  },
  {
    "path": "docs/examples/read-pdfs.md",
    "content": "# PDF to structured output with vision language models\n\nA common task with language models is to ask language models questions about a PDF file.\n\nTypically, the output is unstructured text, i.e. \"talking\" to your PDF.\n\nIn some cases, you may wish to extract structured information from the PDF, like tables, lists, citations, etc.\n\nPDFs are difficult to machine read. However, you can simply convert the PDF to images, and then use a vision language model to extract structured information from the images.\n\nThis cookbook demonstrates how to\n\n1. Convert a PDF to a list of images\n2. Use a vision language model to extract structured information from the images\n\n## Dependencies\n\nYou'll need to install these dependencies:\n\n```shell\npip install outlines pillow transformers torch==2.4.0 pdf2image\n\n# Optional, but makes the output look nicer\npip install rich\n```\n\n## Import the necessary libraries\n\n```python\nfrom PIL import Image\nimport outlines\nimport torch\nfrom transformers import AutoProcessor\nfrom pydantic import BaseModel\nfrom typing import List, Optional\nfrom pdf2image import convert_from_path\nimport os\nfrom rich import print\nimport requests\n```\n\n## Choose a model\n\nWe've tested this example with [Pixtral 12b](https://huggingface.co/mistral-community/pixtral-12b) and [Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct).\n\nTo use Pixtral:\n\n```python\nfrom transformers import LlavaForConditionalGeneration, LlavaProcessor\nmodel_name=\"mistral-community/pixtral-12b\"\nmodel_class=LlavaForConditionalGeneration\nprocessor_class = LlavaProcessor\n```\n\nTo use Qwen-2-VL:\n\n```python\nfrom transformers import Qwen2VLForConditionalGeneration, AutoProcessor\nmodel_name = \"Qwen/Qwen2-VL-7B-Instruct\"\nmodel_class = Qwen2VLForConditionalGeneration\nprocessor_class = AutoProcessor\n```\n\nYou can load your model into memory with:\n\n```python\n# This loads the model into memory. 
On your first run,\n# it will have to download the model, which might take a while.\nmodel_kwargs={\"device_map\": \"auto\", \"torch_dtype\": torch.bfloat16}\nprocessor_kwargs={\"device_map\": \"cpu\"}\ntf_model = model_class.from_pretrained(model_name, **model_kwargs)\ntf_processor = processor_class.from_pretrained(model_name, **processor_kwargs)\n\nmodel = outlines.from_transformers(tf_model, tf_processor)\n```\n\n## Convert the PDF to images\n\nWe'll use the `pdf2image` library to convert each page of the PDF to an image.\n\n`convert_pdf_to_images` is a convenience function that converts each page of the PDF to an image, and optionally saves the images to disk when `output_dir` is provided.\n\nNote: the `dpi` argument is important. It controls the resolution of the images. High DPI images are higher quality and may yield better results,\nbut they are also larger, slower to process, and require more memory.\n\n```python\nfrom pdf2image import convert_from_path\nfrom PIL import Image\nimport os\nfrom typing import List, Optional\n\ndef convert_pdf_to_images(\n    pdf_path: str,\n    output_dir: Optional[str] = None,\n    dpi: int = 120,\n    fmt: str = 'PNG'\n) -> List[Image.Image]:\n    \"\"\"\n    Convert a PDF file to a list of PIL Image objects.\n\n    Args:\n        pdf_path: Path to the PDF file\n        output_dir: Optional directory to save the images\n        dpi: Resolution for the conversion. 
High DPI is high quality, but also slow and memory intensive.\n        fmt: Output format (PNG recommended for quality)\n\n    Returns:\n        List of PIL Image objects\n    \"\"\"\n    # Convert PDF to list of images\n    images = convert_from_path(\n        pdf_path,\n        dpi=dpi,\n        fmt=fmt\n    )\n\n    # Optionally save images\n    if output_dir:\n        os.makedirs(output_dir, exist_ok=True)\n        for i, image in enumerate(images):\n            image.save(os.path.join(output_dir, f'page_{i+1}.{fmt.lower()}'))\n\n    return images\n```\n\nWe're going to use the [Louf & Willard paper](https://arxiv.org/pdf/2307.09702) that described the method that Outlines uses for structured generation.\n\nTo download the PDF, run:\n\n```python\n# Download the PDF file\npdf_url = \"https://arxiv.org/pdf/2307.09702\"\nresponse = requests.get(pdf_url)\n\n# Save the PDF locally\nwith open(\"louf-willard.pdf\", \"wb\") as f:\n    f.write(response.content)\n```\n\nNow, we can convert the PDF to a list of images:\n\n```python\n# Load the pdf\nimages = convert_pdf_to_images(\n    \"louf-willard.pdf\",\n    dpi=120,\n    output_dir=\"output_images\"\n)\n```\n\n## Extract structured information from the images\n\nThe structured output you can extract is exactly the same as everywhere else in Outlines -- you can use regular expressions, JSON schemas, selecting from a list of options, etc.\n\n### Extracting data into JSON\n\nSuppose you wished to go through each page of the PDF, and extract the page description, key takeaways, and page number.\n\nYou can do this by defining a JSON schema, and then using `outlines.Generator` to extract the data.\n\nFirst, define the structure you want to extract:\n\n```python\nclass PageSummary(BaseModel):\n    description: str\n    key_takeaways: List[str]\n    page_number: int\n```\n\nSecond, we need to set up the prompt. Adding special tokens can be tricky, so we use the transformers processor to apply the special tokens for us. 
To do so, we specify a list of messages, where each message is a dictionary with a `role` and `content` key.\n\nImages are denoted with `type: \"image\"`, and text is denoted with `type: \"text\"`.\n\n```python\nmessages = [\n    {\n        \"role\": \"user\",\n        \"content\": [\n            # The text you're passing to the model --\n            # this is where you do your standard prompting.\n            {\"type\": \"text\", \"text\": f\"\"\"\n                Describe the page in a way that is easy for a PhD student to understand.\n\n                Return the information in the following JSON schema:\n                {PageSummary.model_json_schema()}\n\n                Here is the page:\n                \"\"\"\n            },\n\n            # This a placeholder, the actual image is passed in when\n            # we call the generator function down below.\n            {\"type\": \"image\", \"image\": \"\"},\n        ],\n    }\n]\n\n# Convert the messages to the final prompt\nprompt = tf_processor.apply_chat_template(\n    messages, tokenize=False, add_generation_prompt=True\n)\n```\n\nNow we iterate through each image, and extract the structured information:\n\n```python\n# Page summarizer function\npage_summary_generator = outlines.Generator(model, PageSummary)\n\nfor image in images:\n    result = page_summary_generator({\"text\": prompt, \"images\": image})\n    print(result)\n```\n\n### Regular expressions to extract the arxiv paper identifier\n\nThe [arXiv paper identifier](https://info.arxiv.org/help/arxiv_identifier.html) is a unique identifier for each paper. These identifiers have the format `arXiv:YYMM.NNNNN` (five end digits) or `arXiv:YYMM.NNNN` (four end digits). arXiv identifiers are typically watermarked on papers uploaded to arXiv.\n\narXiv identifiers are optionally followed by a version number, i.e. 
`arXiv:YYMM.NNNNNvX`.\n\nWe can use a regular expression to define this pattern:\n\n```python\nfrom outlines.types import Regex\n\npaper_regex = Regex(r'arXiv:\\d{2}[01]\\d\\.\\d{4,5}(v\\d)?')\n```\n\nWe can build an extractor function from the regex:\n\n```python\nid_extractor = outlines.Generator(model, paper_regex)\n```\n\nNow, we can extract the arxiv paper identifier from the first image:\n\n```python\narxiv_instruction = tf_processor.apply_chat_template(\n    [\n        {\n            \"role\": \"user\",\n            \"content\": [\n                {\"type\": \"text\", \"text\": f\"\"\"\n                Extract the arxiv paper identifier from the page.\n\n                Here is the page:\n                \"\"\"},\n                {\"type\": \"image\", \"image\": \"\"},\n            ],\n        }\n    ],\n    tokenize=False,\n    add_generation_prompt=True\n)\n\n# Extract the arxiv paper identifier\npaper_id = id_extractor({\"text\": arxiv_instruction, \"images\": images[0]})\n```\n\nAs of the time of this writing, the arxiv paper identifier is\n\n```\narXiv:2307.09702v4\n```\n\nYour version number may be different, but the part before `vX` should match.\n\n### Categorize the paper into one of several categories\n\n`outlines.Generator` also allows the model to select one of several options by providing a Literal type hint with the categories.\n\nSuppose we wanted to categorize the paper into being about \"language models\", \"cell biology\", or \"other\". 
We would then define the output type as `Literal[\"llms\", \"cell biology\", \"other\"]`.\n\nLet's define a few categories we might be interested in:\n\n```python\ncategories = [\n    \"llms\",\n    \"cell biology\",\n    \"other\"\n]\n```\n\nNow we can construct the prompt:\n\n```python\ncategorization_instruction = tf_processor.apply_chat_template(\n    [\n        {\n            \"role\": \"user\",\n            \"content\": [\n                {\"type\": \"text\", \"text\": f\"\"\"\n                Please choose one of the following categories\n                that best describes the paper.\n\n                {categories}\n\n                Here is the paper:\n                \"\"\"},\n\n                {\"type\": \"image\", \"image\": \"\"},\n            ],\n        }\n    ],\n    tokenize=False,\n    add_generation_prompt=True\n)\n```\n\nNow we can show the model the first page and extract the category:\n\n```python\nfrom typing import Literal\n\n# Build the choice extractor\ncategorizer = outlines.Generator(model, Literal[\"llms\", \"cell biology\", \"other\"])\n\n# Categorize the paper\ncategory = categorizer({\"text\": categorization_instruction, \"images\": images[0]})\nprint(category)\n```\n\nWhich should return:\n\n```\nllms\n```\n\n## Additional notes\n\nYou can provide multiple images to the model by\n\n1. Adding additional image messages\n2. 
Providing a list of images to the generator\n\nFor example, to have two images, you can do:\n\n```python\ntwo_image_prompt = tf_processor.apply_chat_template(\n    [\n        {\n            \"role\": \"user\",\n            \"content\": [\n                {\"type\": \"text\", \"text\": \"are both of these images of hot dogs?\"},\n\n                # Tell the model there are two images\n                {\"type\": \"image\", \"image\": \"\"},\n                {\"type\": \"image\", \"image\": \"\"},\n            ],\n        }\n    ],\n    tokenize=False,\n    add_generation_prompt=True\n)\n\n# Pass two images to the model\ngenerator = outlines.Generator(model, Literal[\"hot dog\", \"not hot dog\"])\n\nresult = generator({\"text\": two_image_prompt, \"images\": [images[0], images[1]]})\nprint(result)\n```\n\nUsing the first two pages of the paper (they are not images of hot dogs), we should get\n\n```\nnot hot dog\n```\n"
  },
  {
    "path": "docs/examples/receipt-digitization.md",
    "content": "# Receipt Data Extraction with VLMs\n\n## Setup\n\nYou'll need to install the dependencies:\n\n```shell\npip install outlines torch==2.4.0 transformers accelerate pillow rich\n```\n\n## Import libraries\n\nLoad all the necessary libraries:\n\n```python\n# LLM stuff\nimport outlines\nimport torch\nfrom transformers import AutoProcessor\nfrom pydantic import BaseModel, Field\nfrom typing import Literal, Optional, List\n\n# Image stuff\nfrom PIL import Image\nimport requests\n\n# Rich for pretty printing\nfrom rich import print\n```\n\n## Choose a model\n\nThis example has been tested with `mistral-community/pixtral-12b` ([HF link](https://huggingface.co/mistral-community/pixtral-12b)) and `Qwen/Qwen2-VL-7B-Instruct` ([HF link](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)).\n\nWe recommend Qwen-2-VL as we have found it to be more accurate than Pixtral.\n\nIf you want to use Qwen-2-VL, you can do the following:\n\n```python\n# To use Qwen-2-VL:\nfrom transformers import Qwen2VLForConditionalGeneration, AutoProcessor\nmodel_name = \"Qwen/Qwen2-VL-7B-Instruct\"\nmodel_class = Qwen2VLForConditionalGeneration\nprocessor_class = AutoProcessor\n```\n\nIf you want to use Pixtral, you can do the following:\n\n```python\n# To use Pixtral:\nfrom transformers import LlavaForConditionalGeneration, LlavaProcessor\nmodel_name=\"mistral-community/pixtral-12b\"\nmodel_class=LlavaForConditionalGeneration\nprocessor_class = LlavaProcessor\n```\n\n## Load the model\n\nLoad the model into memory:\n\n```python\nmodel_kwargs={\"device_map\": \"auto\", \"torch_dtype\": torch.bfloat16}\nprocessor_kwargs={\"device_map\": \"cuda\"}\ntf_model = model_class.from_pretrained(model_name, **model_kwargs)\ntf_processor = processor_class.from_pretrained(model_name, **processor_kwargs)\n\nmodel = outlines.from_transformers(tf_model, tf_processor)\n```\n\n## Image processing\n\nImages can be quite large. 
In GPU-poor environments, you may need to resize the image to a smaller size.\n\nHere's a helper function to do that:\n\n```python\ndef load_and_resize_image(image_path, max_size=1024):\n    \"\"\"\n    Load and resize an image while maintaining aspect ratio\n\n    Args:\n        image_path: Path to the image file\n        max_size: Maximum dimension (width or height) of the output image\n\n    Returns:\n        PIL Image: Resized image\n    \"\"\"\n    image = Image.open(image_path)\n\n    # Get current dimensions\n    width, height = image.size\n\n    # Calculate scaling factor\n    scale = min(max_size / width, max_size / height)\n\n    # Only resize if image is larger than max_size\n    if scale < 1:\n        new_width = int(width * scale)\n        new_height = int(height * scale)\n        image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)\n\n    return image\n```\n\nYou can change the resolution of the image by changing the `max_size` argument. Small max sizes will make the image more blurry, but processing will be faster and require less memory.\n\n## Load an image\n\nLoad an image and resize it. 
We've provided a sample image of a Trader Joe's receipt, but you can use any image you'd like.\n\nHere's what the image looks like:\n\n![Trader Joe's receipt](./images/trader-joes-receipt.jpg)\n\n```python\n# Path to the image\nimage_path = \"https://raw.githubusercontent.com/dottxt-ai/outlines/refs/heads/main/docs/cookbook/images/trader-joes-receipt.jpg\"\n\n# Download the image\nresponse = requests.get(image_path)\nwith open(\"receipt.png\", \"wb\") as f:\n    f.write(response.content)\n\n# Load + resize the image\nimage = load_and_resize_image(\"receipt.png\")\n```\n\n## Define the output structure\n\nWe'll define a Pydantic model to describe the data we want to extract from the image.\n\nIn our case, we want to extract the following information:\n\n- The store name\n- The store address\n- The store number\n- A list of items, including the name, quantity, price per unit, and total price\n- The tax\n- The total\n- The date\n- The payment method\n\nMost fields are optional, as not all receipts contain all information.\n\n```python\nclass Item(BaseModel):\n    name: str\n    quantity: Optional[int]\n    price_per_unit: Optional[float]\n    total_price: Optional[float]\n\nclass ReceiptSummary(BaseModel):\n    store_name: str\n    store_address: str\n    store_number: Optional[int]\n    items: List[Item]\n    tax: Optional[float]\n    total: Optional[float]\n    # Date is in the format YYYY-MM-DD. We can apply a regex pattern to ensure it's formatted correctly.\n    date: Optional[str] = Field(pattern=r'\\d{4}-\\d{2}-\\d{2}', description=\"Date in the format YYYY-MM-DD\")\n    payment_method: Literal[\"cash\", \"credit\", \"debit\", \"check\", \"other\"]\n```\n\n## Prepare the prompt\n\nWe'll use the `tf_processor` to convert the image and the text prompt into a format that the model can understand. 
Practically,\nthis is the code that adds user, system, assistant, and image tokens to the prompt.\n\n```python\n# Set up the content you want to send to the model\nmessages = [\n    {\n        \"role\": \"user\",\n        \"content\": [\n            {\n                # The image is provided as a PIL Image object\n                \"type\": \"image\",\n                \"image\": image,\n            },\n            {\n                \"type\": \"text\",\n                \"text\": f\"\"\"You are an expert at extracting information from receipts.\n                Please extract the information from the receipt. Be as detailed as possible --\n                missing or misreporting information is a crime.\n\n                Return the information in the following JSON schema:\n                {ReceiptSummary.model_json_schema()}\n            \"\"\"},\n        ],\n    }\n]\n\n# Convert the messages to the final prompt\nprompt = tf_processor.apply_chat_template(\n    messages, tokenize=False, add_generation_prompt=True\n)\n```\n\nIf you are curious, the final prompt that is sent to the model looks (roughly) like this:\n\n```\n<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>\nYou are an expert at extracting information from receipts.\nPlease extract the information from the receipt. 
Be as detailed as\npossible -- missing or misreporting information is a crime.\n\nReturn the information in the following JSON schema:\n\n<JSON SCHEMA OMITTED>\n<|im_end|>\n<|im_start|>assistant\n```\n\n## Run the model\n\n```python\n# Prepare a function to process receipts\nreceipt_summary_generator = outlines.Generator(model, ReceiptSummary)\n\n# Generate the receipt summary\nresult = receipt_summary_generator(\n    {\"text\": prompt, \"images\": image},\n    max_new_tokens=1024\n)\nprint(result)\n```\n\n## Output\n\nThe output should look like this:\n\n```\n{\n  \"store_name\": \"Trader Joe's\",\n  \"store_address\": \"401 Bay Street, San Francisco, CA 94133\",\n  \"store_number\": 0,\n  \"items\": [\n    {\"name\": \"BANANA EACH\", \"quantity\": 7, \"price_per_unit\": 0.23, \"total_price\": 1.61},\n    {\"name\": \"BAREBELLS CHOCOLATE DOUG\", \"quantity\": 1, \"price_per_unit\": 2.29, \"total_price\": 2.29},\n    {\"name\": \"BAREBELLS CREAMY CRISP\", \"quantity\": 1, \"price_per_unit\": 2.29, \"total_price\": 2.29},\n    {\"name\": \"BAREBELLS CHOCOLATE DOUG\", \"quantity\": 1, \"price_per_unit\": 2.29, \"total_price\": 2.29},\n    {\"name\": \"BAREBELLS CARAMEL CASHEW\", \"quantity\": 2, \"price_per_unit\": 2.29, \"total_price\": 4.58},\n    {\"name\": \"BAREBELLS CREAMY CRISP\", \"quantity\": 1, \"price_per_unit\": 2.29, \"total_price\": 2.29},\n    {\"name\": \"SPINDRIFT ORANGE MANGO 8\", \"quantity\": 1, \"price_per_unit\": 7.49, \"total_price\": 7.49},\n    {\"name\": \"Bottle Deposit\", \"quantity\": 8, \"price_per_unit\": 0.05, \"total_price\": 0.4},\n    {\"name\": \"MILK ORGANIC GALLON WHOL\", \"quantity\": 1,\"price_per_unit\": 6.79,\"total_price\": 6.79},\n    {\"name\": \"CLASSIC GREEK SALAD\", \"quantity\": 1, \"price_per_unit\": 3.49, \"total_price\": 3.49},\n    {\"name\": \"COBB SALAD\", \"quantity\": 1, \"price_per_unit\": 5.99, \"total_price\": 5.99},\n    {\"name\": \"PEPPER BELL RED XL EACH\", \"quantity\": 1, \"price_per_unit\": 1.29, 
\"total_price\": 1.29},\n    {\"name\": \"BAG FEE.\", \"quantity\": 1, \"price_per_unit\": 0.25, \"total_price\": 0.25},\n    {\"name\": \"BAG FEE.\", \"quantity\": 1, \"price_per_unit\": 0.25, \"total_price\": 0.25},\n  ],\n  \"tax\": 0.68,\n  \"total\": 41.98,\n  \"date\": \"2023-11-04\",\n  \"payment_method\": \"debit\"\n}\n```\n\nVoila! You've successfully extracted information from a receipt using an LLM.\n\n## Bonus: roasting the user for their receipt\n\nYou can roast the user for their receipt by adding a `roast` field to the end of the  `ReceiptSummary` model.\n\n```python\nclass ReceiptSummary(BaseModel):\n    ...\n    roast: str\n```\n\nwhich gives you a result like\n\n```\n{\n    ...\n    \"roast\": \"You must be a fan of Trader Joe's because you bought enough\n    items to fill a small grocery bag and still had to pay for a bag fee.\n    Maybe you should start using reusable bags to save some money and the\n    environment.\"\n}\n```\n\nQwen is not particularly funny, but worth a shot.\n"
  },
  {
    "path": "docs/examples/simtom.md",
    "content": "# Build perspective-taking agents with SimToM\n\nPrompting strategies like Chain-of-Thought (CoT) can improve LLMs' reasoning capabilities. However, they underwhelm in tasks that require keeping track of inconsistent world states. [SimToM](https://arxiv.org/abs/2311.10227) proposes a simple, two-stage prompting framework for LLMs inspired by Simulation Theory. The authors showed that this approach outperforms zero-shot prompting and CoT on ToMI and BigToM, two benchmarks with Theory of Mind questions.\n\nIn this example, we will implement SimToM with a few lines of code using Outlines' prompt templating and structured generation capabilities.\n\n## How SimToM works\n\nSimToM calls an LLM with two consecutive prompts:\n\n1. **Perspective-taking**: The first prompt receives a `story` and a `character`. The goal is to understand the situation based on the character's point of view and filter out the rest of the story.\n2. **Question-Answering**: The second prompt receives the character's point of view from the previous step and tasks the LLM to answer a question using that context.\n\n![Figure 2 in the paper](./images/simtom.png)\n\n## Outlines implementation\n\nTo implement SimToM with Outlines, we will need to:\n\n1. Write the prompts with [prompt templates](https://dottxt-ai.github.io/outlines/latest/reference/prompting/).\n2. Define the JSON object each prompt will return using Pydantic.\n3. Generate responses with a Mistral model using the [transformers integration](https://dottxt-ai.github.io/outlines/latest/reference/models/transformers/).\n\nLet's dive into it!\n\n### Using Prompt Templates\n\nThe authors have shared their code, prompts and data in [this GitHub repository](https://github.com/shawnsihyunlee/simulatedtom). 
Below, we define in Outlines the prompts they used for the ToMI dataset:\n\n```python\nfrom outlines import Template\n\nperspective_taking = Template.from_file(\"prompt_templates/simtom_prospective_taking.txt\")\nsimulation = Template.from_file(\"prompt_templates/simtom_simulation.txt\")\n```\n\n### JSON Structured Generation\n\nOutlines guarantees that the LLM will return a valid JSON object, which we can specify as a Pydantic model.\n\nWe will need two Pydantic models for SimToM, one for each prompt:\n\n```python\nfrom pydantic import BaseModel, Field\nfrom typing import List\n\nclass PerspectiveTaking(BaseModel):\n    \"\"\"This is for the first prompt.\"\"\"\n    character: str = Field(description=\"The character we extract the events for.\")\n    events: List[str] = Field(description=\"All events that the character knows about.\")\n\nclass Simulation(BaseModel):\n    \"\"\"This is for the second prompt.\"\"\"\n    answer: str\n```\n\n### Calling an LLM\n\nLet's try SimToM with an example from the ToMI dataset:\n\n```python\nstory = \"\"\"\n1 Aria entered the front_yard.\n2 Aiden entered the front_yard.\n3 The grapefruit is in the green_bucket.\n4 Aria moved the grapefruit to the blue_container.\n5 Aiden exited the front_yard.\n6 Noah entered the playroom.\n\"\"\"\nquestion = \"7 Where was the grapefruit at the beginning?\"\ncharacter = \"Aria\"\n```\n\nWe load `Mistral-7B-Instruct-v0.3`, create the prompt using the template we defined earlier, and generate a structured response. 
As a reminder, the goal of the first call is to get all the events a character, `Aria`, knows about.\n\n```python\nimport transformers\nimport outlines\n# Load an LLM from Hugging Face\nMODEL_NAME = \"mistral-community/Mistral-7B-Instruct-v0.3\"\nmodel = outlines.from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME),\n    transformers.AutoTokenizer.from_pretrained(MODEL_NAME),\n)\n\nperspective_prompt = perspective_taking(story=story, character=character)\n\n# Call Mistral 7B with the first prompt\ngenerator = outlines.Generator(model, PerspectiveTaking)\nperspective = generator(perspective_prompt, max_new_tokens=1024)\n\nprint(perspective)\n# {'character': 'Aria', 'events': ['1 Aria entered the front_yard.', '3 The grapefruit is in the green_bucket.', '4 Aria moved the grapefruit to the blue_container.']}\n```\n\nNot bad! We will now generate the second prompt with those events.\n\n```python\nimport json\n\nsim_prompt = simulation(events=json.loads(perspective)[\"events\"], name=character, question=question)\n\n# Call Mistral 7B with the second prompt\ngenerator = outlines.Generator(model, Simulation)\nresult = generator(sim_prompt, max_new_tokens=1024)\n\nprint(result)\n# {'answer': 'green_bucket'}\n```\n\nAnd this is it! SimToM could be useful in agentic workflows, where agents must act based on what they know, not all available information. One caveat of SimToM is that the perspective-taking step may remove important information, leading to wrong results. As the authors note in their paper, it can feature as a simple and effective baseline for evaluating LLMs on Theory of Mind reasoning tasks.\n"
  },
  {
    "path": "docs/examples/structured_generation_workflow.md",
    "content": "# Structured Generation Workflow: Generating Synthetic Phone Numbers\n\nThis is a condensed version of [Coding for Structured Generation with LLMs](https://blog.dottxt.co/coding-for-structured-generation.html).\n\nFor this example we're going to be building an LLM program to generate **synthetic data** in the form of realistic looking phone numbers for Washington State. Using an LLM for this task *is a bit overkill* since we could just as easily accomplish this with a tool like [Faker](https://fakerjs.dev/), but this example still serves as a useful way to demonstrate a workflow for using structured generation.\n\n## Unstructured approach\n\nBefore diving into how to use structure generation for this task let's start with an unstructured example. We begin by loading our model:\n\n```python\nimport outlines\nimport transformers\n\nmodel_name = 'microsoft/Phi-3-mini-4k-instruct'\nmodel = outlines.from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(model_name),\n    transformers.AutoTokenizer.from_pretrained(model_name)\n)\n```\n\nNext we need a prompt for this model. Since we're focusing on structured generation, we won't be engaging in any form of \"prompt hacking\" and will be leaving this prompt untouched for the rest of this example.\n\n```python\nprompt_phone = \"\"\"\n    Please generate a realistic phone number for Washington State in the following format\n    (555) 555-5555\n\"\"\"\n```\n\nWith our prompt ready we can now generate 10 example phone numbers\n\n```python\nphone_generator_unstruct = outlines.Generator(model)\nfor _ in range(3):\n    print(phone_generator_unstruct(prompt_phone, max_new_tokens=12))\n```\n\n> I'd be happy to help you generate a realistic phone\\\nI cannot generate a real phone number as I'm just\\\nI'm an AI and don't have the ability\\\nSure! 
Here is a randomly generated phone number in the format\\\nHere's a phone number that fits the format for a\\\nIn Washington State, phone numbers typically have a three-dig\\\nHere are a few examples of phone numbers that could be considered\\\nI'd be happy to help generate a realistic phone number\\\nI'd be happy to help you generate a random phone\\\nBased on the format you provided, a realistic phone number for\\\n\nAs we can see, none of these outputs are even phone numbers!\n\nLet's see if we can improve this using structured generation.\n\n## The Structured Generation Workflow\n\nIn order to solve this problem we're going to introduce a *Structured Generation Workflow* outlined in this image:\n\n![\"Visual of Structured Generation Workflow\"](./images/coding_structure_diagram.png)\n\nLet's step through this:\n\n### Real example\n\nWe start with a real example phone number, in this case for the Seattle Public Library, that we can use to verify the structure we are creating.\n\n```python\nphone_number = \"(206) 386-4636\"\n```\n\nFor a simple example like this, we'll just be using a single phone number; for more complex examples it can be helpful to have more examples.\n\n### Draft Structure\n\nThe next step in the process is for us to define a simple regex that we feel correctly models our real data.\n\n```python\nfrom outlines.types import Regex\n\nphone_regex_1 = Regex(r'\\([0-9]{3}\\) [0-9]{3}-[0-9]{4}')\n```\n\nNext we need to validate this regex against our real data.\n\n### Validate by matching examples\n\nWhenever writing non-trivial code with structured generation it is *essential* that you first validate the code against your real data example(s).\n\nWe'll start with a simple method of validation: just checking that our regex matches the data.\n\n```python\nimport re\n\nre.match(phone_regex_1.pattern, phone_number)\n# <re.Match object; span=(0, 14), match='(206) 386-4636'>\n\n```\n\nNow that we have a match, we can move on to generating structured 
output!\n\n### Generate Structure\n\nWe're ready to see if structured generation can make an improvement over our initial unstructured approach:\n\n```python\nphone_generator_v1 = outlines.Generator(model, phone_regex_1)\n\nfor _ in range(3):\n    print(phone_generator_v1(prompt_phone))\n```\n> (206) 555-1234\\\n(206) 555-1234\\\n(206) 555-1234\\\n(206) 555-1234\\\n(206) 555-1234\\\n(206) 555-1234\\\n(206) 123-4567\\\n(206) 555-1234\\\n(206) 555-1234\\\n(206) 555-1234\n\nAt least we have phone numbers! But I think we can do better!\n\n### Inspect output\n\nIn this case the model *did* create phone numbers and, impressively, got the area code correct. So using structured generation did improve things. However these numbers are pretty boring. Let's improve that structure!\n\n## Iteration\n\nWe've walked through the loop once, so we can go quickly now through each iteration.\n\nWe start by improving our structure:\n\n```python\nphone_regex_2 = Regex(r'\\([0-9]{3}\\) [2-46-9]{3}-[02-9]{4}')\n```\n\nBefore rushing to another round of generation, let's validate this new regex. We'll add just a bit more sophistication over our last check:\n\n```python\nre.match(phone_regex_2.pattern, phone_number)[0] == phone_number\n# True\n```\nNow that we've validated, let's generate with this new regex!\n\n```python\nphone_generator_v2 = outlines.Generator(model, phone_regex_2)\n\nfor _ in range(3):\n    print(phone_generator_v2(prompt_phone))\n```\n\n> (206) 867-5309\\\n(206) 666-7777\\\n(206) 444-3333\\\n(206) 444-3333\\\n(206) 943-2222\\\n(206) 323-6789\\\n(206) 444-3333\\\n(206) 867-5309\\\n(206) 466-2255\\\n(206) 222-3333\n\nBetter, but I don't like those repeated sequences. 
Like good software developers, let's iterate again!\n\n## Reiteration - with debugging\n\nHere's a fancier regex that should give us more interesting results:\n\n```python\nphone_regex_3_error = r'\\([0-9]{3}\\) [2-4][7-9][4-6]-[3-6][2-8][1-4]'\n```\n\nThis looks good to me, but there's a subtle bug; that's why we *always* need to validate our structure against real data. This time we'll make our validator do a bit more work to verify the correct string is matched:\n\n```python\nif not re.match(phone_regex_3_error, phone_number):\n    print(\"Regex fails match\")\nelse:\n    matched_string = re.match(phone_regex_3_error, phone_number)[0]\n    if matched_string == phone_number:\n        print(\"Successful match\")\n    else:\n        print(f\"Error {matched_string} != {phone_number}\")\n```\nThis prints out:\n>  Error (206) 386-463 != (206) 386-4636\n\nAh! We were missing the last digit; let's fix that and regenerate:\n\n```python\nphone_regex_3_fixed = Regex(r'\\([0-9]{3}\\) [2-4][7-9][4-6]-[3-6][2-8][1-4][6-9]')\nphone_generator_v3 = outlines.Generator(model, phone_regex_3_fixed)\n\nfor _ in range(3):\n    print(phone_generator_v3(prompt_phone))\n```\n\n>(206) 494-3216\\\n(206) 374-6218\\\n(206) 494-3337\\\n(206) 476-3216\\\n(206) 484-3548\\\n(206) 495-3218\\\n(206) 494-5517\\\n(206) 375-4636\\\n(206) 384-6216\\\n(206) 385-6218\n\nMuch better!\n\nNow you've seen a quick example of the structured generation workflow that can be used as the basis for building and iterating on much larger structured generation tasks!\n"
  },
  {
    "path": "docs/features/advanced/backends.md",
    "content": "---\ntitle: Structured Generation Backends\n---\n\n# Structured Generation Backends\n\nOutlines relies on a structured generation backend to control text generation for steerable models such that they conform to the output type provided. One of those backends is of course `outlines-core`, but you also have access to two other libraries that fulfill the same purpose: `llguidance` and `xgrammar`.\n\n## Overview\n\nTo select the backend to use for your generation, provide a value for the `backend` argument when calling a model or a generator.\n\nFor instance:\n\n```python\nfrom typing import Literal\nimport outlines\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\noutput_type = Literal[\"Paris\", \"London\", \"Rome\", \"Berlin\"]\n\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\nresult = model(\"What is the capital of France?\", output_type, backend=\"llguidance\")\nprint(result) # 'Paris'\n\ngenerator = outlines.Generator(model, output_type)\nresult = generator(\"What is the capital of France?\", backend=\"xgrammar\")\nprint(result) # 'Paris'\n```\n\nIf you do not provide a value for the `backend` argument, the default value will be used. The default value depends on the type of output type:\n\n- JSON schema: `outlines_core`\n- Regex: `outlines_core`\n- Context-free grammar: `llguidance`\n\n## Features matrix\n\nAs mentioned previously, selecting the structured generation backend is only applicable to steerable models, so `Transformers`, `LlamaCpp` and `MLXLM`. 
Additionally, some backends do not support some of those models or some output types.\n\n| | outlines_core | llguidance | xgrammar |\n|---|---|---|---|\n| **Models** | | | |\n| Transformers | ✅ | ✅ | ✅ |\n| LlamaCpp | ✅ | ✅ | ❌ |\n| MLXLM | ✅ | ✅ | ✅ |\n| **Output Types** | | | |\n| JSON Schema | ✅ | ✅ | ✅ |\n| Regex | ✅ | ✅ | ✅ |\n| Grammar | ❌ | ✅ | ✅ |\n"
  },
  {
    "path": "docs/features/advanced/logits_processors.md",
    "content": "---\ntitle: Logits Processors\n---\n\n# Logits Processors\n\nLogits processors are objects that control text generation by modifying the probability distribution of possible next tokens. They do this by adjusting the logits (raw model outputs) at each generation step, effectively biasing the model's token selection.\n\nProcessors can be used to:\n\n1. Generate structured output (e.g., JSON that follows a specific schema)\n2. Prevent the model from generating specific words or tokens\n3. Implement custom token sampling strategies\n\n## Overview\n\nOutlines uses logits processors with steerable models — models that run locally and allow fine-grained control over the generation process. When using such models in Outlines, the output type provided is turned into a logits processor that is then passed to the inference engine.\n\nThere are three models that support logits processors:\n\n- LlamaCpp\n- MLXLM\n- Transformers\n\nInstead of providing an output type that will be turned into a logits processor, it is possible to directly provide a logits processor. To do so, you must create a `Generator` instance using the `processor` keyword argument. 
You cannot directly call the model with a logits processor.\n\nFor instance:\n\n```python\nimport transformers\nfrom outlines import Generator, from_transformers\nfrom outlines.processors import RegexLogitsProcessor\n\n# Create a model\nmodel = from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(\"NousResearch/Hermes-2-Pro-Llama-3-8B\"),\n    transformers.AutoTokenizer.from_pretrained(\"NousResearch/Hermes-2-Pro-Llama-3-8B\")\n)\n\n# Create a regex logits processor that only returns hex unicode notations\nlogits_processor = RegexLogitsProcessor(r\"U\\+[0-9A-Fa-f]{4,6}\", model.tokenizer, model.tensor_library_name)\n\n# Create a generator with the logits processor and use it to generate text\ngenerator = Generator(model, processor=logits_processor)\nresponse = generator(\"What's the unicode for the hugging face emoji\")\n\nprint(response) # U+1F917\n```\n\n## Creating Custom Logits Processors\n\nYou can create your own logits processor by subclassing the `OutlinesLogitsProcessor` class. 
This allows you to implement specific logic to modify logits as needed.\nYour logits processor needs to implement the `process_logits` method to modify the logits.\n`process_logits` accepts:\n- `input_ids`: the ids of the tokens of the existing sequences in a 2D tensor.\n- `logits`: the logits for the current generation step in a 2D tensor.\n\nIn the example below, we create a custom logits processor to force the model to provide a response using only binary representation (so only the tokens for 0 and 1 are allowed):\n\n```python\nfrom outlines.processors.base_logits_processor import OutlinesLogitsProcessor, TensorType\nfrom outlines import Generator, from_transformers\nimport transformers\n\nALLOWED_TOKENS = [15, 16]  # token IDs corresponding to '0' and '1' in the model's vocabulary\n\n# Subclass OutlinesLogitsProcessor\nclass BinaryLogitsProcessor(OutlinesLogitsProcessor):\n\n    def process_logits(self, input_ids: TensorType, logits: TensorType) -> TensorType:\n        # Create a mask for all tokens\n        mask = self.tensor_adapter.boolean_ones_like(logits)\n        # Set mask to False for the allowed tokens\n        for token_id in ALLOWED_TOKENS:\n            mask[:, token_id] = False\n        # Set non-allowed tokens to -inf so they are not selected\n        logits[mask] = float(\"-inf\")\n        return logits\n\n# Create a regular model\ntf_tokenizer = transformers.AutoTokenizer.from_pretrained(\"NousResearch/Hermes-2-Pro-Llama-3-8B\")\ntf_model = transformers.AutoModelForCausalLM.from_pretrained(\"NousResearch/Hermes-2-Pro-Llama-3-8B\")\nmodel = from_transformers(tf_model, tf_tokenizer)\n\n# Instantiate your custom logits processor\nlogits_processor = BinaryLogitsProcessor(model.tensor_library_name)\n\nprompt = \"Write the number 47 in binary. For example, 1010 is the binary representation of 10. 
Answer just with the binary number composed of 0s and 1s.\"\nformatted_prompt = tf_tokenizer.apply_chat_template(\n    [{\"role\": \"user\", \"content\": prompt}],\n    tokenize=False\n)\n\n# Create a generator with the custom logits processor instance and use it to generate text\ngenerator = Generator(model, processor=logits_processor)\nresponse = generator(formatted_prompt)\n\nprint(response) # \"101111\"\n```\n"
  },
  {
    "path": "docs/features/core/generator.md",
    "content": "---\ntitle: Generator API\n---\n\n# Generator\n\n\nThe `Generator` class is the core component of Outlines v1. `Generator` accepts a [model](../models/index.md) and an optional [output type](../core/output_types.md). If no output type is provided, the `Generator` will return unstructured text.\n\n!!! note\n\n    `Generator` is new as of Outlines v1, and replaces previous generator constructors:\n\n    - `generate.cfg`\n    - `generate.choice`\n    - `generate.format`\n    - `generate.fsm`\n    - `generate.json`\n    - `generate.regex`\n    - `generate.text`\n\n## Methods\n\nGenerators implement the same methods as models:\n\n- `__call__`\n- `batch`\n- `stream`\n\nAll of them take a single positional argument: the [model input](../core/inputs.md) from which text is generated. Contrarily to the equivalent methods of models, you do not need to provide an output type as it has already been defined when initializing the generator.\n\n## Basic Usage\n\n```python\nfrom outlines import Generator, from_transformers\nimport transformers\n\n# Initialize a model\nmodel_name = \"HuggingFaceTB/SmolLM2-135M-Instruct\"\nmodel = from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(model_name),\n    transformers.AutoTokenizer.from_pretrained(model_name),\n)\n\n# Create a generator for plain text\ngenerator = Generator(model)\nresult = generator(\"Write a short poem about AI.\")\n\n# Print the result\nprint(result)\n```\n\n## Structured Generation\n\n```python\nfrom pydantic import BaseModel\nfrom outlines import Generator, from_transformers\nimport transformers\n\n# Define a Pydantic model for structured output\nclass BookRecommendation(BaseModel):\n    title: str\n    author: str\n    year: int\n\n# Initialize a model\nmodel_name = \"HuggingFaceTB/SmolLM2-135M-Instruct\"\nmodel = from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(model_name),\n    transformers.AutoTokenizer.from_pretrained(model_name),\n)\n\n# Create a 
generator for JSON output\ngenerator = Generator(model, BookRecommendation)\n\n# Generate a book recommendation\nresult = generator(\"Recommend a science fiction book.\")\n\n# Parse the JSON result into a Pydantic model\nbook = BookRecommendation.model_validate_json(result)\nprint(f\"{book.title} by {book.author} ({book.year})\")\n```\n\n## Parameters\n\n- `model`: The language model to use for generation\n- `output_type`: Optional. The type of output to generate\n\n## Generation Parameters\n\nWhen calling the generator, you can pass additional parameters to control the generation process. These parameters are passed through to the underlying model, so they depend on the specific model being used.\n\nCommon parameters for most models include:\n- `max_new_tokens`: Maximum number of tokens to generate\n- `temperature`: Controls randomness (higher values = more random)\n- `top_p`: Controls diversity via nucleus sampling\n- `stop_strings`: String or list of strings at which to stop generation\n\nExample:\n```python\nresult = generator(\n    \"Write a short story.\",\n    max_new_tokens=200,\n    temperature=0.7,\n    top_p=0.9,\n    stop_strings=[\"THE END\", \"###\"]\n)\n```\n\n## Return Value\n\nThe generator always returns a raw string containing the generated text. When generating structured outputs, you need to parse this string into the desired format.\n\nUnlike in Outlines v0, where the return type could be a parsed object, in v1 you are responsible for parsing the output when needed:\n\n```python\n# Outlines v1 approach\nfrom pydantic import BaseModel\nfrom outlines import Generator\n\nclass Person(BaseModel):\n    name: str\n    age: int\n\ngenerator = Generator(model, Person)\nresult = generator(\"Generate a person:\")\n\n# Parse the result yourself\nperson = Person.model_validate_json(result)\n```\n\n::: outlines.generator.Generator\n"
  },
  {
    "path": "docs/features/core/inputs.md",
    "content": "---\ntitle: Model Inputs\n---\n\n# Model Inputs\n\nOutlines models accept various types of inputs to generate text. The input format depends on the capabilities of the underlying model and the type of task you want to perform. The most basic type of input is a single string prompt, it's accepted by all models.\n\n## Overview\n\nThe model input is the first argument of the `__call__`, `stream` and `batch` methods of both models and generators.\n\nThere are 3 types of model inputs:\n\n- **Text prompts** - Simple strings\n- **Multimodal inputs** - List containning a string prompt along with assets\n- **Chat inputs** - `Chat` instances containing messages\n\n## Text Prompts\n\nThe simplest form of input is a plain text string. This works with all models and is suitable for standard text generation tasks.\n\n```python\nimport outlines\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Create a model\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n)\n\n# Simple text prompt\nresponse = model(\"What's the capital of France?\", max_new_tokens=20)\nprint(response)  # 'Paris'\n```\n\n## Multimodal Inputs (Vision)\n\nFor models that support them, you can provide a list containing a text prompt and one or more assets.\n\nThere are 3 types of assets defined in Outlines:\n\n- `Image`: contains a PIL Image\n- `Video`: contains any object (you must choose a format that is supported by your model)\n- `Audio`: contains any object (you must choose a format that is supported by your model)\n\nAmong those, `Image` is by far the most important as multiple models support vision inputs.\n\nFor instance with vision input:\n\n```python\nimport io\nimport requests\nimport PIL\nimport outlines\nimport openai\nfrom outlines.inputs import Image\n\n# Create the model\nmodel = outlines.from_openai(\n    openai.OpenAI(),\n   
 \"gpt-4o\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the prompt containing the text and the image\nprompt = [\n    \"Describe the image\",\n    Image(get_image(\"https://picsum.photos/id/237/400/300\"))\n]\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'This is a picture of a black dog.'\n```\n\n## Chat Inputs\n\nFor conversational models, you can use the `Chat` class to provide a conversation history with multiple messages.\n\nA `Chat` instance is instantiated with an optional list of messages. Each message must be a dictionary containing two mandatory keys:\n- `role`: must be one of `system`, `assistant` or `user`\n- `content`: must be either a string or a multimodal input (if the model supports it)\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nfrom outlines.inputs import Chat, Image\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the chat input\nprompt = Chat([\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\n        \"role\": \"user\",\n        \"content\": [\"Describe the image\", Image(get_image(\"https://picsum.photos/id/237/400/300\"))]\n    },\n])\nprint(prompt)\n# {'role': 'system', 'content': 'You are a helpful assistant.'}\n# {'role': 'user', 'content': ['Describe the image', Image(image=<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=400x300 at 0x7FFA526CCC40>)]}\n```\n\nAfter having created a `Chat` instance, you can add one or several messages thanks to the `append` and `extend` methods. 
You can also remove the last message of the Chat with the `pop` method.\n\nFor instance:\n\n```python\nfrom outlines.inputs import Chat\n\n# Create the chat input\nprompt = Chat([\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n])\n\n# Add a message\nprompt.append({\"role\": \"user\", \"content\": \"How are you doing today?\"})\nprint(prompt)\n# {'role': 'system', 'content': 'You are a helpful assistant.'}\n# {'role': 'user', 'content': 'How are you doing today?'}\n\n# Remove the last message\nlast_message = prompt.pop()\nprint(last_message)\n# {'role': 'user', 'content': 'How are you doing today?'}\nprint(prompt)\n# {'role': 'system', 'content': 'You are a helpful assistant.'}\n\n# Add several messages\nprompt.extend([\n    {\"role\": \"user\", \"content\": \"How are you doing today?\"},\n    {\"role\": \"assistant\", \"content\": \"Excellent, thanks!\"}\n])\nprint(prompt)\n# {'role': 'system', 'content': 'You are a helpful assistant.'}\n# {'role': 'user', 'content': 'How are you doing today?'}\n# {'role': 'assistant', 'content': 'Excellent, thanks!'}\n```\n\nFinally, there are three convenience methods to easily add a message:\n\n- add_system_message\n- add_user_message\n- add_assistant_message\n\nAs the role is already set, you only need to provide the content.\n\nFor instance:\n\n```python\nfrom outlines.inputs import Chat\n\n# Create the chat input\nprompt = Chat()\n\nprompt.add_system_message(\"You are a helpful assistant.\")\nprompt.add_user_message(\"How are you doing today?\")\nprompt.add_assistant_message(\"Excellent, thanks!\")\n\nprint(prompt)\n# {'role': 'system', 'content': 'You are a helpful assistant.'}\n# {'role': 'user', 'content': 'How are you doing today?'}\n# {'role': 'assistant', 'content': 'Excellent, thanks!'}\n```\n\n## Batching\n\nIn the case of batching, for models that support it, you just have to provide several instances of the model inputs described above in a list.\n\nFor instance:\n\n```python\nimport 
outlines\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Create model\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\n# Create a list of prompts that will be used in a single batch\nprompts = [\n    \"What's the capital of Lithuania?\",\n    \"What's the capital of Latvia?\",\n    \"What's the capital of Estonia?\"\n]\n\n# Call it to generate text\nresult = model.batch(prompts, max_new_tokens=20)\nprint(result) # ['Vilnius', 'Riga', 'Tallinn']\n```\n"
  },
  {
    "path": "docs/features/core/output_types.md",
    "content": "---\ntitle: Output Types\n---\n\n# Output Types\n\nOutlines provides a simple and intuitive way of defining the output structure of text generation. Possible output formats include basic Python types, multiple-choices, JSON schemas, regular expressions and context-free grammars.\n\n## Overview\n\nOutlines models accept a __prompt__ and an __output type__ when they are invoked, as well as additional inference keyword arguments that are forwarded on to the underlying model.\n\nOutput types can be from the general Python ecosystem, including:\n- Most native Python types, such as `int` or `str`\n- Types from the `typing` and `enum` modules, such as `Literal`, `List`, `Dict`, `Enum`, etc\n- Types from popular third party libraries such as Pydantic or GenSON.\n\nOutlines also provides special classes for certain output structures (more details below):\n- Multiple choices with `Choice`\n- JSON schemas with `JsonSchema`\n- Regular expressions with `Regex`\n- Context-free grammars with `CFG`\n\nThe general idea is that you should provide as an output type what you would give as the type hint of the return type of a function.\n\nConsider the following functions for instance:\n\n```python\nfrom datetime import date\nfrom typing import Dict, List, Literal, Union\nfrom pydantic import BaseModel\n\nclass Character(BaseModel):\n    name: str\n    birth_date: date\n    skills: Union[Dict, List[str]]\n\ndef give_int() -> int:\n    ...\n\ndef pizza_or_burger() -> Literal[\"pizza\", \"burger\"]:\n    ...\n\ndef create_character() -> Character:\n    ...\n```\n\nWith an Outlines model, you can generate text that respects the type hints above by providing those as the output type:\n\n```python\nmodel(\"How many minutes are there in one hour\", int) # \"60\"\nmodel(\"Pizza or burger\", Literal[\"pizza\", \"burger\"]) # \"pizza\"\nmodel(\"Create a character\", Character, max_new_tokens=100) # '{\"name\": \"James\", \"birth_date\": \"1980-05-10\", \"skills\": [\"archery\", 
\"negotiation\"]}'\n```\n\nAn important difference with function type hints though is that an Outlines generator always returns a string.\nYou have to cast the response into the type you want yourself.\n\nFor instance:\n\n```python\nresult = model(\"Create a character\", Character, max_new_tokens=100)\ncasted_result = Character.model_validate_json(result)\nprint(result) # '{\"name\": \"Aurora\", \"birth_date\": \"1990-06-15\", \"skills\": [\"Stealth\", \"Diplomacy\"]}'\nprint(casted_result) # name=Aurora birth_date=datetime.date(1990, 6, 15) skills=['Stealth', 'Diplomacy']\n```\n\n## Output Type Categories\n\nWe can group possible output types in several categories based on the use case they correspond to. While most of those types are native python or types coming from well-known third-party libraries, there are three Outlines-specific types: `JsonSchema`, `Regex` and `CFG`. Their use is explained below.\n\n### Basic Python Types\n\nThe most straightforward form of structured generation is to return an answer that conforms to a given basic type such as an int or a python list. You can use the basic Python types and the types from the `typing` library. For instance:\n\n```python\nfrom typing import Dict\n\noutput_type = float # example of valid value: \"0.05\"\noutput_type = bool # example of valid value: \"True\"\noutput_type = Dict[int, str] # example of valid value: \"{1: 'hello', 2: 'there'}\"\n```\n\nYou can combine types to create more complex response formats by relying on collection types and types such as `Union` and `Optional`. 
Let's consider for instance the output type below used to represent semi-structured data:\n\n```python\nfrom typing import Dict, List, Optional, Tuple, Union\n\noutput_type = Dict[str, Union[int, str, List[Tuple[str, Optional[float]]]]]\n```\n\nValues created with this output type would be dictionaries with string as keys and values made of either an integer, a string or a list of two elements tuples: a string and either a float or None. Example of a valid response for text generated with this output type (it would be contained in a string):\n\n```json\n{\n    \"name\": \"Alice\",\n    \"age\": 30,\n    \"metrics\": [(\"engagement\", 0.85), (\"satisfaction\", None)]\n}\n```\n\n### Multiple Choices\n\nOutlines supports multiple choice classification by using the `Literal` or `Enum` output types. For instance:\n\n```python\nfrom enum import Enum\nfrom typing import Literal\n\nclass PizzaOrBurger(Enum):\n    pizza = \"pizza\"\n    burger = \"burger\"\n\n# Equivalent multiple-choice output types\noutput_type = Literal[\"pizza\", \"burger\"]\noutput_type = PizzaOrBurger\n```\n\nAdditionally, you can use the Outlines-specific type `Choice` that takes a `list` as an argument. This type is useful in situations in which the list of choices is dynamic.\n\nFor instance:\n\n```python\nfrom outlines.types import Choice\n\ndef get_multiple_choices() -> list:\n    # we could have something complex here\n    return [\"pizza\", \"burger\"]\n\noutput_type = Choice(get_multiple_choices())\n```\n\n### JSON Schemas\n\nMultiple different common Python types are often used to store information equivalent to a JSON schema. 
The following can be used in Outlines to generate text that respects a JSON schema:\n\n- A Pydantic class\n- A Dataclass\n- A TypedDict\n- A [GenSON](https://github.com/wolverdude/GenSON) `SchemaBuilder`\n- A Callable (the parameters are turned into the keys and the type hinting is used to define the types of the values)\n\nFor instance:\n\n```python\nfrom dataclasses import dataclass\n\n@dataclass\nclass Character:\n    name: str\n    age: int\n\noutput_type = Character\n\ndef character(name: str, age: int):\n    return None\n\noutput_type = character\n```\n\nThere are two other JSON schema formats that require Outlines-specific classes: JSON schema strings and dictionaries.\n\nAs those are contained in regular Python strings or dictionaries, the associated output format would be ambiguous if they were to be provided directly. As a result, Outlines requires them to be wrapped in an `outlines.types.JsonSchema` object. For instance:\n\n```python\nfrom outlines.types import JsonSchema\n\nschema_string = '{\"type\": \"object\", \"properties\": {\"answer\": {\"type\": \"number\"}}}'\noutput_type = JsonSchema(schema_string)\n\nschema_dict = {\n    \"type\": \"object\",\n    \"properties\": {\n        \"answer\": {\"type\": \"number\"}\n    }\n}\noutput_type = JsonSchema(schema_dict)\n```\n\n`JsonSchema` accepts two optional parameters:\n\n- `whitespace_pattern` (defaults to `None`): specifies the pattern to use for JSON syntactic whitespace. If none is provided, the default permissive JSON whitespace rules are used.\n- `ensure_ascii` (defaults to `True`): defines the value to use for the argument `ensure_ascii` of the `json.dumps` method. If false, non-ASCII characters are output as-is instead of being escaped.\n\n### Regex Patterns\n\nOutlines provides support for text generation constrained by regular expressions. 
Since regular expressions are expressed as simple raw string literals, regex strings must be wrapped in an `outlines.types.Regex` object.\n\n```python\nfrom outlines.types import Regex\n\nregex = r\"[0-9]{3}\"\noutput_type = Regex(regex)\n```\n\nThe `outlines.types` module contains a few common regex patterns stored in variables you can import and directly use as output types. Common patterns include a sentence, an email address and an [ISBN reference](https://en.wikipedia.org/wiki/ISBN). For instance:\n\n```python\nfrom outlines.types import sentence\n\nprint(type(sentence)) # outlines.types.dsl.Regex\nprint(sentence.pattern) # [A-Z].*\\s*[.!?]\n```\n\nTo help you create complex regex patterns yourself, you can use the Outlines [regex DSL](../../utility/regex_dsl).\n\n### Context-Free Grammars\n\nOutlines allows you to generate text that respects the syntax of a context-free grammar. Context-free grammars are defined using [Lark](https://lark-parser.readthedocs.io/en/latest/index.html), a grammar language. Since grammars are expressed as strings, Lark CFG strings should be wrapped in an `outlines.types.CFG` object. For instance:\n\n```python\nfrom outlines.types import CFG\n\ngrammar_string = \"\"\"\n    start: expr\n    expr: \"{\" expr \"}\" | \"[\" expr \"]\" |\n\"\"\"\noutput_type = CFG(grammar_string)\n```\n\nYou can find a few Lark grammar examples in the [grammars module](../../api_reference/grammars.md).\n\n## Output type availability\n\nThe output types presented above are not available for all models as some have only limited support for structured outputs. Please refer to the documentation of the specific model you wish to use to know what output types it supports.\n"
  },
  {
    "path": "docs/features/index.md",
    "content": "# Features\n\nThis section presents in details the different features of Outlines.\n\n## Core Concepts\n\n- [Models](./models/index.md)\n- [Model Inputs](./core/inputs.md)\n- [Output Types](./core/output_types.md)\n- [Generators](./core/generator.md)\n\n## Utilities\n\n- [Applications](./utility/application.md)\n- [Templates](./utility/templates.md)\n- [Regex DSL](./utility/regex_dsl.md)\n\n## Advanced\n\n- [Logits Processors](./advanced/logits_processors.md)\n"
  },
  {
    "path": "docs/features/models/anthropic.md",
    "content": "---\ntitle: Anthropic\n---\n\n# Anthropic\n\n!!! Installation\n\n    You need to install the `anthropic` library to be able to use the Anthropic API in Outlines. Install all optional dependencies of the `Anthropic` model with: `pip install \"outlines[anthropic]\"`.\n\n    You also need to have an Anthropic API key. This API key must either be set as an environment variable called `ANTHROPIC_API_KEY` or be provided to the `anthropic.Anthropic` class when instantiating it.\n\n## Model Initialization\n\nTo create an Anthropic model instance, you can use the `from_anthropic` function. It takes 2 arguments:\n\n- `client`: an `anthropic.Anthropic` instance\n- `model_name`: the name of the model you want to use in subsequent model calls (optional)\n\nFor instance:\n\n```python\nfrom anthropic import Anthropic\nimport outlines\n\n# Create the Anthropic client\nclient = Anthropic()\n\n# Create the model\nmodel = outlines.from_anthropic(\n    client,\n    \"claude-3-5-sonnet-latest\"\n)\n```\n\nCheck the [Anthropic documentation](https://docs.anthropic.com/en/docs/about-claude/models) for an up-to-date list of available models.\n\n## Text Generation\n\nOnce you've created your Outlines `Anthropic` model instance, you're all set to generate text with this provider. You can simply call the model with a text prompt.\n\nFor instance:\n\n```python\nfrom anthropic import Anthropic\nimport outlines\n\n# Create the model\nmodel = outlines.from_anthropic(\n    Anthropic(),\n    \"claude-3-5-sonnet-latest\"\n)\n\n# Call it to generate text\nresponse = model(\"What's the capital of Latvia?\", max_tokens=20)\nprint(response) # 'Riga'\n```\n\n#### Vision\n\nSome Anthropic models support vision input. 
To use this feature, provide a list containing a text prompt and `Image` instances.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nfrom anthropic import Anthropic\nfrom outlines import from_anthropic\nfrom outlines.inputs import Image\n\n# Create the model\nmodel = from_anthropic(\n    Anthropic(),\n    \"claude-3-5-sonnet-latest\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the prompt containing the text and the image\nprompt = [\n    \"Describe the image\",\n    Image(get_image(\"https://picsum.photos/id/237/400/300\"))\n]\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'This is a picture of a black dog.'\n```\n\n#### Chat\n\nYou can also use chat inputs with the `Anthropic` model. To do so, call the model with a `Chat` instance. The content of messages within the chat can be vision inputs as described above.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nfrom anthropic import Anthropic\nfrom outlines import from_anthropic\nfrom outlines.inputs import Chat, Image\n\n# Create the model\nmodel = from_anthropic(\n    Anthropic(),\n    \"claude-3-5-sonnet-latest\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the chat input\nprompt = Chat([\n    {\"role\": \"user\", \"content\": \"You are a helpful assistant that helps me describe pictures.\"},\n    {\"role\": \"assistant\", \"content\": \"I'd be happy to help you describe pictures! 
Please go ahead and share an image\"},\n    {\n        \"role\": \"user\",\n        \"content\": [\"Describe the image\", Image(get_image(\"https://picsum.photos/id/237/400/300\"))]\n    },\n])\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'This is a picture of a black dog.'\n```\n\n#### Streaming\n\nFinally, the `Anthropic` model supports streaming through the `stream` method.\n\nFor instance:\n\n```python\nfrom anthropic import Anthropic\nimport outlines\n\n# Create the model\nmodel = outlines.from_anthropic(\n    Anthropic(),\n    \"claude-3-5-sonnet-latest\"\n)\n\n# Stream the response\nfor chunk in model.stream(\"Tell me a short story about a cat.\", max_tokens=50):\n    print(chunk) # 'Once...'\n```\n\n## Inference arguments\n\nWhen calling the model or streaming, you can provide keyword arguments that will be passed down to the Anthropic client. Make sure to include all the arguments you need to configure the client's behavior to your expected behavior. Some of the most common arguments include `max_tokens`, `temperature`, `stop_sequences` and `top_k`.\n\nSee the [Anthropic API documentation](https://docs.anthropic.com/en/api/messages) for the full list of available arguments.\n\n!!! Warning\n\n    You must set a value for `max_tokens` with Anthropic models.\n"
  },
  {
    "path": "docs/features/models/dottxt.md",
    "content": "---\ntitle: Dottxt\n---\n\n# Dottxt\n\n!!! Installation\n\n    You need to install the `dottxt` python sdk to be able to use the Dottxt API in Outlines. Install all optional dependencies of the `Dottxt` model with: `pip install \"outlines[dottxt]\"`.\n\n    You also need to have a Dottxt API key. This API key must either be set as an environment variable called `DOTTXT_API_KEY` or be provided to the `dottxt.client.Dottxt` class when instantiating it.\n\n## Model Initialization\n\nTo create an Dottxt model instance, you can use the `from_dottxt` function. It takes 3 arguments:\n\n- `client`: a `dottxt.client.Dottxt` instance\n- `model_name`: the name of the model you want to use in subsequent model calls (optional)\n- `model_revision`: the name of the revision to use for the model selected (optional)\n\nFor instance:\n\n```python\nfrom dottxt.client import Dottxt\nimport outlines\n\n# Create client\nclient = Dottxt(api_key=\"...\")\n\n# Create the model\nmodel = outlines.from_dottxt(\n    client,\n    \"meta-llama/Llama-3.1-8B\",\n    \"d04e592bb4f6aa9cfee91e2e20afa771667e1d4b\"\n)\n```\n\nUse the `list_models` method of the Dottxt client to get a list of available model names and revisions for your account.\n\n## Text Generation\n\nDottxt only supports constrained generation with JSON schema output types. 
You must always provide a value for the `output_type` parameter as unconstrained generation is not available.\n\nFor instance:\n\n```python\nfrom typing import List\nfrom pydantic import BaseModel\nfrom dottxt.client import Dottxt\nimport outlines\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\n# Create the model\nmodel = outlines.from_dottxt(\n    Dottxt(),\n    \"meta-llama/Llama-3.1-8B\",\n    \"d04e592bb4f6aa9cfee91e2e20afa771667e1d4b\"\n)\n\n# Generate structured text\nresult = model(\"Create a character\", Character)\nprint(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\nprint(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n```\n\n## Inference arguments\n\nYou can provide the same optional parameters you would pass to the `dottxt` sdk's client both during the initialization of the `Dottxt` class and when generating text. Some of the common inference arguments include `max_tokens`, `frequency_penalty`, `presence_penalty` and `temperature`.\n\nConsult the [dottxt python sdk GitHub repository](https://github.com/dottxt-ai/dottxt-python) for the full list of parameters.\n"
  },
  {
    "path": "docs/features/models/gemini.md",
    "content": "# Gemini\n\n!!! Installation\n\n    You need to install the `google.genai` libray to be able to use the Gemini API in Outlines. Install all optional dependencies of the `Gemini` model with: `pip install \"outlines[gemini]\"`.\n\n    You also need to have a Gemini API key. This API key must either be set as an environment variable called `GEMINI_API_KEY` or be provided to the `google.genai.Client` class when instantiating it.\n\n## Model Initialization\n\nTo create a Gemini model instance, you can use the `from_gemini` function. It takes 2 arguments:\n\n- `client`: a `google.genai.Client` instance\n- `model_name`: the name of the model you want to use in subsequent model calls (optional)\n\nFor instance:\n\n```python\nimport outlines\nfrom google import genai\n\n# Create the client\nclient = genai.Client()\n\n# Create the model\nmodel = outlines.from_gemini(\n    client,\n    \"gemini-1.5-flash-latest\"\n)\n```\n\nCheck the [Gemini documentation](https://github.com/googleapis/python-genai) for an up-to-date list of available models.\n\n## Text Generation\n\nOnce you've created your Outlines `Gemini` model instance, you're all set to generate text with this provider. You can simply call the model with a prompt.\n\nFor instance:\n\n```python\nimport outlines\nfrom google.genai import Client\n\n# Create the model\nmodel = outlines.from_gemini(\n    Client(),\n    \"gemini-1.5-flash-latest\"\n)\n\n# Call it to generate text\nresult = model(\"What's the capital of Latvia?\", max_output_tokens=20)\nprint(result) # 'Riga'\n```\n\n#### Vision\n\nSome Gemini models support vision input. 
To use this feature, provide a list containing a text prompt and `Image` instances.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nimport outlines\nfrom google.genai import Client\nfrom outlines.inputs import Image\n\n# Create the model\nmodel = outlines.from_gemini(\n    Client(),\n    \"gemini-1.5-flash-latest\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the prompt containing the text and the image\nprompt = [\n    \"Describe the image\",\n    Image(get_image(\"https://picsum.photos/id/237/400/300\"))\n]\n\n# Call the model to generate a response\nresponse = model(prompt, max_output_tokens=50)\nprint(response) # 'This is a picture of a black dog.'\n```\n\n#### Chat\n\nYou can also use chat inputs with the `Gemini` model. To do so, call the model with a `Chat` instance. The content of messages within the chat can be vision inputs as described above.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nimport outlines\nfrom google.genai import Client\nfrom outlines.inputs import Chat, Image\n\n# Create the model\nmodel = outlines.from_gemini(\n    Client(),\n    \"gemini-1.5-flash-latest\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the chat input\nprompt = Chat([\n    {\"role\": \"user\", \"content\": \"You are a helpful assistant that helps me describe pictures.\"},\n    {\"role\": \"assistant\", \"content\": \"I'd be happy to help you describe pictures! 
Please go ahead and share an image\"},\n    {\n        \"role\": \"user\",\n        \"content\": [\"Describe the image\", Image(get_image(\"https://picsum.photos/id/237/400/300\"))]\n    },\n])\n\n# Call the model to generate a response\nresponse = model(prompt, max_output_tokens=50)\nprint(response) # 'This is a picture of a black dog.'\n```\n\n#### Streaming\n\nFinally, the `Gemini` model supports streaming through the `stream` method.\n\nFor instance:\n\n```python\nimport outlines\nfrom google.genai import Client\n\n# Create the model\nmodel = outlines.from_gemini(\n    Client(),\n    \"gemini-1.5-flash-latest\"\n)\n\n# Stream text\nfor chunk in model.stream(\"Write a short story about a cat.\", max_output_tokens=20):\n    print(chunk) # 'In...'\n```\n\n## Structured Generation\n\nGemini provides support for some forms of structured output: multiple choice, JSON schema (with caveats) and lists of structured objects. To use it, call the model with an `output_type` on top of your prompt.\n\n#### Multiple Choice\n\n```python\nimport outlines\nfrom google import genai\nfrom enum import Enum\n\nclass PizzaOrBurger(Enum):\n    pizza = \"pizza\"\n    burger = \"burger\"\n\n# Create the model\nmodel = outlines.from_gemini(genai.Client(), \"gemini-1.5-flash-latest\")\n\n# Call it with the output type to generate structured text\nresult = model(\"Pizza or burger?\", PizzaOrBurger, max_output_tokens=20)\nprint(result) # 'pizza'\n```\n\n#### JSON Schema\n\nGemini supports only three types of objects used to define a JSON Schema:\n\n- Pydantic classes\n- Dataclasses\n- TypedDicts\n\n```python\nfrom typing import List\nfrom pydantic import BaseModel\nfrom google import genai\nimport outlines\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\n# Create the model\nmodel = outlines.from_gemini(genai.Client(), \"gemini-1.5-flash-latest\")\n\n# Call it with the output type to generate structured text\nresult = model(\"Create a character\", 
Character)\nprint(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\nprint(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n```\n\n#### Lists of Structured Objects\n\nA specificity of Gemini is that, despite not supporting regex, it does support a list of structured objects as an output type. To use it, put any of the three available types described above in a built-in `list`:\n\n```python\nfrom dataclasses import dataclass\nfrom typing import List\nfrom google import genai\nimport outlines\n\n@dataclass\nclass Character:\n    name: str\n    age: int\n    skills: List[str]\n\n# Create the model\nmodel = outlines.from_gemini(genai.Client(), \"gemini-1.5-flash-latest\")\n\n# Call it with the output type to generate structured text\nresult = model(\"Create a character\", list[Character])\nprint(result) # '[{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}, {\"name\":...'\n```\n\n!!! Attention\n\n    The structured objects must be in a built-in `list`, not a `List` from the `typing` library.\n\n## Inference arguments\n\nYou can provide the same optional parameters you would pass to the `google.genai.Client` client both during the initialization of the Gemini model and when generating text. Some of the common inference arguments include `max_output_tokens`, `temperature`, and other generation parameters.\n\nConsult the [Google Generative AI documentation](https://github.com/googleapis/python-genai) for the full list of parameters.\n"
  },
  {
    "path": "docs/features/models/index.md",
    "content": "---\ntitle: Models\n---\n\n# Models\n\n## Overview\n\nOutlines models are objects that wrap an inference client or engine. Models provide a standardized interface to generate structured text.\n\nAll Outlines model classes have an associated loader function to facilitate initializing a model instance. The name of this function is `from_` plus the name of the model in lower-case letters. For instance, Outlines has a `Transformers` model and an associated `from_transformers` loader function. The parameters to load a model are specific to each provider, please consult the documentation of the model you want to use for more information.\n\nAfter having created a model instance, you can either directly call it to generate text or first create a reusable generator that you would then call.\n\nThe input you must provide to a model to generate text can be a simple text prompt or a vision or chat input for models that support them. See the [model inputs section](../core/inputs.md) for more information on model inputs formats.\n\nIn all cases, you can provide an `output_type` to constrain the format of the generation output. 
See the [output types section](../core/output_types.md) for more information on constrained generation.\n\nFor instance:\n\n```python\nfrom outlines import from_transformers, Generator\nimport transformers\n\n# Create a model\nmodel = from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    transformers.AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n)\n\n# Call it directly\nresponse = model(\"How many countries are there in the world\", max_new_tokens=20)\nprint(response) # 'There are 200 countries in the world.'\n\n# Call it directly with an output_type\nresponse = model(\"How many countries are there in the world\", int, max_new_tokens=20)\nprint(response) # '200'\n\n# Create a generator first and then call it\ngenerator = Generator(model, int)\nresponse = generator(\"How many countries are there in the world\")\nprint(response) # '200'\n```\n\nSome models support streaming through a `stream` method. It takes the same argument as the `__call__` method, but returns an iterator instead of a string.\n\nFor instance:\n\n```python\nfrom outlines import from_openai, Generator\nimport openai\n\n# Create the model\nmodel = from_openai(\n    openai.OpenAI(),\n    \"gpt-4o\"\n)\n\n# Stream the response\nfor chunk in model.stream(\"Tell a short story about a cat.\", max_tokens=50):\n    print(chunk) # 'This...'\n```\n\nAdditionally, some models support batch processing through a `batch` method. 
It's similar to the `__call__` method, but takes a list of prompts instead of a single prompt and returns a list of strings.\n\nFor instance:\n\n```python\nfrom outlines import from_transformers, Generator\nimport transformers\n\n# Create a model\nmodel = from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    transformers.AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n)\n\n# Call it directly\nresponse = model.batch([\"What's the capital of Latvia?\", \"What's the capital of Estonia?\"], max_new_tokens=20)\nprint(response) # ['Riga', 'Tallinn']\n```\n\n## Features Matrix\n\nIn alphabetical order:\n\n| | [Anthropic](../../models/anthropic) | [Dottxt](../../models/dottxt) | [Gemini](../../models/gemini) | [LlamaCpp](../../models/llamacpp) | [MLXLM](../../models/mlxlm) | [Mistral](../../models/mistral) | [Ollama](../../models/ollama) | [OpenAI](../../models/openai) | [SGLang](../../models/sglang) | [TGI](../../models/tgi) | [Transformers](../../models/transformers) | [Transformers MultiModal](../../models/transformers_multimodal) | [VLLM](../../models/vllm) | [VLLMOffline](../../models/vllm_offline) |\n|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n| **Output Types** | | | | | | | | | | | | | | |\n| Simple Types | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |\n| JSON Schema | ❌ | ✅ | 🟠 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |\n| Multiple Choice | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |\n| Regex | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |\n| Grammar | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | 🟠 | ❌ | ✅ | ✅ | ✅ | ✅ |\n| **Generation Features** | | | | | | | | | | | | | | |\n| Async | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ |\n| Streaming | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ |\n| Vision | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |\n| Batching | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | 
❌ | ✅ |\n\n## Model Types\n\nModels can be divided into two categories: local models and server-based models.\n\nIn the case of local models, the text generation happens within the inference library object used to instantiate the model. This gives Outlines direct access to the generation process (through a logits processor) and means all structured generation output types are available.\n\nThe local models available are the following:\n\n- LlamaCpp\n- MLXLM\n- Transformers\n- TransformersMultiModal\n- VLLMOffline\n\nIn the case of server-based models, the model is initialized with a client that sends a request to a server that is in charge of the actual text generation. As a result, we have limited control over text generation and some output types are not supported. The server on which the text generation happens can either be remote (with OpenAI or Anthropic for instance) or local (with SGLang for instance).\n\nThe server-based models available are the following:\n\n- Anthropic\n- Dottxt\n- Gemini\n- Mistral\n- Ollama\n- OpenAI\n- SGLang\n- TGI\n- VLLM\n\nSome models have an async version. To use them, just pass the async version of the provider object to their loading function. It will then return an `Async<ModelName>` instance with the same methods and features as the regular sync instance.\n\nFor instance:\n\n```python\nfrom outlines import from_tgi\nfrom huggingface_hub import AsyncInferenceClient\n\nmodel = from_tgi(\n    AsyncInferenceClient(\"http://localhost:8000/v1\")\n)\nprint(type(model)) # outlines.models.tgi.AsyncTGI\n```\n\nThe models that have an async version are the following:\n\n- Mistral\n- Ollama\n- OpenAI\n- SGLang\n- TGI\n- VLLM\n"
  },
  {
    "path": "docs/features/models/llamacpp.md",
    "content": "---\ntitle: llama.cpp\n---\n\n# llama.cpp\n\nOutlines provides an integration with [Llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python library](https://github.com/abetlen/llama-cpp-python). Llama.cpp allows you to run quantized models on machines with limited compute.\n\n!!! Installation\n\n    You need to install the `llama-cpp-python` library to use the llama.cpp integration. Install all optional dependencies of the `LlamaCpp` model with: `pip install \"outlines[llamacpp]\"`.\n\n    See the [llama-cpp-python Github page](https://github.com/abetlen/llama-cpp-python?tab=readme-ov-file#supported-backends) for instructions on installing with CUDA, Metal, ROCm and other backends.\n\n\n## Model Initialization\n\nTo load the model, you can use the `from_llamacpp` function. The first argument of the function is a `Llama` model instance from the `llama_cpp` library. Consult the [Llama class API reference](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama) for detailed information on how to create a model instance and on the various available parameters.\n\nYou can also pass a `chat_mode` argument to `from_llamacpp`. If `True` (default), the model will regard all `str` inputs as user messages in a chat conversation. 
If `False`, the model will regard all `str` inputs as plain text prompts.\n\nFor instance:\n\n```python\nimport outlines\nfrom llama_cpp import Llama\n\nmodel = outlines.from_llamacpp(\n    Llama.from_pretrained(\n        repo_id=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\",\n        filename=\"mistral-7b-instruct-v0.2.Q5_K_M.gguf\",\n    )\n)\n```\n\nYou can also disable chat mode:\n\n```python\nimport outlines\nfrom llama_cpp import Llama\n\nmodel = outlines.from_llamacpp(\n    Llama.from_pretrained(\n        repo_id=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\",\n        filename=\"mistral-7b-instruct-v0.2.Q5_K_M.gguf\",\n    ),\n    chat_mode=False,\n)\n```\n\n## Text Generation\n\nTo generate text, you can simply call the model with a prompt.\n\nFor instance:\n\n```python\nimport outlines\nfrom llama_cpp import Llama\n\n# Create the model\nmodel = outlines.from_llamacpp(\n    Llama.from_pretrained(\n        repo_id=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\",\n        filename=\"mistral-7b-instruct-v0.2.Q5_K_M.gguf\",\n    )\n)\n\n# Call it to generate text\nresult = model(\"What's the capital of Latvia?\", max_tokens=20)\nprint(result) # 'Riga'\n```\n\n#### Chat\n\nYou can also use chat inputs with the `LlamaCpp` model. 
To do so, call the model with a `Chat` instance.\n\nFor instance:\n\n```python\nimport outlines\nfrom llama_cpp import Llama\nfrom outlines.inputs import Chat\n\n# Create the model\nmodel = outlines.from_llamacpp(\n    Llama.from_pretrained(\n        repo_id=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\",\n        filename=\"mistral-7b-instruct-v0.2.Q5_K_M.gguf\",\n    )\n)\n\n# Create the prompt containing the text and the image\nprompt = Chat([\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"assistant\", \"content\": \"What's the capital of Latvia?\"},\n])\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'Riga.'\n```\n\n#### Streaming\n\nThe `LlamaCpp` model also supports streaming.\n\nFor instance:\n\n```python\nimport outlines\nfrom llama_cpp import Llama\n\n# Create the model\nmodel = outlines.from_llamacpp(\n    Llama.from_pretrained(\n        repo_id=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\",\n        filename=\"mistral-7b-instruct-v0.2.Q5_K_M.gguf\",\n    )\n)\n\n# Stream text\nfor chunk in model.stream(\"Write a short story about a cat.\", max_tokens=100):\n    print(chunk) # 'In...'\n```\n\n## Structured Generation\n\nThe `LlamaCpp` model supports all output types available in Outlines. 
Simply provide an `output_type` after the prompt when calling the model.\n\n### Basic Type\n\n```python\nimport outlines\nfrom llama_cpp import Llama\n\noutput_type = int\n\nmodel = outlines.from_llamacpp(\n    Llama.from_pretrained(\n        repo_id=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\",\n        filename=\"mistral-7b-instruct-v0.2.Q5_K_M.gguf\",\n    )\n)\n\nresult = model(\"How many countries are there in the world?\", output_type)\nprint(result) # '200'\n```\n\n### JSON Schema\n\n```python\nfrom typing import List\nfrom pydantic import BaseModel\nimport outlines\nfrom llama_cpp import Llama\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\nmodel = outlines.from_llamacpp(\n    Llama.from_pretrained(\n        repo_id=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\",\n        filename=\"mistral-7b-instruct-v0.2.Q5_K_M.gguf\",\n    )\n)\n\nresult = model(\"Create a character.\", output_type=Character, max_tokens=200)\nprint(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\nprint(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n```\n\n### Multiple Choice\n\n```python\nfrom typing import Literal\nimport outlines\nfrom llama_cpp import Llama\n\noutput_type = Literal[\"Paris\", \"London\", \"Rome\", \"Berlin\"]\n\nmodel = outlines.from_llamacpp(\n    Llama.from_pretrained(\n        repo_id=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\",\n        filename=\"mistral-7b-instruct-v0.2.Q5_K_M.gguf\",\n    )\n)\n\nresult = model(\"What is the capital of France?\", output_type)\nprint(result) # 'Paris'\n```\n\n### Regex\n\n```python\nfrom outlines.types import Regex\nimport outlines\nfrom llama_cpp import Llama\n\noutput_type = Regex(r\"\\d{3}-\\d{2}-\\d{4}\")\n\nmodel = outlines.from_llamacpp(\n    Llama.from_pretrained(\n        repo_id=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\",\n        
filename=\"mistral-7b-instruct-v0.2.Q5_K_M.gguf\",\n    )\n)\n\nresult = model(\"Generate a fake social security number.\", output_type)\nprint(result) # '782-32-3789'\n```\n\n### Context-free grammar\n\n```python\nfrom outlines.types import CFG\nimport outlines\nfrom llama_cpp import Llama\n\noutput_type = CFG(\"\"\"\nroot ::= answer\nanswer ::= \"yes\" | \"no\"\n\"\"\")\n\nmodel = outlines.from_llamacpp(\n    Llama.from_pretrained(\n        repo_id=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\",\n        filename=\"mistral-7b-instruct-v0.2.Q5_K_M.gguf\",\n    )\n)\n\nresult = model(\"Are you feeling good today?\", output_type)\nprint(result) # 'yes'\n```\n\n## Inference Arguments\n\nWhen calling the model, you can provide optional inference parameters on top of the prompt and the output type. These parameters will be passed on to the `__call__` method of the `llama_cpp.Llama` model. Some common inference arguments include `max_tokens`, `temperature`, `frequency_penalty` and `top_p`.\n\nSee the [llama-cpp-python documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.__call__) for more information on inference parameters.\n"
  },
  {
    "path": "docs/features/models/mistral.md",
    "content": "# Mistral\n\n!!! Installation\n\n    You need to install the `mistralai` library to be able to use the Mistral API in Outlines. Install all optional dependencies of the `Mistral` model with: `pip install \"outlines[mistral]\"`.\n\n    You also need to have a Mistral API key. This API key must either be set as an environment variable called `MISTRAL_API_KEY` or be provided to the `mistralai.Mistral` class when instantiating it.\n\n## Model Initialization\n\nTo create a `Mistral` or `AsyncMistral` model instance, you can use the `from_mistral` function. It takes 3 arguments:\n\n- `client`: a `mistralai.Mistral` instance\n- `model_name` (optional): the name of the model you want to use\n- `async_client` (optional): whether it should create a sync or an async model\n\nAs the `mistralai` library uses a single class to handle both sync and async requests, you must set the `async_client` argument to True to get an `AsyncMistral` model.\n\nFor instance:\n\n```python\nimport mistralai\nimport outlines\n\n# Create the Mistral client\nclient = mistralai.Mistral()\n\n# Create a sync model\nmodel = outlines.from_mistral(\n    client,\n    \"mistral-large-latest\"\n)\n\n# Create an async model\nmodel = outlines.from_mistral(\n    client,\n    \"mistral-large-latest\",\n    True\n)\n```\n\nThe mistralai python SDK provides methods to query the API for a list of [all available models](https://docs.mistral.ai/getting-started/models/models_overview/#api-versioning),\nincluding paid endpoints for [premium models](https://docs.mistral.ai/getting-started/models/models_overview/) in addition to open weights.\n\n## Text Generation\n\nOnce you've created your Outlines `Mistral` model instance, you're all set to generate text with this provider. 
You can simply call the model with a prompt.\n\nFor instance:\n\n```python\nimport mistralai\nimport outlines\n\n# Create the model\nmodel = outlines.from_mistral(\n    mistralai.Mistral(),\n    \"mistral-large-latest\"\n)\n\n# Call it to generate text\nresponse = model(\"What's the capital of Latvia?\", max_tokens=20)\nprint(response) # 'Riga'\n```\n\n#### Vision\n\nSome Mistral models support vision input. To use this feature, provide a list containing a text prompt and `Image` instances.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nimport outlines\nimport mistralai\nfrom outlines.inputs import Image\n\n# Create the model\nmodel = outlines.from_mistral(\n    mistralai.Mistral(),\n    \"mistral-large-latest\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the prompt containing the text and the image\nprompt = [\n    \"Describe the image\",\n    Image(get_image(\"https://picsum.photos/id/237/400/300\"))\n]\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'This is a picture of a black dog.'\n```\n\n#### Chat\n\nYou can also use chat inputs with the `Mistral` model. To do so, call the model with a `Chat` instance. 
The content of a message within the chat can be vision inputs as described above.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nimport mistralai\nimport outlines\nfrom outlines.inputs import Chat, Image\n\n# Create the model\nmodel = outlines.from_mistral(\n    mistralai.Mistral(),\n    \"mistral-large-latest\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the chat input\nprompt = Chat([\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\n        \"role\": \"user\",\n        \"content\": [\"Describe the image\", Image(get_image(\"https://picsum.photos/id/237/400/300\"))]\n    },\n])\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'This is a picture of a black dog.'\n```\n\n#### Streaming\n\nFinally, the `Mistral` model supports streaming through the `stream` method.\n\nFor instance:\n\n```python\nimport mistralai\nimport outlines\n\n# Create the model\nmodel = outlines.from_mistral(\n    mistralai.Mistral(),\n    \"mistral-large-latest\"\n)\n\n# Stream the response\nfor chunk in model.stream(\"Tell me a short story about a cat.\", max_tokens=50):\n    print(chunk) # 'Once...'\n```\n\n## Structured Generation\n\nMistral provides support for some forms of structured output: JSON schemas and JSON syntax. 
To use it, call the model with an `output_type` on top of your prompt.\n\n#### JSON Schema\n\n```python\nfrom typing import List\nfrom pydantic import BaseModel\nimport mistralai\nimport outlines\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\n# Create the model\nmodel = outlines.from_mistral(\n    mistralai.Mistral(),\n    \"mistral-large-latest\"\n)\n\n# Call it with the output type to generate structured text\nresult = model(\"Create a character, use the json format.\", Character, top_p=0.1)\nprint(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\nprint(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n```\n\n#### JSON Syntax\n\nWhat we mean by JSON syntax is what is sometimes called JSON mode, meaning that the model will return a valid JSON, but you do not get to specify its structure. To use this JSON mode, provide the `dict` type as an output type.\n\n```python\nimport mistralai\nimport outlines\n\n## Create the model\nmodel = outlines.from_mistral(\n    mistralai.Mistral(),\n    \"mistral-large-latest\"\n)\n\n\n# Call it with the output type to generate structured text\nresult = model(\"Create a character, use the json format.\", dict, temperature=0.5)\nprint(result) # '{\"first_name\": \"Henri\", \"last_name\": \"Smith\", \"height\": \"170\"}'\n```\n\n## Asynchronous Calls\n\nAll features presented above for the sync model are also available for the async model.\n\nFor instance:\n\n```python\nimport asyncio\nimport mistralai\nimport outlines\nfrom pydantic import BaseModel\nfrom typing import List\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\n# Create the model\nmodel = outlines.from_mistral(\n    mistralai.Mistral(),\n    \"mistral-large-latest\",\n    True\n)\n\nasync def text_generation():\n    # Regular generation\n    response = await model(\"What's the capital of 
Latvia?\", max_tokens=20)\n    print(response) # 'Riga'\n\n    # Streaming\n    async for chunk in  model.stream(\"Tell me a short story about a cat.\", max_tokens=50):\n        print(chunk, end=\"\") # 'Once...'\n\n    # Structured generation\n    result = await model(\"Create a character, use the json format.\", Character, top_p=0.1)\n    print(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\n    print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n\nasyncio.run(text_generation())\n```\n\n## Inference arguments\n\nWhen calling the model, you can provide keyword arguments that will be passed down to the `chat.complete` method of the Mistral client and its async and streaming equivalents. Some of the most common arguments include `max_tokens`, `temperature`, `stop` and `top_p`.\n\nAnother keyword argument of interest is `n`. If set with an integer value superior to 1, Mistral will generate several sample responses and you will receive a list of strings as a response to your model call.\n\nSee the [Mistral API documentation](https://docs.mistral.ai/api/#tag/chat) for the full list of available arguments.\n\n\n## Troubleshooting\n\n- **ImportError: No module named 'mistralai'**\n  → Run `pip install mistralai`.\n\n- **Authentication Error**\n  → Verify `MISTRAL_API_KEY` is set and valid. Test with the [Mistral Playground](https://chat.mistral.ai).\n\n- **Schema Error (e.g., \"Mistral does not support your schema\")**\n  → Ensure no `pattern` fields in Pydantic (Outlines sets `additionalProperties: false`); try a simpler schema or a different Outlines model (local models in particular).\n\n- **Model Not Found Error**\n  → Confirm the model name (e.g., `\"mistral-small-latest\"`) and your subscription tier. 
Check [docs](https://docs.mistral.ai/getting-started/models/).\n\n- **Rate Limits or Quotas**\n  → Monitor usage in the Mistral console; upgrade your plan for higher limits.\n\n- **Input Validation Errors**\n  → Ensure Chat messages use valid roles (`system`, `user`, `assistant`); list inputs start with strings.\n\n\n*Last updated: October 2, 2025*\n"
  },
  {
    "path": "docs/features/models/mlxlm.md",
    "content": "---\ntitle: mlx-lm\n---\n\n# mlx-lm\n\nOutlines provides an integration with [mlx-lm](https://github.com/ml-explore/mlx-examples/tree/main/llms), allowing models to be run quickly on Apple Silicon via the [mlx](https://ml-explore.github.io/mlx/build/html/index.html) library.\n\n!!! Note \"Installation\"\n\n    You need a device that [supports Metal](https://support.apple.com/en-us/102894) to use the mlx-lm integration.\n\n    You need to install the `mlx` and `mlx-lm` libraries to be able to use mlx in Outlines. Install all optional dependencies of the `MLXLM` model with: `pip install \"outlines[mlxlm]\"`.\n\n## Model Initialization\n\nTo create a MLXLM model instance, you can use the `from_mlxlm` function. It takes 2 arguments:\n\n- `model`: an `mlx.nn.Module` instance\n- `tokenizer`: a `transformers.PreTrainedTokenizer` instance\n\nHowever, we recommend you simply pass on the output of the `mlx_lm.load` function (it takes a model name as an argument).\n\nFor instance:\n\n```python\nimport outlines\nimport mlx_lm\n\n# Create the model\nmodel = outlines.from_mlxlm(\n    *mlx_lm.load(\"mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit\")\n)\n```\n\n## Text Generation\n\nTo generate text, you can simply call the model with a prompt.\n\nFor instance:\n\n```python\nimport outlines\nimport mlx_lm\n\n# Load the model\nmodel = outlines.from_mlxlm(\n    *mlx_lm.load(\"mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit\")\n)\n\n# Call it to generate text\nresult = model(\"What's the capital of Latvia?\", max_tokens=20)\nprint(result) # 'Riga'\n```\n\n#### Chat\n\nYou can use chat inputs with the `MLXLM` model. 
To do so, call the model with a `Chat` instance.\n\nFor instance:\n\n```python\nimport outlines\nimport mlx_lm\nfrom outlines.inputs import Chat\n\n# Load the model\nmodel = outlines.from_mlxlm(\n    *mlx_lm.load(\"mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit\")\n)\n\n# Create the chat input\nprompt = Chat([\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": \"What's the capital of Latvia?\"},\n])\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'Riga.'\n```\n\n#### Streaming\n\nThe `MLXLM` model also supports streaming. For instance:\n\n```python\nimport outlines\nimport mlx_lm\n\n# Load the model\nmodel = outlines.from_mlxlm(\n    *mlx_lm.load(\"mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit\")\n)\n\n# Stream text\nfor chunk in model.stream(\"Write a short story about a cat.\", max_tokens=100):\n    print(chunk) # 'In...'\n```\n\n#### Batch Generation\n\nThe `MLXLM` model supports generating text in batches. To do so, use the `batch` method and provide a list of strings as a model input. However, constrained generation is not supported with batching, so you cannot provide an `output_type`. 
For instance:\n\n```python\nimport outlines\nimport mlx_lm\n\n# Load the model\nmodel = outlines.from_mlxlm(\n    *mlx_lm.load(\"mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit\")\n)\n\n# Generate text in batches\nresult = model.batch([\"What's the capital of Lithuania?\", \"What's the capital of Latvia?\"], max_tokens=20)\nprint(result) # ['Vilnius', 'Riga']\n```\n\n## Structured Generation\n\nAs a local model, `MLXLM` supports all forms of structured generation available in Outlines.\n\n#### Basic Type\n\n```python\nimport outlines\nimport mlx_lm\n\noutput_type = int\n\nmodel = outlines.from_mlxlm(\n    *mlx_lm.load(\"mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit\")\n)\n\nresult = model(\"How many countries are there in the world?\", output_type)\nprint(result) # '200'\n```\n\n#### JSON Schema\n\n```python\nfrom pydantic import BaseModel\nfrom typing import List\nimport outlines\nimport mlx_lm\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\nmodel = outlines.from_mlxlm(\n    *mlx_lm.load(\"mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit\")\n)\n\nresult = model(\"Create a character.\", output_type=Character)\nprint(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\nprint(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n```\n\n#### Multiple Choice\n\n```python\nfrom typing import Literal\nimport outlines\nimport mlx_lm\n\noutput_type = Literal[\"Paris\", \"London\", \"Rome\", \"Berlin\"]\n\nmodel = outlines.from_mlxlm(\n    *mlx_lm.load(\"mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit\")\n)\n\nresult = model(\"What is the capital of France?\", output_type)\nprint(result) # 'Paris'\n```\n\n#### Regex\n\n```python\nfrom outlines.types import Regex\nimport outlines\nimport mlx_lm\n\noutput_type = Regex(r\"\\d{3}-\\d{2}-\\d{4}\")\n\nmodel = outlines.from_mlxlm(\n    *mlx_lm.load(\"mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit\")\n)\n\nresult = 
model(\"Generate a fake social security number.\", output_type)\nprint(result) # '782-32-3789'\n```\n\n#### Context-Free Grammar\n\n```python\nfrom outlines.types import CFG\nimport outlines\nimport mlx_lm\n\narithmetic_grammar = \"\"\"\n?start: sum\n\n?sum: product\n| sum \"+\" product   -> add\n| sum \"-\" product   -> sub\n\n?product: atom\n| product \"*\" atom  -> mul\n| product \"/\" atom  -> div\n\n?atom: NUMBER           -> number\n| \"-\" atom         -> neg\n| \"(\" sum \")\"\n\n%import common.NUMBER\n%import common.WS_INLINE\n\n%ignore WS_INLINE\n\"\"\"\noutput_type = CFG(arithmetic_grammar)\n\nmodel = outlines.from_mlxlm(\n    *mlx_lm.load(\"mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit\")\n)\n\nresult = model(\"Write an addition.\", output_type, max_tokens=20)\nprint(result) # '23 + 48'\n```\n\n## Inference Arguments\n\nWhen calling the model, you can provide optional inference parameters on top of the prompt and the output type. These parameters will be passed on to the `mlx_lm.generate` function used to generate text.\n\nSee the [MLXLM documentation](https://github.com/ml-explore/mlx-lm) for more information on inference parameters.\n"
  },
  {
    "path": "docs/features/models/ollama.md",
    "content": "---\ntitle: Ollama\n---\n\n# Ollama\n\n!!! Installation\n\n    To be able to use Ollama in Outlines, you must install both Ollama and the optional dependency libraries of the model.\n\n    - To download Ollama: https://ollama.com/download\n    - To install the ollama python sdk: `pip install \"outlines[ollama]\"`\n\n    Consult the [`ollama` documentation](https://github.com/ollama/ollama-python) for detailed information on installation and client initialization.\n\n## Model Initialization\n\nTo create an Ollama model instance, you can use the `from_ollama` function. It takes 2 arguments:\n\n- `client`: an `ollama.Client` or `ollama.AsyncClient` instance\n- `model_name`: the name of the model you want to use\n\nBased on whether the inference client instance is synchronous or asynchronous, you will receive an `Ollama` or an `AsyncOllama` model instance.\n\nFor instance:\n\n```python\nimport ollama\nimport outlines\n\n# Create the client or async client\nclient = ollama.Client()\nasync_client = ollama.AsyncClient()\n\n# Create a sync model\nmodel = outlines.from_ollama(\n    client,\n    \"qwen2.5vl:3b\",\n)\n\n# Create an async model\nmodel = outlines.from_ollama(\n    async_client,\n    \"qwen2.5vl:3b\",\n)\n```\n\nYou can find the list of available models on the [Ollama library](https://ollama.com/library).\n\n## Text Generation\n\nOnce you've created your Outlines `Ollama` model instance, you're all set to generate text with this provider. You can simply call the model with a prompt.\n\nFor instance:\n\n```python\nimport ollama\nimport outlines\n\n# Create the model\nmodel = outlines.from_ollama(ollama.Client(), \"qwen2.5vl:3b\")\n\n# Call it to generate text\nresponse = model(\"What's the capital of Latvia?\")\nprint(response) # 'Riga'\n```\n\n#### Vision\n\nSome Ollama models support vision input. 
To use this feature, provide a list containing a text prompt and `Image` instances.\n\n```python\nimport io\nimport requests\nimport PIL\nimport ollama\nimport outlines\nfrom outlines.inputs import Image\n\n# Create the model\nmodel = outlines.from_ollama(\n    ollama.Client(),\n    \"qwen2.5vl:3b\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the prompt\nprompt = [\n    \"Describe the image\",\n    Image(get_image(\"https://picsum.photos/id/237/400/300\"))\n]\n\n# Generate text\nresponse = model(prompt)\nprint(response) # The image shows a black puppy with a curious and attentive expression.\n```\n\n#### Chat\n\nYou can also use chat inputs with the `Ollama` model. To do so, call the model with a `Chat` instance. The content of a message within the chat can be vision inputs as described above.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nimport ollama\nimport outlines\nfrom outlines.inputs import Chat, Image\n\n# Create the model\nmodel = outlines.from_ollama(\n    ollama.Client(),\n    \"qwen2.5vl:3b\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the chat input\nprompt = Chat([\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\n        \"role\": \"user\",\n        \"content\": [\"Describe the image\", Image(get_image(\"https://picsum.photos/id/237/400/300\"))]\n    },\n])\n\n# Call the model to generate a response\nresponse = model(prompt)\nprint(response) # 'This is a picture of a black dog.'\n```\n\n#### Streaming\n\nFinally, the `Ollama` model supports streaming through the `stream` method.\n\n```python\nimport ollama\nimport outlines\n\n# Create the model\nmodel = outlines.from_ollama(ollama.Client(), \"qwen2.5vl:3b\")\n\n# Stream text\nfor chunk in model.stream(\"Write a short story about a cat\"):\n    
print(chunk) # 'In...'\n```\n\n## Asynchronous Calls\n\nOllama supports asynchronous operations by passing an `AsyncClient` instead of a regular `Client`. This returns an `AsyncOllama` model instance that supports async/await patterns.\n\n### Basic Async Generation\n\n```python\nimport asyncio\nimport outlines\nimport ollama\n\nasync def generate_text():\n    # Create an async model\n    async_client = ollama.AsyncClient()\n    async_model = outlines.from_ollama(async_client, \"qwen2.5vl:3b\")\n\n    result = await async_model(\"Write a haiku about Python.\")\n    print(result)\n\nasyncio.run(generate_text())\n```\n\n### Async Streaming\n\nThe async model also supports streaming with async iteration:\n\n```python\nimport asyncio\nimport outlines\nimport ollama\n\nasync def stream_text():\n    async_client = ollama.AsyncClient()\n    async_model = outlines.from_ollama(async_client, \"qwen2.5vl:3b\")\n\n    async for chunk in async_model.stream(\"Tell me a story about a robot.\"):\n        print(chunk, end=\"\")\n\nasyncio.run(stream_text())\n```\n\n### Concurrent Async Requests\n\nOne of the main benefits of async calls is the ability to make multiple concurrent requests:\n\n```python\nimport asyncio\nimport outlines\nimport ollama\n\nasync def generate_multiple():\n    async_client = ollama.AsyncClient()\n    async_model = outlines.from_ollama(async_client, \"qwen2.5vl:3b\")\n\n    # Define multiple prompts\n    prompts = [\n        \"Write a tagline for a coffee shop.\",\n        \"Write a tagline for a bookstore.\",\n        \"Write a tagline for a gym.\"\n    ]\n\n    tasks = [async_model(prompt) for prompt in prompts]\n    results = await asyncio.gather(*tasks)\n\n    for prompt, result in zip(prompts, results):\n        print(f\"{prompt}\\n{result}\\n\")\n\nasyncio.run(generate_multiple())\n```\n\n## Structured Generation\n\nOllama only provides support for structured generation based on a JSON schema. 
To use it, call the model with a JSON schema object as an `output_type` on top of your prompt.\n\nFor instance:\n\n```python\nfrom typing import List\nfrom pydantic import BaseModel\nimport ollama\nimport outlines\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\n# Create the model\nmodel = outlines.from_ollama(ollama.Client(), \"tinyllama\")\n\n# Call it with the output type to generate structured text\nresult = model(\"Create a character\", Character)\nprint(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\nprint(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n```\n\n## Inference arguments\n\nWhen calling the model, you can provide keyword arguments that will be passed down to the `generate` method of the Ollama client.\n\nConsult the [Ollama REST API documentation](https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-completion) for the full list of inference parameters.\n"
  },
  {
    "path": "docs/features/models/openai.md",
    "content": "# OpenAI\n\n!!! Installation\n\n    You need to install the `openai` library to be able to use the OpenAI API in Outlines. Install all optional dependencies of the `OpenAI` model with: `pip install \"outlines[openai]\"`.\n\n    You also need to have an OpenAI API key. This API key must either be set as an environment variable called `OPENAI_API_KEY` or be provided to the `openai.OpenAI` class when instantiating it.\n\n## Model Initialization\n\nTo create an OpenAI model instance, you can use the `from_openai` function. It takes 2 arguments:\n\n- `client`: an `openai.OpenAI`, `openai.AzureOpenAI`, `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` instance\n- `model_name`: the name of the model you want to use\n\nBased on whether the inference client instance is synchronous or asynchronous, you will receive an `OpenAI` or an `AsyncOpenAI` model instance.\n\nFor instance:\n\n```python\nimport outlines\nimport openai\n\n# Create the client or async client\nclient = openai.OpenAI()\nasync_client = openai.AsyncOpenAI()\n\n# Create a sync model\nmodel = outlines.from_openai(\n    client,\n    \"gpt-4o\"\n)\n\n# Create aa async model\nmodel = outlines.from_openai(\n    async_client,\n    \"gpt-4o\"\n)\n```\n\nCheck the [OpenAI documentation](https://platform.openai.com/docs/models) for an up-to-date list of available models. As shown above, you can use Azure OpenAI in Outlines the same way you would use OpenAI, just provide an `openai.AzureOpenAI` instance to the Outlines model class.\n\n## Text Generation\n\nOnce you've created your Outlines `OpenAI` model instance, you're all set to generate text with this provider. 
You can simply call the model with a prompt.\n\nFor instance:\n\n```python\nimport openai\nimport outlines\n\n# Create the model\nmodel = outlines.from_openai(\n    openai.OpenAI(),\n    \"gpt-4o\"\n)\n\n# Call it to generate text\nresponse = model(\"What's the capital of Latvia?\", max_tokens=20)\nprint(response) # 'Riga'\n```\n\n#### Vision\n\nSome OpenAI models support vision input. To use this feature, provide a list containing a text prompt and `Image` instances.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nimport outlines\nimport openai\nfrom outlines.inputs import Image\n\n# Create the model\nmodel = outlines.from_openai(\n    openai.OpenAI(),\n    \"gpt-4o\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the prompt containing the text and the image\nprompt = [\n    \"Describe the image\",\n    Image(get_image(\"https://picsum.photos/id/237/400/300\"))\n]\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'This is a picture of a black dog.'\n```\n\n#### Chat\n\nYou can also use chat inputs with the `OpenAI` model. To do so, call the model with a `Chat` instance. 
The content of a message within the chat can be vision inputs as described above.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nimport openai\nimport outlines\nfrom outlines.inputs import Chat, Image\n\n# Create the model\nmodel = outlines.from_openai(\n    openai.OpenAI(),\n    \"gpt-4o\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the chat input\nprompt = Chat([\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\n        \"role\": \"user\",\n        \"content\": [\"Describe the image\", Image(get_image(\"https://picsum.photos/id/237/400/300\"))]\n    },\n])\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'This is a picture of a black dog.'\n```\n\n#### Streaming\n\nFinally, the `OpenAI` model supports streaming through the `stream` method.\n\nFor instance:\n\n```python\nimport openai\nimport outlines\n\n# Create the model\nmodel = outlines.from_openai(\n    openai.OpenAI(),\n    \"gpt-4o\"\n)\n\n# Stream the response\nfor chunk in model.stream(\"Tell me a short story about a cat.\", max_tokens=50):\n    print(chunk) # 'Once...'\n```\n\n## Structured Generation\n\nOpenAI provides support for some forms of structured output: JSON schemas and JSON syntax. 
To use it, call the model with an `output_type` on top of your prompt.\n\n#### JSON Schema\n\n```python\nfrom typing import List\nfrom pydantic import BaseModel\nimport openai\nimport outlines\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\n# Create the model\nmodel = outlines.from_openai(openai.OpenAI(), \"gpt-4o\")\n\n# Call it with the output type to generate structured text\nresult = model(\"Create a character, use the json format.\", Character, top_p=0.1)\nprint(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\nprint(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n```\n\n#### JSON Syntax\n\nWhat we mean by JSON syntax is what is sometimes called JSON mode, meaning that the model will return a valid JSON, but you do not get to specify its structure. To use this JSON mode, provide the `dict` type as an output type.\n\n```python\nimport openai\nimport outlines\n\n# Create the model\nmodel = outlines.from_openai(openai.OpenAI(), \"gpt-4o\")\n\n# Call it with the output type to generate structured text\nresult = model(\"Create a character, use the json format.\", dict, temperature=0.5)\nprint(result) # '{\"first_name\": \"Henri\", \"last_name\": \"Smith\", \"height\": \"170\"}'\n```\n\n## Asynchronous Calls\n\nAll features presented above for the sync model are also available for the async model.\n\nFor instance:\n\n```python\nimport asyncio\nimport openai\nimport outlines\nfrom pydantic import BaseModel\nfrom typing import List\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\n# Create the model\nmodel = outlines.from_openai(\n    openai.AsyncOpenAI(),\n    \"gpt-4o\"\n)\n\nasync def text_generation():\n    # Regular generation\n    response = await model(\"What's the capital of Latvia?\", max_tokens=20)\n    print(response) # 'Riga'\n\n    # Streaming\n    async for chunk in  
model.stream(\"Tell me a short story about a cat.\", max_tokens=50):\n        print(chunk, end=\"\") # 'Once...'\n\n    # Structured generation\n    result = await model(\"Create a character, use the json format.\", Character, top_p=0.1)\n    print(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\n    print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n\nasyncio.run(text_generation())\n```\n\n## Inference arguments\n\nWhen calling the model, you can provide keyword arguments that will be passed down to the `chat.completions.create` method of the OpenAI client. Some of the most common arguments include `max_tokens`, `temperature`, `stop` and `top_p`.\n\nAnother keyword argument of interest is `n`. If set with an integer value superior to 1, OpenAI will generate several sample responses and you will receive a list of strings as a response to your model call.\n\nSee the [OpenAI API documentation](http://platform.openai.com/docs/api-reference/chat/create) for the full list of available arguments.\n"
  },
  {
    "path": "docs/features/models/openai_compatible.md",
    "content": "# OpenAI-Compatible APIs\n\nMany inference providers offer OpenAI-compatible APIs, allowing you to use the familiar OpenAI SDK while connecting to different backends. Outlines allows you can leverage various providers while maintaining consistent code.\n\n## What are OpenAI-Compatible APIs?\n\nOpenAI-compatible APIs implement the same REST endpoints and request/response formats as OpenAI's API, but serve different models or run on different infrastructure. This allows you to use the `openai` Python library with any compatible provider by simply changing the `base_url`.\n\n!!! Installation\n\n    You need to install the `openai` library to be able to use the OpenAI-compatible APIs in Outlines. Install all optional dependencies of the `OpenAI` model with: `pip install \"outlines[openai]\"`.\n\n## General Usage Pattern\n\nThe standard approach is to use the OpenAI SDK with a custom base URL:\n\n```python\nimport openai\nimport outlines\n\n# Point to your OpenAI-compatible endpoint\nclient = openai.OpenAI(\n    base_url=\"https://your-provider.com/v1\",  # Custom endpoint\n    api_key=\"your-api-key\"\n)\n\n# Use with Outlines\nmodel = outlines.from_openai(client, \"model-name\")\n```\n\n## Important: Provider-Specific Parameters\n\n!!! Warning \"API-Specific Parameters\"\n\n    Some providers require additional parameters in the API request for structured generation to work properly. These are typically passed as extra arguments when calling the model.\n\n    For example, some providers may need special parameters in the request body to enable guided generation or specify constraints. 
Always consult your provider's documentation for structured generation requirements.\n\n## Popular OpenAI-Compatible Providers\n\nMany providers offer OpenAI-compatible endpoints:\n\n- **Groq**\n- **Together AI**\n- **Anyscale**\n- **Fireworks AI**\n- **Perplexity**\n- **Local servers** (LocalAI, etc.)\n\n## Configuration Examples\n\n### Basic Setup\n```python\nimport openai\nimport outlines\n\n# Generic OpenAI-compatible setup\nclient = openai.OpenAI(\n    base_url=\"https://api.your-provider.com/v1\",\n    api_key=\"your-api-key\"\n)\n\nmodel = outlines.from_openai(client, \"provider-model-name\")\n```\n\n### With Authentication Headers\n```python\nimport openai\nimport outlines\n\n# Some providers need custom headers\nclient = openai.OpenAI(\n    base_url=\"https://api.your-provider.com/v1\",\n    api_key=\"your-api-key\",\n    default_headers={\"Custom-Header\": \"value\"}\n)\n\nmodel = outlines.from_openai(client, \"provider-model-name\")\n```\n\n## Related Documentation\n\nFor specific implementations that use OpenAI-compatible APIs:\n\n- [SGLang](sglang.md): Local inference server with OpenAI-compatible endpoints\n- [vLLM](vllm.md): High-performance inference with OpenAI-compatible API\n- [OpenAI](openai.md): The original OpenAI API implementation\n"
  },
  {
    "path": "docs/features/models/openrouter.md",
    "content": "# Openrouter\n\n!!! Installation\n\n    [OpenRouter](https://openrouter.ai/docs/api-reference/overview) uses the same API as OpenAI, so both services are [interoperable](./openai_compatible.md) using the `openai` library. Install all optional dependencies of the `OpenAI` model with: `pip install \"outlines[openai]\"`.\n\n    You also need to have an Openrouter API key. This API key must either be set as an environment variable called `OPENAI_API_KEY` or be provided to the `openai.OpenAI` class when instantiating it.\n\n## Model Initialization\n\nTo create a model instance, you can use the `from_openai` function. It takes 2 arguments:\n\n- `client`: an `openai.OpenAI` instance\n- `model_name`: the name of the model you want to use, defined as `provider/model`\n\nFor instance:\n\n```python\nimport outlines\nimport openai\n\n# Create the client\nclient = openai.OpenAI(\n    base_url=\"https://openrouter.ai/api/v1\",\n    api_key=\"OPENAI_API_KEY\",\n)\n\n# Create the model\nmodel = outlines.from_openai(\n    client,\n    \"x-ai/grok-4\"\n)\n```\n\nLeaving an empty string in the model name field will lead OpenRouter to use your default model defined in [settings](https://openrouter.ai/settings/preferences).\n\nThe [OpenRouter](https://openrouter.ai/models) website lists available models. Keep in mind that some models do not support `json_schema` response formats and may return a 400 error code as a result.\n\n## Related Documentation\n\nFor specific implementations that use OpenAI-compatible APIs:\n\n- [OpenAI](./openai.md): The original OpenAI API implementation\n- [OpenAI compatible API](./openai_compatible.md): Details on how to use OpenAI-compatible APIs\n"
  },
  {
    "path": "docs/features/models/sglang.md",
    "content": "---\ntitle: SGLang\n---\n\n# SGLang\n\n## Prerequisites\n\nThe Outlines `SGLang` model is intended to be used along with an SGLang instance running on a separate server (can be local or remote). Make sure you have a SGLang server running and accessible before using the `SGLang` model. For instance by running:\n\n```shell\npip install \"sglang[all]\"\n\npython -m sglang.launch_server \\\n  --model-path NousResearch/Meta-Llama-3-8B-Instruct \\\n  --host 0.0.0.0 \\\n  --port 30000\n```\n\nFollow the [Installation instructions](https://docs.sglang.ai/start/install.html) for more information on how to set up a SGLang server for your particular setup.\n\nAs the SGLang client relies on the `openai` python sdk, you need to have the `openai` package installed. Install all optional dependencies of the `SGLang` model with: `pip install \"outlines[sglang]\"`.\n\nWhen launching your SGLang server, you can specify the backend engine to use for structured generation through the `grammar-backend` cli argument. Add `--grammar-backend outlines` to your command to use Outlines instead of the default engine.\n\n## Model Initialization\n\nTo load the model, you can use the `from_sglang` function. The argument of the function is either an `OpenAI` or `AsyncOpenAI` instance from the `openai` library. Make sure the value of the `base_url` argument of the `OpenAI` client points to your running SGLang server. 
Consult the [SGLang documentation](https://docs.sglang.ai/backend/send_request.html) on using an OpenAI client with an SGLang server for more information.\n\nBased on whether the `openai` client instance is synchronous or asynchronous, you will receive a `SGLang` or `AsyncSGLang` model instance.\n\nFor instance:\n\n```python\nimport openai\nimport outlines\n\n# Create the OpenAI client\nsync_openai_client = openai.OpenAI(base_url=\"http://localhost:11434\")\nasync_openai_client = openai.AsyncOpenAI(base_url=\"http://localhost:11434\")\n\n# Create a sync model\nsync_model = outlines.from_sglang(sync_openai_client)\nprint(type(sync_model)) # <class 'outlines.models.sglang.SGLang'>\n\n# Create an async model\nasync_model = outlines.from_sglang(async_openai_client)\nprint(type(async_model)) # <class 'outlines.models.sglang.AsyncSGLang'>\n```\n\n## Text Generation\n\nTo generate text, you can simply call the model with a prompt.\n\nFor instance:\n\n```python\nimport openai\nimport outlines\n\n# Create the model\nmodel = outlines.from_sglang(openai.OpenAI(base_url=\"http://localhost:11434\"))\n\n# Call it to generate text\nresponse = model(\"What's the capital of Latvia?\", max_tokens=20)\nprint(response) # 'Riga'\n```\n\n#### Vision\n\nSome models you can run with SGLang support vision input. 
To use this feature, provide a list containing a text prompt and `Image` instances.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nimport outlines\nimport openai\nfrom outlines.inputs import Image\n\n# Create the model\nmodel = outlines.from_sglang(openai.OpenAI(base_url=\"http://localhost:11434\"))\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the prompt containing the text and the image\nprompt = [\n    \"Describe the image\",\n    Image(get_image(\"https://picsum.photos/id/237/400/300\"))\n]\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'This is a picture of a black dog.'\n```\n\n#### Chat\n\nYou can also use chat inputs with the `SGLang` model. To do so, call the model with a `Chat` instance. The content of messages within the chat can be vision inputs as described above.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nimport openai\nimport outlines\nfrom outlines.inputs import Chat, Image\n\n# Create the model\nmodel = outlines.from_sglang(openai.OpenAI(base_url=\"http://localhost:11434\"))\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the chat input\nprompt = Chat([\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\n        \"role\": \"user\",\n        \"content\": [\"Describe the image\", Image(get_image(\"https://picsum.photos/id/237/400/300\"))]\n    },\n])\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'This is a picture of a black dog.'\n```\n\n#### Streaming\n\nFinally, the `SGLang` model supports streaming through the `stream` method.\n\nFor instance:\n\n```python\nimport openai\nimport outlines\n\n# Create the model\nmodel = 
outlines.from_sglang(openai.OpenAI(base_url=\"http://localhost:11434\"))\n\n# Stream the response\nfor chunk in model.stream(\"Tell me a short story about a cat.\", max_tokens=50):\n    print(chunk) # 'Once...'\n```\n\n## Structured Generation\n\nSGLang supports all output types available in Outlines (context-free grammars with caveats though, see the subsection below for more details). Simply provide an `output_type` after the prompt when calling the model. All structured generation features work with both synchronous and asynchronous models.\n\n### Simple Type\n\n```python\nimport openai\nimport outlines\n\noutput_type = int\n\nopenai_client = openai.OpenAI(base_url=\"http://localhost:11434\")\nmodel = outlines.from_sglang(openai_client)\n\nresult = model(\"How many countries are there in the world?\", output_type)\nprint(result) # '200'\n```\n\n### JSON Schema\n\n```python\nfrom typing import List\nimport openai\nimport outlines\nfrom pydantic import BaseModel\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\nopenai_client = openai.OpenAI(base_url=\"http://localhost:11434\")\nmodel = outlines.from_sglang(openai_client)\n\nresult = model(\"Create a character.\", Character, frequency_penalty=1.5)\nprint(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\nprint(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n```\n\n### Multiple Choice\n\n```python\nfrom typing import Literal\nimport openai\nimport outlines\n\noutput_type = Literal[\"Paris\", \"London\", \"Rome\", \"Berlin\"]\n\nopenai_client = openai.OpenAI(base_url=\"http://localhost:11434\")\nmodel = outlines.from_sglang(openai_client)\n\nresult = model(\"What is the capital of France?\", output_type, temperature=0)\nprint(result) # 'Paris'\n```\n\n### Regex\n\n```python\nimport openai\nimport outlines\nfrom outlines.types import Regex\n\noutput_type = 
Regex(r\"\\d{3}-\\d{2}-\\d{4}\")\n\nopenai_client = openai.OpenAI(base_url=\"http://localhost:11434\")\nmodel = outlines.from_sglang(openai_client)\n\nresult = model(\"Generate a fake social security number.\", output_type, top_p=0.1)\nprint(result) # '782-32-3789'\n```\n\n### Context-Free Grammar\n\nSGLang supports grammars, but expects an EBNF format instead of the Lark format Outlines uses. Thus, to use a context-free grammar with SGLang, provide a string using the EBNF syntax to the Outlines `CFG` object.\n\n```python\nimport openai\nimport outlines\nfrom outlines.types import CFG\n\nebnf_grammar = \"\"\"\nroot ::= answer\nanswer ::= \"yes\" | \"no\"\n\"\"\"\noutput_type = CFG(ebnf_grammar)\n\nopenai_client = openai.OpenAI(base_url=\"http://localhost:11434\")\nmodel = outlines.from_sglang(openai_client)\n\nresult = model(\"Is the weather good today?\", output_type)\nprint(result) # 'yes'\n```\n\n### Async Structured Generation\n\nAll structured generation features work seamlessly with async models:\n\n```python\nimport asyncio\nimport openai\nimport outlines\nfrom typing import List\nfrom pydantic import BaseModel\n\nclass User(BaseModel):\n    name: str\n    email: str\n    age: int\n\nasync def generate_user():\n    async_client = openai.AsyncOpenAI(base_url=\"http://localhost:11434\")\n    async_model = outlines.from_sglang(async_client)\n\n    result = await async_model(\"Generate a random user profile.\", output_type=User)\n    user = User.model_validate_json(result)\n    print(f\"Name: {user.name}, Email: {user.email}, Age: {user.age}\")\n\nasyncio.run(generate_user())\n```\n\n## Inference Arguments\n\nWhen calling the model, you can provide optional parameters on top of the prompt and the output type. 
Those will be passed on to the `chat.completions.create` method of the OpenAI client.\n\nAn optional parameter of particular interest is `extra_body`, which is a dictionary containing arguments that are specific to SGLang and are not part of the standard `openai` interface.\n\nSee the [SGLang documentation](https://docs.sglang.ai/backend/openai_api_completions.html) on parameters for the OpenAI-compatible server for more information on inference parameters.\n"
  },
  {
    "path": "docs/features/models/tgi.md",
    "content": "---\ntitle: TGI\n---\n\n# TGI\n\n## Prerequisites\n\nThe Outlines `TGI` model is intended to be used along with a HuggingFace `Text Generation Inference` server (running locally or remotely). Make sure you have a TGI server running before using the `TGI` model. For instance running:\n\n```shell\ndocker run \\\n  --gpus all \\\n  --shm-size 1g \\\n  -p 8080:80 \\\n  ghcr.io/huggingface/text-generation-inference:3.3.4 \\\n  --model-id NousResearch/Meta-Llama-3-8B-Instruct\n```\n\nPlease consult the [installation guide](https://huggingface.co/docs/text-generation-inference/en/quicktour) for more information about how to run TGI with your particular setup.\n\nAs the TGI client relies on the `huggingface_hub` python package, you need to have it installed. Install all optional dependencies of the `TGI` model with: `pip install \"outlines[tgi]\"`\n\n## Model Initialization\n\nTo load the model, you can use the `from_tgi` function. The argument of the function is either an `InferenceClient` or `AsyncInferenceClient` instance from the `huggingface_hub` library. 
Consult the [HuggingFace documentation](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client) for more information on their inference client.\n\nBased on whether the inference client instance is synchronous or asynchronous, you will receive a `TGI` or an `AsyncTGI` model instance.\n\nFor instance:\n\n```python\nimport outlines\nimport huggingface_hub\n\n# Create the inference client\nclient = huggingface_hub.InferenceClient(\"http://localhost:11434\")\nasync_client = huggingface_hub.AsyncInferenceClient(\"http://localhost:11434\")\n\n# Create a sync model\nsync_model = outlines.from_tgi(client)\nprint(type(sync_model))  # <class 'outlines.models.tgi.TGI'>\n\n# Create an async model\nasync_model = outlines.from_tgi(async_client)\nprint(type(async_model))  # <class 'outlines.models.tgi.AsyncTGI'>\n```\n\n## Text Generation\n\nTo generate text, you can simply call the model with a prompt.\n\nFor instance:\n\n```python\nimport outlines\nimport huggingface_hub\n\n# Create the model\nclient = huggingface_hub.InferenceClient(\"http://localhost:11434\")\nmodel = outlines.from_tgi(client)\n\n# Call it to generate text\nresult = model(\"Write a short story about a cat.\", stop_sequences=[\".\"])\nprint(result) # 'In a quiet village where the cobblestones hummed softly beneath the morning mist...'\n```\n\nThe `TGI` model supports streaming. For instance:\n\n```python\nimport outlines\nimport huggingface_hub\n\n# Create the model\nclient = huggingface_hub.InferenceClient(\"http://localhost:11434\")\nmodel = outlines.from_tgi(client)\n\n# Stream text\nfor chunk in model.stream(\"Write a short story about a cat.\", stop_sequences=[\".\"]):\n    print(chunk) # 'In ...'\n```\n\n## Asynchronous Calls\n\nTGI supports asynchronous operations by passing an `AsyncInferenceClient` instead of a regular `InferenceClient`. 
This returns an `AsyncTGI` model instance that supports async/await patterns.\n\n### Basic Async Generation\n\n```python\nimport asyncio\nimport outlines\nimport huggingface_hub\n\nasync def generate_text():\n    # Create an async model\n    async_client = huggingface_hub.AsyncInferenceClient(\"http://localhost:11434\")\n    async_model = outlines.from_tgi(async_client)\n\n    result = await async_model(\"Write a haiku about Python.\", max_new_tokens=50)\n    print(result)\n\nasyncio.run(generate_text())\n```\n\n### Async Streaming\n\nThe async model also supports streaming with async iteration:\n\n```python\nimport asyncio\nimport outlines\nimport huggingface_hub\n\nasync def stream_text():\n    async_client = huggingface_hub.AsyncInferenceClient(\"http://localhost:11434\")\n    async_model = outlines.from_tgi(async_client)\n\n    async for chunk in async_model.stream(\"Tell me a story about a robot.\", max_new_tokens=100):\n        print(chunk, end=\"\")\n\nasyncio.run(stream_text())\n```\n\n### Concurrent Async Requests\n\nOne of the main benefits of async calls is the ability to make multiple concurrent requests:\n\n```python\nimport asyncio\nimport outlines\nimport huggingface_hub\n\nasync def generate_multiple():\n    async_client = huggingface_hub.AsyncInferenceClient(\"http://localhost:11434\")\n    async_model = outlines.from_tgi(async_client)\n\n    # Define multiple prompts\n    prompts = [\n        \"Write a tagline for a coffee shop.\",\n        \"Write a tagline for a bookstore.\",\n        \"Write a tagline for a gym.\"\n    ]\n\n    tasks = [async_model(prompt, max_new_tokens=30) for prompt in prompts]\n    results = await asyncio.gather(*tasks)\n\n    for prompt, result in zip(prompts, results):\n        print(f\"{prompt}\\n{result}\\n\")\n\nasyncio.run(generate_multiple())\n```\n\n## Structured Generation\n\nTGI supports all output types available in Outlines except for context-free grammars. 
Simply provide an `output_type` after the prompt when calling the model. All structured generation features work with both synchronous and asynchronous models.\n\n### Simple Type\n\n```python\nimport outlines\nimport huggingface_hub\n\noutput_type = int\n\ntgi_client = huggingface_hub.InferenceClient(\"http://localhost:8080\")\nmodel = outlines.from_tgi(tgi_client)\n\nresult = model(\"How many countries are there in the world?\", output_type)\nprint(result) # '200'\n```\n\n### JSON Schema\n\n```python\nimport outlines\nimport huggingface_hub\nfrom typing import List\nfrom pydantic import BaseModel\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\ntgi_client = huggingface_hub.InferenceClient(\"http://localhost:8080\")\nmodel = outlines.from_tgi(tgi_client)\n\nresult = model(\"Create a character.\", output_type=Character, frequency_penalty=1.5)\nprint(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\nprint(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n```\n\n### Multiple Choice\n\n```python\nimport outlines\nimport huggingface_hub\nfrom typing import Literal\n\noutput_type = Literal[\"Paris\", \"London\", \"Rome\", \"Berlin\"]\n\ntgi_client = huggingface_hub.InferenceClient(\"http://localhost:8080\")\nmodel = outlines.from_tgi(tgi_client)\n\nresult = model(\"What is the capital of France?\", output_type, temperature=0)\nprint(result) # 'Paris'\n```\n\n### Regex\n\n```python\nimport outlines\nimport huggingface_hub\nfrom outlines.types import Regex\n\noutput_type = Regex(r\"\\d{3}-\\d{2}-\\d{4}\")\n\ntgi_client = huggingface_hub.InferenceClient(\"http://localhost:8080\")\nmodel = outlines.from_tgi(tgi_client)\n\nresult = model(\"Generate a fake social security number.\", output_type, top_p=0.1)\nprint(result) # '782-32-3789'\n```\n\n### Async Structured Generation\n\nAll structured generation features work seamlessly with async 
models:\n\n```python\nimport asyncio\nimport outlines\nimport huggingface_hub\nfrom pydantic import BaseModel\n\nclass User(BaseModel):\n    name: str\n    email: str\n    age: int\n\nasync def generate_user():\n    async_client = huggingface_hub.AsyncInferenceClient(\"http://localhost:11434\")\n    async_model = outlines.from_tgi(async_client)\n\n    result = await async_model(\"Generate a random user profile.\", output_type=User)\n    user = User.model_validate_json(result)\n    print(f\"Name: {user.name}, Email: {user.email}, Age: {user.age}\")\n\nasyncio.run(generate_user())\n```\n\n## Inference parameters\n\nWhen calling the model, you can provide optional parameters on top of the prompt and the output type. Those will be passed on to the `text_generation` method of the TGI client.\n\nCommon parameters include `max_new_tokens`, `stop_sequences`, `temperature`, `top_k`, `top_p`, and others as specified in the [TGI inference client documentation](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient).\n"
  },
  {
    "path": "docs/features/models/transformers.md",
    "content": "---\ntitle: Transformers\n---\n\n# Transformers\n\n!!! Installation\n\n    You need to install the `transformers` library to be able to use the Transformers in Outlines. Install all optional dependencies of the `Transformers` model with: `pip install \"outlines[transformers]\"`.\n\n    See the [HuggingFace documentation](https://huggingface.co/docs/transformers/en/installation) for more information on installing `transformers` with CPU, GPU...\n\n## Model Initialization\n\nTo load the model, you can use the `from_transformers` function. It takes 3 arguments:\n\n- `model`: a `transformers` model (created with `AutoModelForCausalLM` for instance)\n- `tokenizer_or_processor`: a `transformers` tokenizer (created with `AutoTokenizer` for instance, it must be an instance of either `PreTrainedTokenizer` or `PreTrainedTokenizerFast`)\n- `device_dtype` (optional): the tensor dtype to use for inference. If not provided, the model will use the default dtype.\n\nFor instance:\n\n```python\nimport outlines\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Create the transformers model and tokenizer\nhf_model = AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\nhf_tokenizer = AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n\n# Create the Outlines model\nmodel = outlines.from_transformers(hf_model, hf_tokenizer)\n```\n\nIf you provide a processor instead of a tokenizer for the second argument of the `from_transformers` function, you would get a `TransformersMultiModal` instance. 
See the [TransformersMultiModal model documentation](./transformers_multimodal.md) for more information on using multimodal models in Outlines.\n\n## Text Generation\n\nTo generate text, you can simply call the model with a prompt.\n\nFor instance:\n\n```python\nimport outlines\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Create model\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\n# Call it to generate text\nresult = model(\"What's the capital of Latvia?\", max_new_tokens=20)\nprint(result) # 'Riga'\n```\n\n#### Chat\n\nYou can also use chat inputs with the `Transformers` model. To do so, call the model with a `Chat` instance. The content of messages within the chat can be vision inputs as described above.\n\nFor instance:\n\n```python\nimport outlines\nfrom outlines.inputs import Chat\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Create the model\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\n# Create the chat input\nprompt = Chat([\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": \"What's the capital of Latvia?\"},\n])\n\n# Call the model to generate a response\nresponse = model(prompt, max_new_tokens=50)\nprint(response) # 'Riga'\n```\n\n#### Batching\n\nFinally, the `Transformers` model supports batching through the `batch` method. To use it, provide a list of prompts (using the formats described above) to the `batch` method. 
You will receive as a result a list of completions.\n\nFor instance:\n\n```python\nimport outlines\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Create model\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\n# Create a list of prompts that will be used in a single batch\nprompts = [\n    \"What's the capital of Lithuania?\",\n    \"What's the capital of Latvia?\",\n    \"What's the capital of Estonia?\"\n]\n\n# Call it to generate text\nresult = model.batch(prompts, max_new_tokens=20)\nprint(result) # ['Vilnius', 'Riga', 'Tallinn']\n```\n\n## Structured Generation\n\nAs a local model, `Transformers` supports all output types available in Outlines. Simply provide an `output_type` after the prompt when calling the model.\n\n### Simple Type\n\n```python\nimport outlines\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\noutput_type = int\n\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\nresult = model(\"How many countries are there in the world?\", output_type, max_new_tokens=5)\nprint(result) # '200'\n```\n\n### JSON Schema\n\n```python\nimport outlines\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\nfrom pydantic import BaseModel\nfrom typing import List\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\nresult = model(\"Create a character.\", output_type=Character, max_new_tokens=200, repetition_penalty=0.5)\nprint(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", 
\"alchemy\"]}'\nprint(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n```\n\n### Multiple Choice\n\n```python\nfrom typing import Literal\nimport outlines\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\noutput_type = Literal[\"Paris\", \"London\", \"Rome\", \"Berlin\"]\n\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\nresult = model(\"What is the capital of France?\", output_type, max_new_tokens=10, temperature=0)\nprint(result) # 'Paris'\n```\n\n### Regex\n\n```python\nimport outlines\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\nfrom outlines.types import Regex\n\noutput_type = Regex(r\"\\d{3}-\\d{2}-\\d{4}\")\n\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\nresult = model(\"Generate a fake social security number.\", output_type, max_new_tokens=20, top_p=0.5)\nprint(result) # '782-32-3789'\n```\n\n### Context-Free Grammar\n\n```python\nimport outlines\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\nfrom outlines.types import CFG\n\narithmetic_grammar = \"\"\"\n?start: sum\n\n?sum: product\n| sum \"+\" product   -> add\n| sum \"-\" product   -> sub\n\n?product: atom\n| product \"*\" atom  -> mul\n| product \"/\" atom  -> div\n\n?atom: NUMBER           -> number\n| \"-\" atom         -> neg\n| \"(\" sum \")\"\n\n%import common.NUMBER\n%import common.WS_INLINE\n\n%ignore WS_INLINE\n\"\"\"\noutput_type = CFG(arithmetic_grammar)\n\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\nresult = model(\"Write an addition.\", 
output_type, max_new_tokens=100)\nprint(result) # '23 + 48'\n```\n\n## Inference Arguments\n\nWhen calling the model, you can provide optional inference parameters on top of the prompt and the output type. These parameters will be passed on to the `generate` method of the `transformers` model. Some common inference arguments include `max_new_tokens`, `temperature`, `repetition_penalty` and `top_p`.\n\nSee the [transformers documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation) for more information on inference parameters.\n\n!!! Warning\n\n    The `max_new_tokens` inference parameter has a default value of 20. This is insufficient for most tasks and will result in the generation output not respecting the output type (because the response is truncated). We recommend you always provide a value for this argument.\n"
  },
  {
    "path": "docs/features/models/transformers_multimodal.md",
    "content": "---\ntitle: Transformers MultiModal\n---\n\n# Transformers MultiModal\n\nThe Outlines `TransformersMultiModal` model inherits from `Transformers` and shares most of its interface. Please start by reading the [Transformers documentation](./transformers.md) as this document only focuses on the specificities of `TransformersMultiModal` compared to `Transformers`.\n\n## Model Initialization\n\nTo load the model, you can use the `from_transformers` function. It takes 3 arguments:\n\n- `model`: a `transformers` model (created with `AutoModelForImageTextToText` for instance)\n- `tokenizer_or_processor`: a `transformers` processor (created with `AutoProcessor` for instance, it must be an instance of `ProcessorMixin`)\n- `device_dtype` (optional): the tensor dtype to use for inference. If not provided, the model will use the default dtype.\n\nFor instance:\n\n```python\nimport outlines\nfrom transformers import AutoModelForImageTextToText, AutoProcessor\n\n# Create the transformers model and processor\nhf_model = AutoModelForImageTextToText.from_pretrained(\"Qwen/Qwen2.5-VL-3B-Instruct\")\nhf_processor = AutoProcessor.from_pretrained(\"Qwen/Qwen2.5-VL-3B-Instruct\")\n\n# Create the Outlines model\nmodel = outlines.from_transformers(hf_model, hf_processor)\n```\n\n## Model Input\n\nAs with other multimodal models, you should provide a list containing a text prompt and assets (`Image`, `Audio` or `Video` instances) as the model input. 
The type of asset to provide depends on the capabilities of the `transformers` model you are running.\n\nHere's an example of using a vision multimodal model:\n\n```python\nfrom io import BytesIO\nfrom urllib.request import urlopen\n\nfrom PIL import Image as PILImage\nfrom pydantic import BaseModel\nfrom transformers import (\n    LlavaForConditionalGeneration,\n    AutoProcessor,\n)\n\nimport outlines\nfrom outlines.inputs import Image\n\nTEST_MODEL = \"trl-internal-testing/tiny-LlavaForConditionalGeneration\"\nIMAGE_URL = \"https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg\"\n\nclass Animal(BaseModel):\n    specie: str\n    color: str\n    weight: int\n\ndef get_image_from_url(image_url):\n    img_byte_stream = BytesIO(urlopen(image_url).read())\n    image = PILImage.open(img_byte_stream).convert(\"RGB\")\n    image.format = \"PNG\"\n    return image\n\n# Create a model\nmodel = outlines.from_transformers(\n    LlavaForConditionalGeneration.from_pretrained(TEST_MODEL),\n    AutoProcessor.from_pretrained(TEST_MODEL),\n)\n\n# Call it with a model input dict containing a text prompt and an image + an output type\nresult = model(\n    [\"<image>Describe this animal.\", Image(get_image_from_url(IMAGE_URL))],\n    Animal,\n    max_new_tokens=100\n)\nprint(result) # '{\"specie\": \"cat\", \"color\": \"white\", \"weight\": 4}'\nprint(Animal.model_validate_json(result)) # specie=cat, color=white, weight=4\n```\n!!! Warning\n\n    Make sure your prompt contains the tags expected by your processor to correctly inject the assets in the prompt. For some vision multimodal models for instance, you need to add as many `<image>` tags in your prompt as there are image assets included in your model input. `Chat` method, instead, does not require this step.\n\n\n### Chat\nThe `Chat` interface offers a more convenient way to work with multimodal inputs. You don't need to manually add asset tags like `<image>`. 
The model's HF processor handles the chat templating and asset placement for you automatically.\nTo do so, call the model with a `Chat` instance using a multimodal chat format. Assets must be pre-processed as `outlines.inputs.{Image, Audio, Video}` format, and only `image`, `video`, and `audio` types are supported.\n\nFor instance:\n\n```python\nimport outlines\nfrom outlines.inputs import Chat, Image\nfrom transformers import AutoModelForImageTextToText, AutoProcessor\nfrom PIL import Image as PILImage\nfrom io import BytesIO\nfrom urllib.request import urlopen\nimport torch\n\nmodel_kwargs = {\n        \"torch_dtype\": torch.bfloat16,\n        \"attn_implementation\": \"flash_attention_2\",\n        \"device_map\": \"auto\",\n    }\n\ndef get_image_from_url(image_url):\n    img_byte_stream = BytesIO(urlopen(image_url).read())\n    image = PILImage.open(img_byte_stream).convert(\"RGB\")\n    image.format = \"PNG\"\n    return image\n\n# Create the model\nmodel = outlines.from_transformers(\n    AutoModelForImageTextToText.from_pretrained(\"Qwen/Qwen2.5-VL-3B-Instruct\", **model_kwargs),\n    AutoProcessor.from_pretrained(\"Qwen/Qwen2.5-VL-3B-Instruct\", **model_kwargs)\n)\n\nIMAGE_URL = \"https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg\"\n\n# Create the multimodal chat input\nprompt = Chat([\n    {\n        \"role\": \"user\",\n        \"content\": [\n            {\"type\": \"image\", \"image\": Image(get_image_from_url(IMAGE_URL))},\n            {\"type\": \"text\", \"text\": \"Describe the image in few words.\"}\n        ],\n    }\n])\n\n# Call the model to generate a response\nresponse = model(prompt, max_new_tokens=50)\nprint(response) # 'A Siamese cat with blue eyes is sitting on a cat tree, looking alert and curious.'\n```\n\nOr using a list containing text and assets:\n\n```python\nimport outlines\nfrom outlines.inputs import Chat, Image\nfrom transformers import AutoModelForImageTextToText, AutoProcessor\nfrom PIL import Image as 
PILImage\nfrom io import BytesIO\nimport requests\nimport torch\n\n\nTEST_MODEL = \"Qwen/Qwen2.5-VL-7B-Instruct\"\n\n# Function to get an image\ndef get_image(url):\n    headers = {\n        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'\n    }\n    r = requests.get(url, headers=headers)\n    image = PILImage.open(BytesIO(r.content)).convert(\"RGB\")\n    image.format = \"PNG\"\n    return image\n\nmodel_kwargs = {\n        \"torch_dtype\": torch.bfloat16,\n        # \"attn_implementation\": \"flash_attention_2\",\n        \"device_map\": \"auto\",\n    }\n\n# Create a model\nmodel = outlines.from_transformers(\n    AutoModelForImageTextToText.from_pretrained(TEST_MODEL, **model_kwargs),\n    AutoProcessor.from_pretrained(TEST_MODEL, **model_kwargs),\n)\n\n# Create the chat input\nprompt = Chat([\n    {\"role\": \"user\", \"content\": \"You are a helpful assistant that helps me described pictures.\"},\n    {\"role\": \"assistant\", \"content\": \"I'd be happy to help you describe pictures! Please go ahead and share an image\"},\n    {\n        \"role\": \"user\",\n        \"content\": [\"Describe briefly the image\", Image(get_image(\"https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg\"))]\n    },\n])\n\n# Call the model to generate a response\nresponse = model(prompt, max_new_tokens=50)\nprint(response) # 'The image shows a light-colored cat with a white chest...'\n```\n\n\n### Batching\nThe `TransformersMultiModal` model supports batching through the `batch` method. To use it, provide a list of prompts (using the formats described above) to the `batch` method. 
You will receive as a result a list of completions.\n\nAn example using the Chat format:\n\n```python\nimport outlines\nfrom outlines.inputs import Chat, Image\nfrom transformers import AutoModelForImageTextToText, AutoProcessor\nfrom PIL import Image as PILImage\nfrom io import BytesIO\nfrom urllib.request import urlopen\nimport torch\nfrom pydantic import BaseModel\n\nmodel_kwargs = {\n        \"torch_dtype\": torch.bfloat16,\n        \"attn_implementation\": \"flash_attention_2\",\n        \"device_map\": \"auto\",\n    }\n\nclass Animal(BaseModel):\n    animal: str\n    color: str\n\ndef get_image_from_url(image_url):\n    img_byte_stream = BytesIO(urlopen(image_url).read())\n    image = PILImage.open(img_byte_stream).convert(\"RGB\")\n    image.format = \"PNG\"\n    return image\n\n# Create the model\nmodel = outlines.from_transformers(\n    AutoModelForImageTextToText.from_pretrained(\"Qwen/Qwen2.5-VL-3B-Instruct\", **model_kwargs),\n    AutoProcessor.from_pretrained(\"Qwen/Qwen2.5-VL-3B-Instruct\", **model_kwargs)\n)\n\nIMAGE_URL_1 = \"https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg\"\nIMAGE_URL_2 = \"https://upload.wikimedia.org/wikipedia/commons/a/af/Golden_retriever_eating_pigs_foot.jpg\"\n\n# Create the chat mutimodal messages\nmessages = [\n    {\n        \"role\": \"user\",\n        \"content\": [\n            {\"type\": \"text\", \"text\": \"Describe the image in few words.\"},\n            {\"type\": \"image\", \"image\": Image(get_image_from_url(IMAGE_URL_1))},\n        ],\n    },\n]\n\nmessages_2 = [\n    {\n        \"role\": \"user\",\n        \"content\": [\n            {\"type\": \"text\", \"text\": \"Describe the image in few words.\"},\n            {\"type\": \"image\", \"image\": Image(get_image_from_url(IMAGE_URL_2))},\n        ],\n    },\n]\n\nprompts = [Chat(messages), Chat(messages_2)]\n\n# Call the model to generate a response\nresponses = model.batch(prompts, output_type=Animal, 
max_new_tokens=100)\nprint(responses) # ['{ \"animal\": \"cat\", \"color\": \"white and gray\" }', '{ \"animal\": \"dog\", \"color\": \"white\" }']\nprint([Animal.model_validate_json(i) for i in responses]) # [Animal(animal='cat', color='white and gray'), Animal(animal='dog', color='white')]\n```\n\n\nAn example using a list of lists with tag assets:\n\n```python\nfrom io import BytesIO\nfrom urllib.request import urlopen\n\nfrom PIL import Image as PILImage\nfrom transformers import (\n    LlavaForConditionalGeneration,\n    AutoProcessor,\n)\n\nimport outlines\nfrom outlines.inputs import Image\n\nTEST_MODEL = \"trl-internal-testing/tiny-LlavaForConditionalGeneration\"\nIMAGE_URL = \"https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg\"\nIMAGE_URL_2 =\"https://upload.wikimedia.org/wikipedia/commons/9/98/Aldrin_Apollo_11_original.jpg\"\n\ndef get_image_from_url(image_url):\n    img_byte_stream = BytesIO(urlopen(image_url).read())\n    image = PILImage.open(img_byte_stream).convert(\"RGB\")\n    image.format = \"PNG\"\n    return image\n\n# Create a model\nmodel = outlines.from_transformers(\n    LlavaForConditionalGeneration.from_pretrained(TEST_MODEL),\n    AutoProcessor.from_pretrained(TEST_MODEL),\n)\n\n# Call the batch method with a list of model input dicts\nresult = model.batch(\n    [\n        [\"<image>Describe the image.\", Image(get_image_from_url(IMAGE_URL))],\n        [\"<image>Describe the image.\", Image(get_image_from_url(IMAGE_URL_2))],\n    ]\n)\nprint(result) # ['The image shows a cat', 'The image shows an astronaut']\n```\n"
  },
  {
    "path": "docs/features/models/vllm.md",
    "content": "---\ntitle: vLLM\n---\n\n# vLLM\n\n## Prerequisites\n\nThe Outlines `VLLM` model is intended to be used along with a vLLM instance running on a separate server (can be local or remote). Make sure you have a vLLM server running and accessible before using the `VLLM` model. For instance by running:\n\n```shell\npip install vllm\n\nvllm serve microsoft/Phi-3-mini-4k-instruct \\\n  --dtype auto \\\n  --api-key token-abc123\n```\n\nFollow the [Installation instructions](https://docs.vllm.ai/en/latest/getting_started/installation/index.html) for more information on how to set up a vLLM server for your particular setup.\n\nAs the vLLM client relies on the `openai` python sdk, you need to have the `openai` package installed. Install all optional dependencies for the `VLLM` model with: `pip install openai`.\n\nIf you want to use the vllm offline inference mode instead of the server mode, please refer to the [VLLMOffline](./vllm_offline.md) model documentation.\n\n## Model Initialization\n\nTo load the model, you can use the `from_vllm` function. The argument of the function is either an `OpenAI` or `AsyncOpenAI` instance from the `openai` library. Make sure the value of the `base_url` argument of the `OpenAI` client points to your running vLLM server. 
Consult the [vLLM documentation](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html) on using an OpenAI client with a vLLM server for more information.\n\nBased on whether the `openai` client instance is synchronous or asynchronous, you will receive a `VLLM` or `AsyncVLLM` model instance.\n\nFor instance:\n\n```python\nimport openai\nimport outlines\n\n# Create the OpenAI client\nsync_openai_client = openai.OpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\")\nasync_openai_client = openai.AsyncOpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\")\n\n# Create a sync model\nsync_model = outlines.from_vllm(sync_openai_client, \"microsoft/Phi-3-mini-4k-instruct\")\nprint(type(sync_model)) # <class 'outlines.models.vllm.VLLM'>\n\n# Create an async model\nasync_model = outlines.from_vllm(async_openai_client, \"microsoft/Phi-3-mini-4k-instruct\")\nprint(type(async_model)) # <class 'outlines.models.vllm.AsyncVLLM'>\n```\n\n## Text Generation\n\nTo generate text, you can simply call the model with a prompt.\n\nFor instance:\n\n```python\nimport openai\nimport outlines\n\n# Create the model\nmodel = outlines.from_vllm(openai.OpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\"), \"microsoft/Phi-3-mini-4k-instruct\")\n\n# Call it to generate text\nresponse = model(\"What's the capital of Latvia?\", max_tokens=20)\nprint(response) # 'The capital of Latvia is Riga.'\n```\n\n#### Vision\n\nSome models you can run with VLLM support vision input. 
To use this feature, provide a list containing a text prompt and `Image` instances.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nimport outlines\nimport openai\nfrom outlines.inputs import Image\n\n# Create the model\nmodel = outlines.from_vllm(\n    openai.OpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\"),\n    \"Qwen/Qwen2.5-VL-3B-Instruct\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the prompt containing the text and the image\nprompt = [\n    \"Describe the image\",\n    Image(get_image(\"https://picsum.photos/id/237/400/300\"))\n]\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'The image shows a black puppy lying on a wooden surface...'\n```\n\n#### Chat\n\nYou can also use chat inputs with the `VLLM` model. To do so, call the model with a `Chat` instance. The content of messages within the chat can be vision inputs as described above.\n\nFor instance:\n\n```python\nimport io\nimport requests\nimport PIL\nimport openai\nimport outlines\nfrom outlines.inputs import Chat, Image\n\n# Create the model\nmodel = outlines.from_vllm(\n    openai.OpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\"),\n    \"Qwen/Qwen2.5-VL-3B-Instruct\"\n)\n\n# Function to get an image\ndef get_image(url):\n    r = requests.get(url)\n    return PIL.Image.open(io.BytesIO(r.content))\n\n# Create the chat input\nprompt = Chat([\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\n        \"role\": \"user\",\n        \"content\": [\"Describe the image\", Image(get_image(\"https://picsum.photos/id/237/400/300\"))]\n    },\n])\n\n# Call the model to generate a response\nresponse = model(prompt, max_tokens=50)\nprint(response) # 'The image shows a black puppy lying on a wooden surface...'\n```\n\n#### Streaming\n\nFinally, the `VLLM` model supports 
streaming through the `stream` method.\n\nFor instance:\n\n```python\nimport openai\nimport outlines\n\n# Create the model\nmodel = outlines.from_vllm(\n    openai.OpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\"),\n    \"microsoft/Phi-3-mini-4k-instruct\"\n)\n\n# Stream the response\nfor chunk in model.stream(\"Tell me a short story about a cat.\", max_tokens=50):\n    print(chunk, end=\"\") # 'Once upon a time...'\nprint()\n```\n\n## Asynchronous Calls\n\nvLLM supports asynchronous operations by passing an `AsyncOpenAI` client instead of a regular `OpenAI` client. This returns an `AsyncVLLM` model instance that supports async/await patterns.\n\n### Basic Async Generation\n\n```python\nimport asyncio\nimport openai\nimport outlines\n\nasync def generate_text():\n    async_client = openai.AsyncOpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\")\n    async_model = outlines.from_vllm(async_client, \"microsoft/Phi-3-mini-4k-instruct\")\n\n    result = await async_model(\"Write a haiku about Python.\", max_tokens=50)\n    print(result)\n\nasyncio.run(generate_text())\n```\n\n### Async Streaming\n\nThe async model also supports streaming with async iteration:\n\n```python\nimport asyncio\nimport openai\nimport outlines\n\nasync def stream_text():\n    async_client = openai.AsyncOpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\")\n    async_model = outlines.from_vllm(async_client, \"microsoft/Phi-3-mini-4k-instruct\")\n\n    async for chunk in async_model.stream(\"Tell me a story about a robot.\", max_tokens=100):\n        print(chunk, end=\"\")\n\nasyncio.run(stream_text())\n```\n\n### Concurrent Async Requests\n\nOne of the main benefits of async calls is the ability to make multiple concurrent requests:\n\n```python\nimport asyncio\nimport openai\nimport outlines\n\nasync def generate_multiple():\n    async_client = openai.AsyncOpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\")\n    async_model 
= outlines.from_vllm(async_client, \"microsoft/Phi-3-mini-4k-instruct\")\n\n    prompts = [\n        \"Write a tagline for a coffee shop.\",\n        \"Write a tagline for a bookstore.\",\n        \"Write a tagline for a gym.\"\n    ]\n\n    tasks = [async_model(prompt, max_tokens=30) for prompt in prompts]\n    results = await asyncio.gather(*tasks)\n\n    for prompt, result in zip(prompts, results):\n        print(f\"{prompt}\\n{result}\\n\")\n\nasyncio.run(generate_multiple())\n```\n\n## Structured Generation\n\nvLLM supports all output types available in Outlines. Simply provide an `output_type` after the prompt when calling the model. All structured generation features work with both synchronous and asynchronous models.\n\n### Simple Type\n\n```python\nimport openai\nimport outlines\n\noutput_type = int\n\nopenai_client = openai.OpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\")\nmodel = outlines.from_vllm(openai_client, \"microsoft/Phi-3-mini-4k-instruct\")\n\nresult = model(\"How many countries are there in the world?\", output_type)\nprint(result) # '200'\n```\n\n### JSON Schema\n\n```python\nimport openai\nimport outlines\nfrom typing import List\nfrom pydantic import BaseModel\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\nopenai_client = openai.OpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\")\nmodel = outlines.from_vllm(openai_client, \"microsoft/Phi-3-mini-4k-instruct\")\n\nresult = model(\"Create a character.\", output_type=Character, frequency_penalty=1.5)\nprint(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\nprint(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n```\n\n### Multiple Choice\n\n```python\nfrom typing import Literal\nimport openai\nimport outlines\n\noutput_type = Literal[\"Paris\", \"London\", \"Rome\", \"Berlin\"]\n\nopenai_client = 
openai.OpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\")\nmodel = outlines.from_vllm(openai_client, \"microsoft/Phi-3-mini-4k-instruct\")\n\nresult = model(\"What is the capital of France?\", output_type, temperature=0)\nprint(result) # 'Paris'\n```\n\n### Regex\n\n```python\nimport openai\nimport outlines\nfrom outlines.types import Regex\n\noutput_type = Regex(r\"\\d{3}-\\d{2}-\\d{4}\")\n\nopenai_client = openai.OpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\")\nmodel = outlines.from_vllm(openai_client, \"microsoft/Phi-3-mini-4k-instruct\")\n\nresult = model(\"Generate a fake social security number.\", output_type, top_p=0.1)\nprint(result) # '782-32-3789'\n```\n\n### Context-Free Grammar\n\n```python\nimport openai\nimport outlines\nfrom outlines.types import CFG\n\narithmetic_grammar = \"\"\"\n?start: sum\n\n?sum: product\n| sum \"+\" product   -> add\n| sum \"-\" product   -> sub\n\n?product: atom\n| product \"*\" atom  -> mul\n| product \"/\" atom  -> div\n\n?atom: NUMBER           -> number\n| \"-\" atom         -> neg\n| \"(\" sum \")\"\n\n%import common.NUMBER\n%import common.WS_INLINE\n\n%ignore WS_INLINE\n\"\"\"\noutput_type = CFG(arithmetic_grammar)\n\nopenai_client = openai.OpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\")\nmodel = outlines.from_vllm(openai_client, \"microsoft/Phi-3-mini-4k-instruct\")\n\nresult = model(\"Write an addition.\", output_type, extra_body={\"guided_decoding_backend\": \"outlines\"})\nprint(result) # '23 + 48'\n```\n\n### Async Structured Generation\n\nAll structured generation features work seamlessly with async models:\n\n```python\nimport asyncio\nimport openai\nimport outlines\nfrom pydantic import BaseModel\n\nclass User(BaseModel):\n    name: str\n    email: str\n    age: int\n\nasync def generate_user():\n    async_client = openai.AsyncOpenAI(base_url=\"http://0.0.0.0:8000/v1\", api_key=\"token-abc123\")\n    async_model = outlines.from_vllm(async_client, 
\"microsoft/Phi-3-mini-4k-instruct\")\n\n    result = await async_model(\"Generate a random user profile.\", output_type=User)\n    user = User.model_validate_json(result)\n    print(f\"Name: {user.name}, Email: {user.email}, Age: {user.age}\")\n\nasyncio.run(generate_user())\n```\n\n## Inference Arguments\n\nWhen calling the model, you can provide optional parameters on top of the prompt and the output type. Those will be passed on to the `chat.completions.create` method of the OpenAI client.\n\nAn optional parameter of particular interest is `extra_body`, which is a dictionary containing arguments that are specific to vLLM and are not part of the standard `openai` interface. Among those, `guided_decoding_backend` allows you to select the library used by the vLLM server to control structured generation. You can use the value `outlines` to generated structured text with Outlines.\n\nSee the [vLLM documentation](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#extra-parameters) on extra parameters for the OpenAI-compatible server for more information on inference parameters.\n"
  },
  {
    "path": "docs/features/models/vllm_offline.md",
    "content": "---\ntitle: vLLM Offline\n---\n\n# vLLM Offline\n\nOutlines provides an integration with [vLLM](https://docs.vllm.ai/en/latest/) using the [vllm library](https://github.com/vllm-project/vllm). This model allows you to use vLLM in the \"Offline Inference\" mode, meaning that text generation happens within the model, there is no separate server. If you want to use vLLM with a server, see the [VLLM model documentation](./vllm.md).\n\n!!! Note \"Installation\"\n\n    You need to install the `vllm` library to be able to use the `VLLMOffline` model: `pip install vllm`. Due to a library version conflict between outlines and vllm, you MUST install `vllm` before installing `outlines`.\n\n    When installing `outlines` (after having first installed `vllm`), you may encounter the following error: `ERROR: pip's dependency resolver does not currently take into account all the packages that are installed`. You can safely ignore it.\n\n    See the [vLLM documentation](https://docs.vllm.ai/en/latest/getting_started/installation/index.html) for instructions on how to install vLLM for CPU, ROCm...\n\n## Model Initialization\n\nTo load the model, you can use the `from_vllm_offline` function. The single argument of the function is a `LLM` model instance from the `vllm` library. You will then receive a `VLLMOffline` model instance you can use to generate text.\n\nConsult the [LLM class API reference](https://docs.vllm.ai/en/latest/api/vllm/index.html#vllm.LLM) for detailed information on how to create an `LLM` instance and on the various available parameters.\n\nFor instance:\n\n```python\nimport outlines\nfrom vllm import LLM\n\n# Create the model\nmodel = outlines.from_vllm_offline(\n    LLM(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n```\n\n!!! Note\n\n    When initializing the `vllm.LLM` object, you can specify a `guided_decoding_backend` to choose what library will be used by vLLM to constrain the generation. 
Consult the [vLLM documentation](https://docs.vllm.ai/en/v0.8.2/features/structured_outputs.html) on structured output for the list of possible values.\n\n## Text Generation\n\nOnce you've created your Outlines `VLLMOffline` model instance, you're all set to generate text with this provider. You can simply call the model with a prompt.\n\nFor instance:\n\n```python\nimport outlines\nfrom vllm import LLM, SamplingParams\n\n# Create the model\nmodel = outlines.from_vllm_offline(\n    LLM(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\n# Call it to generate text\nresponse = model(\"What's the capital of Latvia?\", sampling_params=SamplingParams(max_tokens=20))\nprint(response) # 'Riga'\n```\n\n#### Chat\n\nYou can also use chat inputs with the `VLLMOffline` model. To do so, call the model with a `Chat` instance. The content of messages within the chat can be vision inputs as described above.\n\nFor instance:\n\n```python\nimport outlines\nfrom vllm import LLM, SamplingParams\nfrom outlines.inputs import Chat\n\n# Create the model\nmodel = outlines.from_vllm_offline(\n    LLM(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\n# Create the chat prompt\nprompt = Chat([\n    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    {\"role\": \"user\", \"content\": \"What's the capital of Latvia?\"},\n])\n\n# Call the model to generate a response\nresponse = model(prompt, sampling_params=SamplingParams(max_tokens=50))\nprint(response) # 'Riga'\n```\n\n#### Streaming\n\nThe `VLLMOffline` model supports streaming through the `stream` method.\n\nFor instance:\n\n```python\nimport outlines\nfrom vllm import LLM, SamplingParams\n\n# Create the model\nmodel = outlines.from_vllm_offline(\n    LLM(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\n# Stream the response\nfor chunk in model.stream(\"Tell me a short story about a cat.\", sampling_params=SamplingParams(max_tokens=50)):\n    print(chunk) # 'Once...'\n```\n\n#### Batching\n\nFinally, the `VLLMOffline` model also 
supports batching through the `batch` method. To use it, provide a list of prompts (using the formats described above) to the `batch` method. You will receive as a result a list of completions.\n\nFor instance:\n\n```python\nimport outlines\nfrom vllm import LLM, SamplingParams\n\n# Create the model\nmodel = outlines.from_vllm_offline(\n    LLM(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\n# Create a list of prompts that will be used in a single batch\nprompts = [\n    \"What's the capital of Lithuania?\",\n    \"What's the capital of Latvia?\",\n    \"What's the capital of Estonia?\"\n]\n\n# Call it to generate text\nresult = model.batch(prompts, sampling_params=SamplingParams(max_tokens=20))\nprint(result) # ['Vilnius', 'Riga', 'Tallinn']\n```\n\n## Structured Generation\n\nThe `VLLMOffline` model supports all output types available in Outlines. Simply provide an `output_type` after the prompt when calling the model.\n\n### Simple Type\n\n```python\nimport outlines\nfrom vllm import LLM\n\noutput_type = int\n\nmodel = outlines.from_vllm_offline(\n    LLM(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\nresult = model(\"How many countries are there in the world?\", output_type)\nprint(result) # '200'\n```\n\n### JSON Schema\n\n```python\nimport outlines\nfrom vllm import LLM, SamplingParams\nfrom typing import List\nfrom pydantic import BaseModel\n\nclass Character(BaseModel):\n    name: str\n    age: int\n    skills: List[str]\n\nmodel = outlines.from_vllm_offline(\n    LLM(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\nresult = model(\"Create a character.\", output_type=Character, sampling_params=SamplingParams(frequency_penalty=1.5, max_tokens=200))\nprint(result) # '{\"name\": \"Evelyn\", \"age\": 34, \"skills\": [\"archery\", \"stealth\", \"alchemy\"]}'\nprint(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']\n```\n\n### Multiple Choice\n\n```python\nfrom typing import Literal\nimport outlines\nfrom vllm import LLM, SamplingParams\n\noutput_type = 
Literal[\"Paris\", \"London\", \"Rome\", \"Berlin\"]\n\nmodel = outlines.from_vllm_offline(\n    LLM(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\nresult = model(\"What is the capital of France?\", output_type, sampling_params=SamplingParams(temperature=0))\nprint(result) # 'Paris'\n```\n\n### Regex\n\n```python\nimport outlines\nfrom vllm import LLM, SamplingParams\nfrom outlines.types import Regex\n\noutput_type = Regex(r\"\\d{3}-\\d{2}-\\d{4}\")\n\nmodel = outlines.from_vllm_offline(\n    LLM(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\nresult = model(\"Generate a fake social security number.\", output_type, sampling_params=SamplingParams(top_p=0.1))\nprint(result) # '782-32-3789'\n```\n\n### Context-Free Grammar\n\n```python\nimport outlines\nfrom vllm import LLM, SamplingParams\nfrom outlines.types import CFG\n\narithmetic_grammar = \"\"\"\n?start: sum\n\n?sum: product\n| sum \"+\" product   -> add\n| sum \"-\" product   -> sub\n\n?product: atom\n| product \"*\" atom  -> mul\n| product \"/\" atom  -> div\n\n?atom: NUMBER           -> number\n| \"-\" atom         -> neg\n| \"(\" sum \")\"\n\n%import common.NUMBER\n%import common.WS_INLINE\n\n%ignore WS_INLINE\n\"\"\"\noutput_type = CFG(arithmetic_grammar)\n\nmodel = outlines.from_vllm_offline(\n    LLM(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\nresult = model(\"Write an addition.\", output_type)\nprint(result) # '23 + 48'\n```\n\n## Inference Arguments\n\nWhen calling the model, you can provide optional parameters on top of the prompt and the output type. Those will be passed on to the `generate` method of the `LLM` model instance. An argument of particular interest is `sampling_params`. It takes as a value a `vllm.SamplingParams` instance containing parameters such as max_tokens or temperature.\n\nSee the [vLLM documentation](https://docs.vllm.ai/en/latest/api/vllm/sampling_params.html#vllm.sampling_params.SamplingParams) on sampling parameters for more information on inference parameters.\n"
  },
  {
    "path": "docs/features/utility/application.md",
    "content": "---\ntitle: Application\n---\n\n# Application\n\nThe `Application` class enables you to encapsulate a prompt template and an output type into a reusable component.\n\n## Overview\n\nAn `Application` combines a prompt template with an output type, creating a reusable component that can be applied to different models.\n\nApplications are useful for simplifying repeated tasks where you have a well-defined `Template` and a fixed output type, such as classification tasks or data extraction.\n\nTo create an `Application` instance, initialize the class with a prompt template and an output type. You can then call the application with a model and the variables defined in your template in a dictionary.\n\nFor instance:\n\n```python\nfrom typing import Literal\nimport transformers\nfrom outlines import Application, Template, from_transformers\n\n# Create a template\ntemplate_str = \"Is {{ name }} a boy or a girl name?\"\ntemplate = Template.from_string(template_str)\n\n# Create a model\nmodel = from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    transformers.AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\n# Create the application and call it to generate text\napplication = Application(template, Literal[\"boy\", \"girl\"])\nresponse = application(model, {\"name\": \"Alice\"}, max_new_tokens=10)\n\nprint(response) # \"girl\"\n```\n\nInstead of providing an Outlines `Template` instance, you can provide a `Callable` that returns a string. 
The parameters of the callable are used as the variables of the template such that you must provide values for them in the dictionary when calling the application.\n\nFor instance, we can create the same example as above using a function instead of a template:\n\n```python\nfrom typing import Literal\nimport transformers\nfrom outlines import Application, from_transformers\n\n# Create a function that will be used as a template\ndef template_func(name: str) -> str:\n    return f\"Is {name} a boy or a girl name?\"\n\n# Create a model\nmodel = from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    transformers.AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\n# Create the application with the function template and call it to generate text\napplication = Application(template_func, Literal[\"boy\", \"girl\"])\nresponse = application(model, {\"name\": \"Alice\"}, max_new_tokens=10)\n\nprint(response) # \"girl\"\n```\n"
  },
  {
    "path": "docs/features/utility/regex_dsl.md",
    "content": "---\ntitle: Regex DSL\n---\n\n# Regex DSL\n\nThis library provides a Domain-Specific Language (DSL) to construct regular expressions in a more intuitive and modular way. It allows you to create complex regexes using simple building blocks that represent literal strings, patterns, and various quantifiers. Additionally, these custom regex types can be used directly as types in [Pydantic](https://pydantic-docs.helpmanual.io/) schemas to enforce pattern constraints during text generation.\n\n---\n\n## Why Use This DSL?\n\n1. **Modularity & Readability**: Instead of writing cryptic regular expression strings, you compose a regex as a tree of objects.\n2. **Enhanced Debugging**: Each expression can be visualized as an ASCII tree, making it easier to understand and debug complex regexes.\n3. **Pydantic Integration**: Use your DSL-defined regex as types in Pydantic models. The DSL seamlessly converts to JSON Schema with proper pattern constraints.\n4. **Extensibility**: Easily add or modify quantifiers and other regex components by extending the provided classes.\n\n---\n\n## Building Blocks\n\n\nEvery regex component in this DSL is a **Term**. Here are two primary types:\n\n- **`String`**: Represents a literal string. It escapes the characters that have a special meaning in regular expressions.\n- **`Regex`**: Represents an existing regex pattern string.\n\n```python\nfrom outlines.types import String, Regex\n\n# A literal string \"hello\"\nliteral = String(\"hello\")   # Internally represents \"hello\"\n\n# A regex pattern to match one or more digits\ndigit = Regex(r\"[0-9]+\")     # Internally represents the pattern [0-9]+\n\n# Converting to standard regex strings:\nfrom outlines.types.dsl import to_regex\n\nprint(to_regex(literal))  # Output: hello\nprint(to_regex(digit))    # Output: [0-9]+\n```\n\n---\n\n## Early Introduction to Quantifiers & Combining Terms\n\nThe DSL supports common regex quantifiers as methods on every `Term`. 
These methods allow you to specify how many times a pattern should be matched. They include:\n\n- **`exactly(count)`**: Matches the term exactly `count` times.\n- **`optional()`**: Matches the term zero or one time.\n- **`one_or_more()`**: Matches the term one or more times (Kleene Plus).\n- **`zero_or_more()`**: Matches the term zero or more times (Kleene Star).\n- **`between(min_count, max_count)`**: Matches the term between `min_count` and `max_count` times (inclusive).\n- **`at_least(count)`**: Matches the term at least `count` times.\n- **`at_most(count)`**: Matches the term up to `count` times.\n\nThese quantifiers can also be used as functions that take the `Term` as an argument. If the term is a plain string, it will be automatically converted to a `String` object. Thus `String(\"foo\").optional()` is equivalent to `optional(\"foo\")`.\n\nLet's see these quantifiers side by side with examples.\n\n### Quantifiers in Action\n\n#### `exactly(count)`\n\nThis method restricts the term to appear exactly `count` times.\n\n```python\n# Example: exactly 5 digits\nfive_digits = Regex(r\"\\d\").exactly(5)\nprint(to_regex(five_digits))  # Output: (\\d){5}\n```\n\nYou can also use the `exactly` function:\n\n```python\nfrom outlines.types import exactly\n\n# Example: exactly 5 digits\nfive_digits = exactly(Regex(r\"\\d\"), 5)\nprint(to_regex(five_digits))  # Output: (\\d){5}\n```\n\n#### `optional()`\n\nThis method makes a term optional, meaning it may occur zero or one time.\n\n```python\n# Example: an optional \"s\" at the end of a word\nmaybe_s = String(\"s\").optional()\nprint(to_regex(maybe_s))  # Output: (s)?\n```\n\nYou can also use the `optional` function:\n\n```python\nfrom outlines.types import optional\n\n# Example: an optional \"s\" at the end of a word\nmaybe_s = optional(\"s\")\nprint(to_regex(maybe_s))  # Output: (s)?\n```\n\n#### `one_or_more()`\n\nThis method indicates that the term must appear at least once.\n\n```python\n# Example: one or more 
alphabetic characters\nletters = Regex(r\"[A-Za-z]\").one_or_more()\nprint(to_regex(letters))  # Output: ([A-Za-z])+\n```\n\nYou can also use the `one_or_more` function:\n\n```python\nfrom outlines.types import one_or_more\n\n# Example: one or more alphabetic characters\nletters = one_or_more(Regex(r\"[A-Za-z]\"))\nprint(to_regex(letters))  # Output: ([A-Za-z])+\n\n```\n\n#### `zero_or_more()`\n\nThis method indicates that the term can occur zero or more times.\n\n```python\n# Example: zero or more spaces\nspaces = String(\" \").zero_or_more()\nprint(to_regex(spaces))  # Output: ( )*\n```\n\nYou can also use the `zero_or_more` function:\n\n```python\nfrom outlines.types import zero_or_more\n\n# Example: zero or more spaces\nspaces = zero_or_more(\" \")\nprint(to_regex(spaces))  # Output: ( )*\n```\n\n#### `between(min_count, max_count)`\n\nThis method indicates that the term can appear any number of times between `min_count` and `max_count` (inclusive).\n\n```python\n# Example: Between 2 and 4 word characters\nword_chars = Regex(r\"\\w\").between(2, 4)\nprint(to_regex(word_chars))  # Output: (\\w){2,4}\n```\n\nYou can also use the `between` function:\n\n```python\nfrom outlines.types import between\n\n# Example: Between 2 and 4 word characters\nword_chars = between(Regex(r\"\\w\"), 2, 4)\nprint(to_regex(word_chars))  # Output: (\\w){2,4}\n```\n\n#### `at_least(count)`\n\nThis method indicates that the term must appear at least `count` times.\n\n```python\n# Example: At least 3 digits\nat_least_three = Regex(r\"\\d\").at_least(3)\nprint(to_regex(at_least_three))  # Output: (\\d){3,}\n```\n\nYou can also use the `at_least` function:\n\n```python\nfrom outlines.types import at_least\n\n# Example: At least 3 digits\nat_least_three = at_least(Regex(r\"\\d\"), 3)\nprint(to_regex(at_least_three))  # Output: (\\d){3,}\n```\n\n#### `at_most(count)`\n\nThis method indicates that the term can appear at most `count` times.\n\n```python\n# Example: At most 3 digits\nup_to_three 
= Regex(r\"\\d\").at_most(3)\nprint(to_regex(up_to_three))  # Output: (\\d){0,3}\n```\n\nYou can also use the `at_most` function:\n\n```python\nfrom outlines.types import at_most\n\n# Example: At most 3 digits\nup_to_three = at_most(Regex(r\"\\d\"), 3)\nprint(to_regex(up_to_three))  # Output: (\\d){0,3}\n```\n\n---\n\n## Combining Terms\n\nThe DSL allows you to combine basic terms into more complex patterns using concatenation and alternation.\n\n### Concatenation (`+`)\n\nThe `+` operator (and its reflected variant) concatenates terms, meaning that the terms are matched in sequence.\n\n```python\n# Example: Match \"hello world\"\npattern = String(\"hello\") + \" \" + Regex(r\"\\w+\")\nprint(to_regex(pattern))  # Output: hello\\ (\\w+)\n```\n\n### Alternation (`either()`)\n\nThe `either()` function creates alternatives, allowing a match for one of several patterns. You can provide as many terms as you want.\n\n```python\n# Example: Match either \"cat\" or \"dog\" or \"mouse\"\nanimal = either(String(\"cat\"), \"dog\", \"mouse\")\nprint(to_regex(animal))  # Output: (cat|dog|mouse)\n```\n\n*Note:* When using `either()` with plain strings (such as `\"dog\"`), the DSL automatically wraps them in a `String` object that escapes the characters that have a special meaning in regular expressions, just like with quantifier functions.\n\n---\n\n## Custom types\n\nThe DSL comes \"batteries included\" with types that represent common text constructs:\n\n- `integer` represents an integer number as recognized by `int`\n- `boolean` represents a boolean, \"True\" or \"False\" as recognized by `bool`\n- `number` represents a floating-point number recognize by Python's `float`\n- `date` represents a date as understood by `datetime.date`\n- `time` represents a time as understood by `datetime.time`\n- `datetime` represents a time as understood by `datetime.datetime`\n- `digit` represents a single digit\n- `char` represents a single character\n- `newline` represents a new line 
character\n- `whitespace` represents a white space\n- `hex_str` represents a hexadecimal string, optionally prefixed with \"0x\"\n- `uuid4` represents a UUID version 4 string in the format \"xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx\"\n- `ipv4` represents an IPv4 address in the format \"xxx.xxx.xxx.xxx\" where each octet is between 0 and 255\n- `sentence` represents a sentence\n- `paragraph` represents a paragraph (one or more sentences separated by one or more line breaks)\n\nFor instance you can describe the answers in the GSM8K dataset using the following pattern:\n\n```python\nfrom outlines.types import sentence, digit\n\nanswer = \"A: \" + sentence.between(2,4) + \" So the answer is: \" + digit.between(1,4)\n```\n\n---\n\n## Practical Examples\n\n### Example 1: Matching a Custom ID Format\n\nSuppose you want to create a regex that matches an ID format like \"ID-12345\", where:\n\n- The literal \"ID-\" must be at the start.\n- Followed by exactly 5 digits.\n\n```python\nid_pattern = \"ID-\" + Regex(r\"\\d\").exactly(5)\nprint(to_regex(id_pattern))  # Output: ID-(\\d){5}\n```\n\n### Example 2: Email Validation with Pydantic\n\nYou can define a regex for email validation and use it as a type in a Pydantic model.\n\n```python\nfrom pydantic import BaseModel, ValidationError\n\n# Define an email regex term (this is a simplified version)\nemail_regex = Regex(r\"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+\")\n\nclass User(BaseModel):\n    name: str\n    email: email_regex  # Use our DSL regex as a field type\n\n# Valid input\nuser = User(name=\"Alice\", email=\"alice@example.com\")\nprint(user)\n\n# Invalid input (raises a ValidationError)\ntry:\n    User(name=\"Bob\", email=\"not-an-email\")\nexcept ValidationError as e:\n    print(e)\n```\n\nWhen used in a Pydantic model, the email field is automatically validated against the regex pattern and its JSON Schema includes the `pattern` constraint.\n\n### Example 3: Building a Complex Pattern\n\nConsider a pattern to 
match a simple date format: `YYYY-MM-DD`.\n\n```python\nyear = Regex(r\"\\d\").exactly(4)         # Four digits for the year\nmonth = Regex(r\"\\d\").exactly(2)        # Two digits for the month\nday = Regex(r\"\\d\").exactly(2)          # Two digits for the day\n\n# Combine with literal hyphens\ndate_pattern = year + \"-\" + month + \"-\" + day\nprint(to_regex(date_pattern))\n# Output: (\\d){4}\\-(\\d){2}\\-(\\d){2}\n```\n\n---\n\n## Visualizing Your Pattern\n\nOne of the unique features of this DSL is that each term can print its underlying structure as an ASCII tree. This visualization can be particularly helpful when dealing with complex expressions.\n\n```python\n# A composite pattern using concatenation and quantifiers\npattern = \"a\" + String(\"b\").one_or_more() + \"c\"\nprint(pattern)\n```\n\n*Expected Output:*\n\n```ascii\n└── Sequence\n    ├── String('a')\n    ├── KleenePlus(+)\n    │   └── String('b')\n    └── String('c')\n```\n\nThis tree representation makes it easy to see the hierarchy and order of operations in your regular expression.\n\n---\n\n## Final Words\n\nThis DSL is designed to simplify the creation and management of regular expressions—whether you're validating inputs in a web API, constraining the output of an LLM, or just experimenting with regex patterns. With intuitive methods for common quantifiers and operators, clear visual feedback, and built-in integration with Pydantic, you can build robust and maintainable regex-based validations with ease.\n\nFeel free to explore the library further and adapt the examples to your use cases. Happy regexing!\n"
  },
  {
    "path": "docs/features/utility/template.md",
    "content": "---\ntitle: Template\n---\n\n# Template\n\nOutlines templates provide a way of creating reusable prompt structures with placeholders for dynamic content.\n\n## Overview\n\nTo create a `Template` instance, you can use two class methods:\n- `from_string`: Creates a template from a string containing a Jinja2 template\n- `from_file`: Creates a template from a file containing a Jinja2 template\n\nAfter creating a template, you can call it with the variables required by the template as keyword arguments.\n\nFor instance:\n\n```python\nfrom outlines import Template\n\n# Create a template from a string\ntemplate_str = \"\"\"\nHello, {{ name }}!\nThe weather today is {{ weather }}.\n\"\"\"\ntemplate = Template.from_string(template_str)\n\n# Create a template from a file, assuming the content of template_str is put into a file\ntemplate = Template.from_file(\"path_to/my_file.txt\")\n\n# Call the template to render the prompt\nprompt: str = template(name=\"Alice\", weather=\"sunny\")\nprint(prompt)  # \"Hello, Alice!\\nThe weather today is sunny.\"\n```\n\n## Composite Templates\n\nTemplates can be nested and composed to create complex prompt structures:\n\n```python\nfrom outlines import Template\n\n# Create component templates\nuser_template = Template.from_string(\"User: {{ query }}\")\nsystem_template = Template.from_string(\"System: {{ instruction }}\")\n\n# Create a composite template\nchat_template = Template.from_string(\"\"\"\n{{ system }}\n{{ user }}\n\"\"\")\n\n# Fill in nested templates\nprompt = chat_template(\n    system=system_template(instruction=\"You are a helpful assistant.\"),\n    user=user_template(query=\"What is machine learning?\")\n)\n\nprint(prompt)\n# System: You are a helpful assistant.\n#\n# User: What is machine learning?\n```\n\n## Custom Filters\n\nYou can add custom filters to your Outlines template to extend the templating functionality. 
To do so, pass as the second argument (`filters`) a dictionary with filter names as keys and filter functions as values. The filters can then be used in your Jinja2 template following the regular syntax. When rendering a prompt, the function will be applied to the associated variable.\n\nFor instance:\n\n```python\nfrom outlines import Template\n\ndef uppercase(text: str) -> str:\n    return text.upper()\n\n# Add custom filter when creating template\ntemplate = Template.from_string(\n    \"Hello {{ name | uppercase }}!\",\n    filters={\"uppercase\": uppercase}\n)\nprompt = template(name=\"alice\")\nprint(prompt)  # \"Hello ALICE!\"\n```\n"
  },
  {
    "path": "docs/guide/architecture.md",
    "content": "# Architecture Overview\n\nThis guide explains how Outlines is organized so you can navigate the codebase, debug issues, and extend the library.\n\n## How Structured Generation Works\n\nWhen you ask an LLM to output JSON or follow a specific format, traditional approaches generate text freely and hope it matches. Outlines takes a different approach: it constrains the model at generation time by masking invalid tokens, making it impossible for the model to produce invalid output.\n\n## Core Abstractions\n\nOutlines has three main abstractions: **Model**, **Generator**, and **Type System**.\n\n### Model and ModelTypeAdapter\n\nThe `Model` class (`outlines/models/base.py`) is the abstract base class for all LLM integrations. There are two categories based on how structured generation is implemented:\n\n**Steerable models** (`SteerableModel`): Models where Outlines directly applies a logits processor during generation. This includes:\n- `LlamaCpp` - llama.cpp bindings\n- `MLXLM` - Apple MLX models\n- `Transformers` - HuggingFace Transformers\n\n**Black-box models** (`BlackBoxModel`): Models where Outlines delegates structured generation to the provider's API rather than applying logits processors directly. 
This includes:\n- `OpenAI`, `Anthropic`, `Gemini`, `Mistral` - Cloud API providers\n- `VLLM`, `VLLMOffline`, `SGLang`, `TGI`, `Ollama` - Inference servers with built-in structured generation\n- `Dottxt` - Dottxt API\n\nNote: Some black-box models (like vLLM or Ollama) could technically expose logits, but they implement structured generation server-side, so Outlines delegates to their APIs instead of building processors locally.\n\n**The Model interface:**\n\nEvery model subclass must implement these methods:\n\n| Method | Purpose |\n|--------|---------|\n| `generate(model_input, output_type, **kwargs)` | Generate a single response (internal, receives logits processor or output type) |\n| `generate_batch(model_input, output_type, **kwargs)` | Generate responses for multiple prompts |\n| `generate_stream(model_input, output_type, **kwargs)` | Stream a response token by token |\n\nThe base `Model` class provides these convenience methods that create a `Generator` internally:\n\n| Method | Purpose |\n|--------|---------|\n| `__call__(model_input, output_type, backend, **kwargs)` | Generate a single response |\n| `batch(model_input, output_type, backend, **kwargs)` | Generate batch responses |\n| `stream(model_input, output_type, backend, **kwargs)` | Stream a response |\n\n**ModelTypeAdapter - Bridging formats:**\n\nEach model has a `type_adapter` attribute that handles format conversion between Outlines and the specific model provider:\n\n```python\nclass ModelTypeAdapter(ABC):\n    @abstractmethod\n    def format_input(self, model_input) -> Any:\n        \"\"\"Convert user input to model-specific format.\n\n        For API models: creates the `messages` argument\n        For local models: may apply chat templates, convert str to list, etc.\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def format_output_type(self, output_type) -> Any:\n        \"\"\"Convert output type to model-specific format.\n\n        For black-box models: creates `response_format` 
argument\n        For steerable models: formats the logits processor for the model\n        \"\"\"\n        ...\n```\n\n### Generator - Unifying the Generation Interface\n\nThe `Generator` (`outlines/generator.py`) is a factory function that returns the appropriate generator class based on the model type.\n\n**Why Generator exists:**\n\nWithout Generator, users would need different code for different model types:\n\n```python\n# Without Generator - user needs to know model internals\nif isinstance(model, SteerableModel):\n    processor = build_logits_processor(output_type)\n    result = model.generate(prompt, processor)\nelse:\n    result = model.generate(prompt, output_type)\n```\n\nWith Generator, the complexity is hidden:\n\n```python\n# With Generator - same code works for any model\ngenerator = Generator(model, output_type)\nresult = generator(prompt)\n```\n\n**Generator classes:**\n\n| Class | Used For | How It Works |\n|-------|----------|--------------|\n| `SteerableGenerator` | Local models (`LlamaCpp`, `MLXLM`, `Transformers`) | Builds and caches a logits processor from the output type, resets and passes it to the model on each call |\n| `BlackBoxGenerator` | Sync API models | Passes output type directly to model's generate method |\n| `AsyncBlackBoxGenerator` | Async API models | Async version of BlackBoxGenerator |\n\n**SteerableGenerator internals:**\n\nWhen you create a `SteerableGenerator` with an output type, it:\n\n1. Converts the Python type to a `Term` using `python_types_to_terms()`\n2. Based on the Term type, builds the appropriate logits processor:\n   - `CFG` → calls `get_cfg_logits_processor()`\n   - `JsonSchema` → calls `get_json_schema_logits_processor()`\n   - Other terms → converts to regex via `to_regex()`, then calls `get_regex_logits_processor()`\n3. Caches the processor for reuse\n4. 
On each call, resets processor state and passes it to the model\n\n### Type System - From Python Types to Constraints\n\nThe type system (`outlines/types/dsl.py`) converts Python types into constraints that can be enforced during generation.\n\n**The conversion pipeline:**\n\n```\nPython Type → Term (via python_types_to_terms)\n                    ↓\n            ┌───────┴───────┐\n            ↓               ↓\n    CFG or JsonSchema    Other Terms\n            ↓               ↓\n    Direct to backend   to_regex() → Regex string\n            ↓               ↓\n            └───────┬───────┘\n                    ↓\n            Logits Processor (via backend)\n```\n\n**Term classes:**\n\n`Term` is the base class for Outlines' constraint DSL. Key subclasses:\n\n| Term | Purpose | Example |\n|------|---------|---------|\n| `Regex` | Match a regex pattern | `Regex(\"[0-9]+\")` |\n| `JsonSchema` | Match valid JSON for a schema | `JsonSchema(MyPydanticModel)` |\n| `CFG` | Match a context-free grammar | `CFG(grammar_string)` |\n| `String` | Match a literal string | `String(\"hello\")` |\n| `Sequence` | Concatenate terms | `String(\"[\") + item + String(\"]\")` |\n| `Alternatives` | Match any of several terms | `term1 \\| term2` |\n| `KleeneStar` | Zero or more repetitions | `zero_or_more(term)` |\n| `KleenePlus` | One or more repetitions | `one_or_more(term)` |\n| `Optional` | Zero or one occurrence | `optional(term)` |\n\n**python_types_to_terms:**\n\nThis function converts Python types to Term instances:\n\n```python\ndef python_types_to_terms(ptype) -> Term:\n    # Already a Term - return as-is\n    if isinstance(ptype, Term):\n        return ptype\n\n    # Basic types - return predefined regex patterns\n    if is_int(ptype):\n        return types.integer\n    if is_float(ptype):\n        return types.number\n    if is_str(ptype):\n        return types.string\n    if is_bool(ptype):\n        return types.boolean\n\n    # Structured types - convert to JsonSchema\n    if 
is_pydantic_model(ptype) or is_dataclass(ptype) or is_typed_dict(ptype):\n        return JsonSchema(ptype)\n\n    # Enum - create alternatives from members\n    if is_enum(ptype):\n        return Alternatives([...])\n\n    # Union, Literal, List, Tuple, Dict - handle recursively\n    ...\n```\n\n## Data Flow\n\nHere's how a structured generation request flows through the system:\n\n```\n1. User calls: model(\"What is 2+2?\", int)\n\n2. Model.__call__ creates Generator:\n   Generator(model, int)\n\n3. Generator factory checks model type:\n   - SteerableModel → SteerableGenerator\n   - BlackBoxModel → BlackBoxGenerator\n\n4. For SteerableGenerator:\n   a. python_types_to_terms(int) → Regex(\"-?[0-9]+\")\n   b. to_regex(term) → regex string\n   c. get_regex_logits_processor(backend, model, regex) → LogitsProcessor\n\n5. Generator.__call__(prompt):\n   a. processor.reset()  # Reset state for new generation\n   b. model.generate(prompt, processor)\n\n6. During generation (steerable models only):\n   - Model computes logits for all tokens\n   - LogitsProcessor masks invalid tokens (set to -inf)\n   - Model samples from remaining valid tokens\n\n7. Result returned to user\n```\n\n## File Organization\n\n```\noutlines/\n├── __init__.py              # Public API exports\n├── generator.py             # Generator factory and classes\n├── models/\n│   ├── base.py              # Model, AsyncModel, ModelTypeAdapter base classes\n│   ├── transformers.py      # HuggingFace Transformers\n│   ├── llamacpp.py          # llama.cpp bindings\n│   ├── mlxlm.py             # Apple MLX models\n│   ├── openai.py            # OpenAI API\n│   ├── anthropic.py         # Anthropic API\n│   ├── vllm.py              # vLLM server\n│   ├── vllm_offline.py      # vLLM offline mode\n│   └── ...                  
# Other providers\n├── types/\n│   ├── __init__.py          # Predefined types: integer, number, date, etc.\n│   ├── dsl.py               # Term classes, python_types_to_terms, to_regex\n│   └── utils.py             # Type checking utilities\n├── backends/\n│   ├── __init__.py          # get_*_logits_processor functions\n│   ├── base.py              # LogitsProcessorType protocol\n│   ├── outlines_core.py     # Default backend using outlines-core\n│   ├── llguidance.py        # Microsoft llguidance backend\n│   └── xgrammar.py          # xgrammar backend\n├── processors/\n│   ├── base_logits_processor.py  # Base processor implementation\n│   └── tensor_adapters/     # Tensor library adapters\n├── grammars/                # Predefined grammar files\n└── templates.py             # Prompt template utilities\n```\n\n## Backends\n\nBackends are responsible for converting constraints (regex, JSON schema, CFG) into logits processors that can be applied during generation. They only apply to steerable models.\n\n**Available backends:**\n\n| Backend | Default For | Description |\n|---------|-------------|-------------|\n| `outlines_core` | Regex, JSON Schema | The default backend, built on the `outlines-core` Rust library. Compiles constraints into finite state machines. |\n| `llguidance` | CFG | Microsoft's llguidance library. Supports context-free grammars and is the only backend that handles CFG constraints. |\n| `xgrammar` | - | Alternative backend using the xgrammar library. |\n\n**How backends are selected:**\n\n1. If the user specifies a backend via the `backend` parameter, that backend is used\n2. 
Otherwise, the default backend for the constraint type is used:\n   - Regex → `outlines_core`\n   - JSON Schema → `outlines_core`\n   - CFG → `llguidance`\n\n**Backend interface:**\n\nAll backends inherit from `BaseBackend` and implement three methods:\n\n```python\nclass BaseBackend(ABC):\n    @abstractmethod\n    def get_json_schema_logits_processor(self, json_schema: str) -> LogitsProcessorType:\n        ...\n\n    @abstractmethod\n    def get_regex_logits_processor(self, regex: str) -> LogitsProcessorType:\n        ...\n\n    @abstractmethod\n    def get_cfg_logits_processor(self, grammar: str) -> LogitsProcessorType:\n        ...\n```\n\n**Specifying a backend:**\n\n```python\nfrom outlines import from_transformers, Generator\n\nmodel = from_transformers(\"microsoft/Phi-3-mini-4k-instruct\")\n\n# Use xgrammar instead of the default outlines_core\ngenerator = Generator(model, int, backend=\"xgrammar\")\n```\n\n## Extension Points\n\n### Adding a New Model Provider\n\n1. Create a new file in `outlines/models/` (e.g., `mymodel.py`)\n2. Implement a `ModelTypeAdapter` subclass with `format_input()` and `format_output_type()`\n3. Implement a `Model` subclass with `generate()`, `generate_batch()`, and `generate_stream()`\n4. Add a factory function (e.g., `from_mymodel()`)\n5. Export from `outlines/models/__init__.py`\n6. Add to `SteerableModel` or `BlackBoxModel` type alias as appropriate\n"
  },
  {
    "path": "docs/guide/chat_templating.md",
    "content": "# Chat templating\n\nInstruction-tuned language models use \"special tokens\" to indicate different parts of text, such as the system prompt, the user prompt, any images, and the assistant's response. A [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating) is how different types of input are composited together into a single, machine-readable string.\n\nOutlines supports chat templating throught the `Chat` model input class. It contains a list of messages similar in format to the chat history you would use with API models such as OpenAI or Anthropic and to the expected arguments of the `apply_chat_template` method of transformers tokenizers. You can find detailed information on the interface of this object in the [model inputs documentation](../features/core/inputs.md).\n"
  },
  {
    "path": "docs/guide/core_concepts.md",
    "content": "---\ntitle: Core concepts\n---\n\n# Core concepts\n\nComing soon. This will document various concepts at a high level, so users can understand Outlines before diving into specific implementations.\n\n1. Constrained decoding, tokens, and the basics of logit biasing\n2. Different ways to define output structure (regex, JSON schema, Pydantic models, context-free grammars)\n3. How finite state machines are used to guarantee output structure\n4. `Generator`, `Application`, `Template`,\n5. Prompt engineering vs. structured generation\n"
  },
  {
    "path": "docs/guide/fastapi_vllm_deployment.md",
    "content": "---\ntitle: Deploying with FastAPI\n---\n\n# Deploying with FastAPI\n\nThis guide demonstrates how to build a FastAPI application that leverages Outlines' async integration with vLLM. We create a customer support API that can intelligently categorize tickets and generate structured responses.\n\n## Prerequisites\n\nBefore starting, ensure you have a vLLM server running (locally or remotely) and the following packages installed:\n\n```shell\npip install fastapi uvicorn outlines openai pydantic\n```\n\n## Building the Application\n\n### Step 1: Define Data Models\n\nFirst, let's define our Pydantic models for structured outputs:\n\n```python\n# models.py\nfrom enum import Enum\nfrom typing import List\nfrom pydantic import BaseModel, Field\n\nclass TicketCategory(str, Enum):\n    BILLING = \"billing\"\n    TECHNICAL = \"technical\"\n    ACCOUNT = \"account\"\n    PRODUCT = \"product\"\n    OTHER = \"other\"\n\nclass TicketPriority(str, Enum):\n    LOW = \"low\"\n    MEDIUM = \"medium\"\n    HIGH = \"high\"\n    URGENT = \"urgent\"\n\nclass TicketAnalysis(BaseModel):\n    category: TicketCategory\n    priority: TicketPriority\n    summary: str = Field(description=\"Brief summary of the issue\")\n    customer_sentiment: str = Field(description=\"Customer emotional state\")\n    key_issues: List[str] = Field(description=\"List of main problems\")\n    requires_human: bool = Field(description=\"Whether this needs human intervention\")\n\nclass SupportResponse(BaseModel):\n    greeting: str\n    acknowledgment: str = Field(description=\"Acknowledge the customer's issue\")\n    solution_steps: List[str] = Field(description=\"Steps to resolve the issue\")\n    closing: str\n```\n\n### Step 2: Define the prompts\n\nLet us now write the prompts that we will be using in our application, using Jinja 2's templating language. 
We separate them from the application implementation so they are easier to modify and version.\n\n```ascii\n{# prompts/categorize.txt #}\nAnalyze this customer support ticket:\n\nCustomer ID: {{ customer_id }}\nMessage: {{ message }}\n\nExtract the category, priority, and other relevant information.\n```\n\n```ascii\n{# prompts/respond.txt #}\nGenerate a professional customer support response.\n\nCustomer Message: {{ message }}\nCategory: {{ category }}\nPriority: {{  priority }}\nCustomer Sentiment: {{ customer_sentiment }}\n\nCreate a helpful, empathetic response that addresses their concerns.\n```\n\n### Step 3: Create the FastAPI Application\n\nNow let's create our FastAPI application with async vLLM integration:\n\n```python\n# main.py\nimport asyncio\nfrom contextlib import asynccontextmanager\nfrom typing import Optional\n\nimport openai\nfrom outlines import models, Template\nfrom fastapi import FastAPI, HTTPException\nfrom pydantic import BaseModel\n\nfrom models import TicketAnalysis, SupportResponse\n\n# Request model\nclass TicketRequest(BaseModel):\n    customer_id: str\n    message: str\n\n# Global model instance\nasync_model = None\n\n# The lifespan function is a FastAPI construct\n# used to define startup and shutdown logic for the API.\n@asynccontextmanager\nasync def lifespan(app: FastAPI):\n    \"\"\"Initialize the async vLLM model on startup.\"\"\"\n    global async_model\n\n    client = openai.AsyncOpenAI(\n        base_url=\"http://localhost:8000/v1\",  # Adjust to your vLLM server URL\n        api_key=\"dummy\"  # vLLM doesn't require a real API key\n    )\n    async_model = models.from_vllm(client, \"Qwen/Qwen2.5-VL-7B-Instruct\")\n\n    yield\n\n    async_model = None  # Cleanup\n\n# Create FastAPI app\napp = FastAPI(\n    title=\"Customer Support Assistant API\",\n    description=\"AI-powered customer support with structured outputs\",\n    version=\"1.0.0\",\n    lifespan=lifespan\n)\n\n\n@app.post(\"/analyze-ticket\", 
response_model=TicketAnalysis)\nasync def analyze_ticket(request: TicketRequest):\n    \"\"\"Analyze a customer support ticket and extract structured information.\"\"\"\n    if async_model is None:\n        raise HTTPException(status_code=503, detail=\"Model not initialized\")\n\n    template = Template.from_file(\"prompts/categorize.txt\")\n    prompt = template(\n        customer_id=request.customer_id,\n        message=request.message\n    )\n\n    try:\n        # Generate and parse a structured response\n        result = await async_model(prompt, TicketAnalysis, max_tokens=5000)\n        analysis = TicketAnalysis.model_validate_json(result)\n\n        return analysis\n\n    except Exception as e:\n        raise HTTPException(status_code=500, detail=f\"Analysis failed: {str(e)}\")\n\n\n@app.post(\"/generate-response\", response_model=SupportResponse)\nasync def generate_response(\n    request: TicketRequest,\n    analysis: TicketAnalysis\n):\n    \"\"\"Generate a structured support response based on ticket analysis.\"\"\"\n    if async_model is None:\n        raise HTTPException(status_code=503, detail=\"Model not initialized\")\n\n    template = Template.from_file(\"prompts/respond.txt\")\n    prompt = template(\n        message=request.message,\n        category=analysis.category,\n        priority=analysis.priority,\n        customer_sentiment=analysis.customer_sentiment\n    )\n\n    try:\n        # Generate and parse a structured response\n        result = await async_model(prompt, SupportResponse, max_tokens=5000)\n        response = SupportResponse.model_validate_json(result)\n\n        return response\n\n    except Exception as e:\n        raise HTTPException(status_code=500, detail=f\"Response generation failed: {str(e)}\")\n```\n\n## Running the Application\n\n### Step 1: Start your vLLM server\n\n```shell\nvllm serve Qwen/Qwen2.5-VL-7B-Instruct\n```\n\n### Step 2: Run the FastAPI application\n\n```shell\nuvicorn main:app --reload --host 0.0.0.0 --port 
8080\n```\n\n## Testing the API\n\n### Example 1: Analyze a support ticket\n\n```shell\ncurl -X POST \"http://localhost:8080/analyze-ticket\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"customer_id\": \"CUST123\",\n    \"message\": \"I have been charged twice for my subscription this month. This is unacceptable and I want a refund immediately!\"\n  }'\n```\n\nExpected response:\n\n```json\n{\n  \"category\": \"billing\",\n  \"priority\": \"high\",\n  \"summary\": \"Customer charged twice for subscription, requesting refund\",\n  \"customer_sentiment\": \"angry\",\n  \"key_issues\": [\"duplicate charge\", \"subscription billing\", \"refund request\"],\n  \"requires_human\": false\n}\n```\n\n### Example 2: Generate a support response\n\n```shell\n# First, get the analysis\nANALYSIS=$(curl -s -X POST \"http://localhost:8080/analyze-ticket\" \\\n  -H \"Content-Type: application/json\" \\\n  -d '{\n    \"customer_id\": \"CUST456\",\n    \"message\": \"My app keeps crashing when I try to upload photos.\"\n  }')\n\n# Then generate a response\ncurl -X POST \"http://localhost:8080/generate-response\" \\\n  -H \"Content-Type: application/json\" \\\n  -d \"{\n    \\\"request\\\": {\n      \\\"customer_id\\\": \\\"CUST456\\\",\n      \\\"message\\\": \\\"My app keeps crashing when I try to upload photos.\\\"\n    },\n    \\\"analysis\\\": $ANALYSIS\n  }\"\n```\n\nBy combining FastAPI's async capabilities with Outlines' structured generation, you can build robust APIs that leverage large language models.\n\n## Using Alternative Backends: SGLang and TGI\n\nOne of the key advantages of Outlines is its unified API across different inference backends. 
You can easily switch from vLLM to SGLang or TGI with minimal code changes - just modify the model initialization in the `lifespan` function.\n\n### Using SGLang Instead of vLLM\n\nTo use SGLang, simply change the client initialization:\n\n```python\n@asynccontextmanager\nasync def lifespan(app: FastAPI):\n    \"\"\"Initialize the async SGLang model on startup.\"\"\"\n    global async_model\n\n    client = openai.AsyncOpenAI(\n        base_url=\"http://localhost:30000/v1\",  # SGLang server URL\n        api_key=\"dummy\"\n    )\n    async_model = models.from_sglang(client)\n\n    yield\n\n    async_model = None\n```\n\nStart your SGLang server with:\n\n```shell\npython -m sglang.launch_server \\\n    --model-path meta-llama/Llama-2-7b-chat-hf \\\n    --port 30000\n```\n\n### Using TGI Instead of vLLM\n\nFor TGI (Text Generation Inference), use the Hugging Face client:\n\n```python\nimport huggingface_hub\n\n@asynccontextmanager\nasync def lifespan(app: FastAPI):\n    \"\"\"Initialize the async TGI model on startup.\"\"\"\n    global async_model\n\n    client = huggingface_hub.AsyncInferenceClient(\n        \"http://localhost:8080\"  # TGI server URL\n    )\n    async_model = models.from_tgi(client)\n\n    yield\n\n    async_model = None\n```\n\nStart your TGI server with:\n\n```shell\ndocker run --gpus all -p 8080:80 \\\n    ghcr.io/huggingface/text-generation-inference:latest \\\n    --model-id meta-llama/Llama-2-7b-chat-hf\n```\n\nThe rest of your FastAPI application - all the endpoints, error handling, and business logic - remains completely unchanged. This flexibility allows you to test different inference engines without rewriting your application.\n"
  },
  {
    "path": "docs/guide/getting_started.md",
    "content": "---\ntitle: Getting Started\n---\n\n# Getting Started\n\n## Installation\n\nWe recommend using `uv` to install Outlines. You can find `uv` installation instructions [here](https://github.com/astral-sh/uv).\n\n```shell\nuv pip install 'outlines[transformers]'\n```\n\nor the classic `pip`:\n\n```shell\npip install 'outlines[transformers]'\n```\n\nFor more information, see the [installation guide](./installation).\n\n## Creating a Model\n\nOutlines contains a variety of models that wrap LLM inference engines/clients. For each of them, you need to install the model's associated library as described in the [installation guide](../installation).\n\nThe full list of available models along with detailed explanation on how to use them can be found in the [models page](../features/models/index.md) of the Features section of the documentation.\n\nFor a quick start, you can find below an example of how to initialize all supported models in Outlines:\n\n=== \"vLLM\"\n\n    ```python\n    import outlines\n    from openai import OpenAI\n\n    # You must have a separate vLLM server running\n    # Create an OpenAI client with the base URL of the VLLM server\n    openai_client = OpenAI(base_url=\"http://localhost:11434/v1\")\n\n    # Create an Outlines model\n    model = outlines.from_vllm(openai_client, \"microsoft/Phi-3-mini-4k-instruct\")\n    ```\n\n=== \"Ollama\"\n\n    ```python\n    import outlines\n    from ollama import Client\n\n    # Create an Ollama client\n    ollama_client = Client()\n\n    # Create an Outlines model, the model must be available on your system\n    model = outlines.from_ollama(ollama_client, \"tinyllama\")\n    ```\n\n=== \"OpenAI\"\n\n    ```python\n    import outlines\n    from openai import OpenAI\n\n    # Create an OpenAI client instance\n    openai_client = OpenAI()\n\n    # Create an Outlines model\n    model = outlines.from_openai(openai_client, \"gpt-4o\")\n    ```\n\n=== \"Transformers\"\n\n    ```python\n    import outlines\n  
  from transformers import AutoModelForCausalLM, AutoTokenizer\n\n    # Define the model you want to use\n    model_name = \"HuggingFaceTB/SmolLM2-135M-Instruct\"\n\n    # Create a HuggingFace model and tokenizer\n    hf_model = AutoModelForCausalLM.from_pretrained(model_name)\n    hf_tokenizer = AutoTokenizer.from_pretrained(model_name)\n\n    # Create an Outlines model\n    model = outlines.from_transformers(hf_model, hf_tokenizer)\n    ```\n\n\n=== \"llama.cpp\"\n\n    ```python\n    import outlines\n    from llama_cpp import Llama\n\n    # Model to use, it will be downloaded from the HuggingFace hub\n    repo_id = \"TheBloke/Llama-2-13B-chat-GGUF\"\n    file_name = \"llama-2-13b-chat.Q4_K_M.gguf\"\n\n    # Create a Llama.cpp model\n    llama_cpp_model = Llama.from_pretrained(repo_id, file_name)\n\n    # Create an Outlines model\n    model = outlines.from_llamacpp(llama_cpp_model)\n    ```\n\n=== \"Gemini\"\n\n    ```python\n    import outlines\n    from google.generativeai import GenerativeModel\n\n    # Create a Gemini client\n    gemini_client = GenerativeModel()\n\n    # Create an Outlines model\n    model = outlines.from_gemini(gemini_client, \"gemini-1-5-flash\")\n    ```\n\n=== \"mlx-lm\"\n\n    ```python\n    import outlines\n    import mlx_lm\n\n    # Create an MLXLM model with the output of mlx_lm.load\n    # The model will be downloaded from the HuggingFace hub\n    model = outlines.from_mlxlm(\n        *mlx_lm.load(\"mlx-community/SmolLM-135M-Instruct-4bit\")\n    )\n    ```\n\n=== \"SgLang\"\n\n    ```python\n    import outlines\n    from openai import OpenAI\n\n    # You must have a separate SgLang server running\n    # Create an OpenAI client with the base URL of the SgLang server\n    openai_client = OpenAI(base_url=\"http://localhost:11434/v1\")\n\n    # Create an Outlines model\n    model = outlines.from_sglang(openai_client)\n    ```\n\n=== \"TGI\"\n\n    ```python\n    import outlines\n    from huggingface_hub import 
InferenceClient\n\n    # You must have a separate TGI server running\n    # Create an InferenceClient client with the base URL of the TGI server\n    tgi_client = InferenceClient(\"http://localhost:8080\")\n\n    # Create an Outlines model\n    model = outlines.from_tgi(tgi_client)\n    ```\n\n=== \"vLLM (offline)\"\n\n    ```python\n    import outlines\n    from vllm import LLM\n\n    # Create a vLLM model\n    vllm_model = LLM(\"microsoft/Phi-3-mini-4k-instruct\")\n\n    # Create an Outlines model\n    model = outlines.from_vllm_offline(vllm_model)\n    ```\n\n\n## Generating Text\n\nOnce you have created the Outlines model for your inference engine/client, you are already all set to generate text! Models are callable such that you can simply call them with a text prompt. For instance:\n\n```python\nmodel = <your_model_as_defined_above>\n\n# Call the model to generate text\nresult = model(\"Write a short story about a cat.\")\nprint(result) # 'In a quiet village where the cobblestones hummed softly beneath the morning mist...'\n```\n\nMost models also support streaming through the use of a `streaming` method. You can use it directly with a prompt, just like regular text generation. For instance:\n\n```python\nmodel = <your_model_as_defined_above>\n\n# Stream text\nfor chunk in model.streaming(\"Write a short story about a cat.\"):\n    print(chunk) # 'In ...'\n```\n\n## Structured Generation\n\nOutlines follows a simple pattern that mirrors Python's own type system for structured outputs. 
Simply specify the desired output type as you would when using type hinting with a function, and Outlines will ensure your data matches that structure exactly.\n\nSupported output types can be organized in 5 categories:\n\n- [Basic Types](../../features/core/output_types#basic-python-types): `int`, `float`, `bool`...\n- [Multiple Choices](../../features/core/output_types#multiple-choices): using `Literal` or `Enum`\n- [JSON Schemas](../../features/core/output_types#json-schemas): using a wide range of possible objects including Pydantic models and dataclasses\n- [Regex](../../features/core/output_types#regex-patterns): through the Outlines's `Regex` object\n- [Context-free Grammars](../../features/core/output_types#context-free-grammars): through the Outlines's `CFG` object\n\nConsult the section on [Output Types](../../features/core/output_types.md) in the features documentation for more detailed information on all supported types for each output type category.\n\nIn the meantime, you can find below examples of using each of the five output type categories:\n\n=== \"Basic Types\"\n\n    ```python\n    model = <your_model_as_defined_above>\n\n    # Generate an integer\n    result = model(\"How many countries are there in the world?\", int)\n    print(result) # '200'\n    ```\n\n=== \"Multiple Choice\"\n\n    ```python\n    from enum import Enum\n\n    # Define our multiple choice output type\n    class PizzaOrBurger(Enum):\n        pizza = \"pizza\"\n        burger = \"burger\"\n\n    model = <your_model_as_defined_above>\n\n    # Generate text corresponding to either of the choices defined above\n    result = model(\"What do you want to eat, a pizza or a burger?\", PizzaOrBurger)\n    print(result) # 'pizza'\n    ```\n\n=== \"JSON Schemas\"\n\n    ```python\n    from datetime import date\n    from typing import Dict, List, Union\n    from pydantic import BaseModel\n\n    model = <your_model_as_defined_above>\n\n    # Define the class we will use as an output 
type\n    class Character(BaseModel):\n        name: str\n        birth_date: date\n        skills: Union[Dict, List[str]]\n\n    # Generate a character\n    result = model(\"Create a character\", Character)\n    print(result) # '{\"name\": \"Aurora\", \"birth_date\": \"1990-06-15\", \"skills\": [\"Stealth\", \"Diplomacy\"]}'\n    print(Character.model_validate_json(result)) # name=Aurora birth_date=datetime.date(1990, 6, 15) skills=['Stealth', 'Diplomacy']\n    ```\n\n=== \"Regex\"\n\n    ```python\n    from outlines.types import Regex\n\n    model = <your_model_as_defined_above>\n\n    # Define our regex for a 3 digit number\n    output_type = Regex(r\"[0-9]{3}\")\n\n    # Generate the number\n    result = model(\"Write a 3 digit number\", output_type)\n    print(result) # '236'\n    ```\n\n=== \"Context-free Grammars\"\n\n    ```python\n    from outlines.types import CFG\n\n    model = <your_model_as_defined_above>\n\n    # Define your Lark grammar as a string\n    arithmetic_grammar = \"\"\"\n        ?start: sum\n\n        ?sum: product\n            | sum \"+\" product   -> add\n            | sum \"-\" product   -> sub\n\n        ?product: atom\n            | product \"*\" atom  -> mul\n            | product \"/\" atom  -> div\n\n        ?atom: NUMBER           -> number\n            | \"-\" atom         -> neg\n            | \"(\" sum \")\"\n\n        %import common.NUMBER\n        %import common.WS_INLINE\n\n        %ignore WS_INLINE\n    \"\"\"\n\n    # Generate an arithmetic operation\n    result = model(\"Write an arithmetic operation\", CFG(arithmetic_grammar))\n    print(result) # '2 + 3'\n    ```\n\nIt's important to note that not all output types are available for all models due to limitations in the underlying inference engines. 
The [Models](../features/models/index.md) section of the features documentation includes a features matrix that summarizes the availability of output types.\n\n## Generators\n\nGenerators are an important type of object in Outlines that are used to encapsulate a model and an output type. After having created a generator, you can call it using a similar interface to a model and it will generate text conforming to the output type you initially provided.\n\nThis feature is useful if you want to generate text several times for a given model and output type. Not only does it prevent having to include the same output type at each call, but it also allows us to compile the output type only once instead of doing it at each generation (which is important for local models as this operation can be expensive).\n\nFor instance:\n\n```python\nfrom typing import Literal\nfrom outlines import Generator\n\nmodel = <your_model_as_defined_above>\n\n# Create a generator\ngenerator = Generator(model, Literal[\"pizza\", \"burger\"])\n\n# Call it as you would call a model\nresult = generator(\"What do you want to eat, a pizza or a burger?\")\nprint(result) # pizza\n```\n\nYou can find more information on generators in the dedicated page on [Generators](../features/core/generator.md) in the features documentation.\n\n## Other features\n\nOn top of more detailed explanations of the concepts already discussed here, the [Features](../features/index.md) section of the documentation contains information on additional Outlines features such as applications, prompt templates, the regex DSL...\n"
  },
  {
    "path": "docs/guide/installation.md",
    "content": "---\ntitle: Installation\n---\n\n# Installation\n\n## Dependency Management\n\nWe recommend using modern Python packaging tools such as `uv` for managing Python dependencies.\n\n### uv (Recommended)\n\n```shell\n# Install uv\ncurl -LsSf https://astral.sh/uv/install.sh | sh\n\n# Create a virtual environment and install Outlines\nuv venv\nsource .venv/bin/activate\nuv pip install outlines\n```\n\nor with pip:\n\n```shell\npip install outlines\n```\n\n\n\n\n\n## Optional Dependencies\n\nTo use Outlines models, you need to install the Python libraries for the associated inference engines/clients. Such libraries are not part of the general installation as you should only install the libraries needed for the specific models you want to use.\n\nOutlines models with the installation of their associated additional dependencies:\n\n- [Anthropic](features/models/anthropic.md): `pip install anthropic`\n- [Dottxt](features/models/dottxt.md): `pip install dottxt`\n- [Gemini](features/models/gemini.md): `pip install google-generativeai`\n- [Llamacpp](features/models/llamacpp.md): `pip install llama-cpp-python`\n- [Mlx-lm](features/models/mlxlm.md): `pip install mlx mlx-lm`\n- [Ollama](features/models/ollama.md): `pip install ollama` (after having downloaded Ollama on your system)\n- [OpenAI](features/models/openai.md): `pip install openai`\n- [SGLang](features/models/sglang.md): `pip install openai`\n- [TGI](features/models/tgi.md): `pip install huggingface_hub`\n- [Transformers](features/models/transformers.md): `pip install transformers`\n- [TransformersMultiModal](features/models/transformers_multimodal.md): `pip install transformers`\n- [vLLM (online server)](features/models/vllm.md): `pip install openai`\n- [vLLM (offline)](features/models/vllm_offline.md): `pip install vllm`\n\nIf you encounter any problems using Outlines with these libraries, take a look at their installation instructions. 
The installation of `openai` and `transformers` should be straightforward, but other libraries have specific hardware requirements.\n\n!!! warning \"Hardware Requirements\"\n\n    If you are using a local model, your model may require specific hardware. Please check the documentation for these libraries.\n\n    Some libraries like `vllm` and `llama-cpp-python` require specific hardware, such as a compatible GPU. `mlx-lm` on its side is designed for Apple Silicon, so it may not be appropriate for your use case if you are on a different platform.\n\n## Bleeding Edge\n\nYou can install the latest version of Outlines from the repository's `main` branch:\n\n```sh\npip install git+https://github.com/dottxt-ai/outlines.git@main\n```\n\nThis can be useful, for instance, when a fix has been merged but not yet released.\n\n## Installing for Development\n\nSee the [contributing documentation](community/contribute.md) for instructions on how to install Outlines for development, including an example using the `dot-install` method for one of the backends.\n"
  },
  {
    "path": "docs/guide/migration.md",
    "content": "# Outlines 1.0 migration guide\n\nOutlines 1.0 introduces some breaking changes that affect the way you use the library. You are likely concerned by all of the following sections, so please read this document carefully until the end.\n\nThis guide will help you migrate your code to the new version.\n\nAll previous functionalities will be supported until Outlines version 1.1.0, but a warning message will be displayed to remind you to migrate your code and provide instructions to help you do so. Please migrate your code to the v1 as soon as possible.\n\n## Removed or modified features\n- [Generate functions](#generate-functions)\n- [Models](#models)\n- [Samplers](#samplers)\n- [Functions](#functions)\n- [Text generation return types](#text-generation-return-types)\n- [Inference arguments](#inference-arguments)\n\n### Generate functions\n\nThe whole `generate` module has been removed. That includes the functions `generate.cfg`, `generate.choice`, `generate.format`,`generate.fsm`, `generate.json`, `generate.regex` and `generate.text`.\n\nYou should replace these functions by the [`Generator`](../features/core/generator.md) object along with the right output type as an argument (on top of the model). The output type can either be a python type or be an object from the `outlines.types` module. 
You can find more information about the output types in the [Output Types](../features/core/output_types.md) section of the features documentation.\n\nAssociated v1 output types for each deprecated function:\n- `generate.cfg` -> `outlines.types.CFG`\n- `generate.choice` -> `typing.Literal` or `typing.Union`\n- `generate.format` -> native Python types (`str`, `int` etc.)\n- `generate.fsm` -> `outlines.types.FSM`\n- `generate.json` -> `pydantic.BaseModel`, `typing.TypedDict`, `dataclasses.dataclass`, `genson.schema.SchemaBuilder` or `outlines.types.JsonSchema`\n- `generate.regex` -> `outlines.types.Regex`\n- `generate.text` -> no output type (`None`)\n\nFor instance, instead of:\n\n```python\nfrom outlines import generate\n\nmodel = ...\ngenerator = generate.choice(model, [\"foo\", \"bar\"])\n```\n\nYou should now use:\n\n```python\nfrom typing import Literal\nfrom outlines import Generator\n\nmodel = ...\ngenerator = Generator(model, Literal[\"foo\", \"bar\"])\n```\n\n### Models\n\nThe model classes found in the `outlines.models` module are maintained but there are a few important changes to be aware of.\n\nThe functions used to create a model have been replaced by equivalent functions named with a `from_` prefix. The function `outlines.models.transformers` has been replaced by `outlines.from_transformers` for instance. On top of this change of name, the arguments have been modified. 
You should refer to the [models documentation](../features/models/index.md) for more details, but the overall idea is that you now need to provide a model/client instance from the inference library the Outlines model is wrapping.\n\nFor instance, instead of:\n\n```python\nfrom outlines import models\n\nmodel = models.llamacpp(\n    repo_id=\"M4-ai/TinyMistral-248M-v2-Instruct-GGUF\",\n    filename=\"TinyMistral-248M-v2-Instruct.Q4_K_M.gguf\",\n    chat_format=\"qwen\",\n)\n```\n\nYou should now do:\n\n```python\nfrom llama_cpp import Llama\nfrom outlines import from_llamacpp\n\nllamacpp_model = Llama.from_pretrained(\n    repo_id=\"M4-ai/TinyMistral-248M-v2-Instruct-GGUF\",\n    filename=\"TinyMistral-248M-v2-Instruct.Q4_K_M.gguf\",\n    chat_format=\"qwen\",\n)\nmodel = from_llamacpp(llamacpp_model)\n```\n\nThe `load_lora` methods that are present on the `VLLM` and `LlamaCpp` models have been removed. You should now handle lora loading through the `Llama` instance in the case of the `LlamaCpp` model or provide it as a keyword argument when calling the model in the case of the `VLLM` model.\n\nFor instance, instead of:\n\n```python\nfrom outlines import from_vllm\nfrom vllm import LLM\n\nmodel = from_vllm(\n    LLM.from_pretrained(\"meta-llama/Llama-3.1-8B-Instruct\")\n)\nmodel.load_lora(\"path/to/lora/file\")\n\nresponse = model(\"foo\")\n```\n\nYou should now do:\n\n```python\nfrom outlines import from_vllm\nfrom vllm import LLM\nfrom vllm.lora.request import LoRARequest\n\nmodel = from_vllm(\n    LLM.from_pretrained(\"meta-llama/Llama-3.1-8B-Instruct\")\n)\nlora_request = LoRARequest(\"path/to/lora/file\", 1, \"path/to/lora/file\")\n\nresponse = model(\"foo\", lora_request=lora_request)\n```\n\nThe `ExLlamav2` model has been removed without replacement. This inference library is not fully compatible with Outlines, so we decided to remove it. 
You can still use it until final deprecation, but we recommend that you migrate to a different inference library right now.\n\n### Samplers\n\nThe `outlines.samplers` module has been removed without replacement. You should now use the arguments of the inference library model to control the sampling. Depending on the model you use, this could be done at initialization or when calling the model to generate text (so when calling the outlines model or a generator).\n\nFor instance, instead of:\n\n```python\nfrom outlines import generate, samplers\n\nmodel = <transformers_model>\n\ngenerator = generate.text(model, samplers.beam_search(2))\nresponse = generator(\"foo\")\n```\n\nYou should now do:\n\n```python\nfrom outlines import Generator\n\nmodel = <transformers_model>\n\ngenerator = Generator(model)\nresponse = generator(\"foo\", num_beams=2)\n```\n\n### Functions\n\nThe `outlines.function` module has been removed. It is replaced by the `outlines.applications` module. An [`Application`](../features/utility/application.md) serves a similar purpose as a `Function`: it encapsulates a prompt template and an output type. A difference is that an `Application` is not instantiated with a model name. Instead, you should provide a model instance along with the prompt when calling it.\n\nFor instance, instead of:\n\n```python\nfrom outlines import Function\n\nprompt_template = ...\noutput_type = ...\n\nfn = Function(\n    prompt_template,\n    output_type,\n    \"hf-internal-testing/tiny-random-GPTJForCausalLM\",\n)\n\nresult = fn(\"foo\")\n```\n\nYou should now do:\n\n```python\nfrom outlines import Application\n\nprompt_template = ...\noutput_type = ...\n\napplication = Application(\n    prompt_template,\n    output_type,\n)\n\nmodel = ...\n\nresult = application(model, \"foo\")\n```\n\n### Text generation return types\n\nIn the previous version of Outlines, the return type of the generators depended on the output type provided. 
For instance, if you passed a Pydantic model to the `generate.json` function, the return type was a Pydantic model instance. In the v1, the return type of a generator is always a `str`, the raw text generated by the model. You are responsible for parsing the text into the desired format.\n\nFor instance, instead of:\n\n```python\nfrom pydantic import BaseModel\nfrom outlines import generate\n\nclass Foo(BaseModel):\n    bar: str\n\nmodel = ...\n\ngenerator = generate.json(model, Foo)\n\nresult = generator(\"foo\")\nprint(result.bar)\n```\n\nYou should now do:\n\n```python\nfrom pydantic import BaseModel\nfrom outlines import Generator\n\nclass Foo(BaseModel):\n    bar: str\n\nmodel = ...\n\ngenerator = Generator(model, Foo)\n\nresult = generator(\"foo\")\nresult = Foo.model_validate_json(result) # parse the text into the Pydantic model instance\nprint(result.bar)\n```\n\nThe [Output Types](../features/core/output_types.md) section of the features documentation includes extensive details on available output types.\n\n### Inference arguments\n\nIn the previous version of Outlines, some of the inference arguments were standardized across the models and were provided as positional arguments to the generator or through the sampling params dictionary. Additionally, various default values were added by outlines to the inference library models. This is no longer the case. 
You should refer to the documentation of the inference library you use to find the right arguments for your use case and pass them as keyword arguments to the outlines generator when calling it.\n\nFor instance, instead of:\n\n```python\nfrom outlines import generate\n\nmodel = <transformers_model>\n\ngenerator = generate.text(model)\n\nresult = generator(\"foo\", 256, \".\", 10) # 256 tokens, stop at \".\" and seed 10\n```\n\nYou should now do:\n\n```python\nfrom outlines import Generator\n\nmodel = <transformers_model>\n\ngenerator = Generator(model)\n\nresult = generator(\"foo\", max_new_tokens=256, stop_strings=\".\", seed=10)\n```\n"
  },
  {
    "path": "docs/guide/selecting_an_inference_backend.md",
    "content": "This guide should provide a general overview of the available models in the [API reference](/api/models/).\n\n## Models\n\n-  [Anthropic](/api/models/anthropic)\n"
  },
  {
    "path": "docs/guide/vlm.md",
    "content": "# Vision-Language Models with Outlines\n\nThis guide demonstrates how to use Outlines with vision-language models. Vision-language models can process both text and images, allowing for tasks like image captioning, visual question answering, and more.\n\nWe will be using the Pixtral-12B model from Mistral to take advantage of some of its visual reasoning capabilities and a workflow to generate a multistage atomic caption.\n\n## Setup\nFirst, we need to install the necessary dependencies. In addition to Outlines, we'll need to install the transformers library and any specific requirements for the vision-language model we'll be using.\n\n```shell\npip install outlines transformers torch pillow\n```\n\n### Initializing the Model\nWe'll use the `outlines.from_transformers` function to initialize our vision-language model. For this function to return a vision multi-modal model we need to pass in a transformers model and a transformers processor that can handle both text and image inputs. 
Today we'll be using the Pixtral model with the AutoProcessor.\n\n```python\nimport outlines\nimport torch\nfrom transformers import (\n    AutoProcessor,\n    LlavaForConditionalGeneration\n)\n\nmodel_name=\"mistral-community/pixtral-12b\" # original magnet model is able to be loaded without issue\nmodel_class=LlavaForConditionalGeneration\nprocessor_class=AutoProcessor\n\ndef get_vision_model(model_name: str, model_class, processor_class):\n    model_kwargs = {\n        \"torch_dtype\": torch.bfloat16,\n        \"attn_implementation\": \"flash_attention_2\",\n        \"device_map\": \"auto\",\n    }\n    processor_kwargs = {\n        \"device\": \"cuda\",\n    }\n\n    model = outlines.from_transformers(\n        model_class.from_pretrained(model_name, **model_kwargs),\n        processor_class.from_pretrained(model_name, **processor_kwargs),\n    )\n    return model\nmodel = get_vision_model(model_name, model_class, processor_class)\n```\n\n### Defining the Schema\nNext, we will define a schema for the output we expect from our vision multi-modal model. This schema will help structure the model's responses. 
We use the `outlines.Generator` object to create a generator for our schema that will then be called with our prompt and images.\n\n```python\nfrom enum import Enum\nfrom pydantic import BaseModel, Field, confloat, constr\nfrom pydantic.types import StringConstraints, PositiveFloat\nfrom typing import List\nfrom typing_extensions import Annotated\n\nclass TagType(Enum):\n    ENTITY = \"Entity\"\n    RELATIONSHIP = \"Relationship\"\n    STYLE = \"Style\"\n    ATTRIBUTE = \"Attribute\"\n    COMPOSITION = \"Composition\"\n    CONTEXTUAL = \"Contextual\"\n    TECHNICAL = \"Technical\"\n    SEMANTIC = \"Semantic\"\n\nclass ImageTag(BaseModel):\n    tag: Annotated[\n        constr(min_length=1, max_length=30),\n        Field(\n            description=(\n                \"Descriptive keyword or phrase representing the tag.\"\n            )\n        )\n    ]\n    category: TagType\n    confidence: Annotated[\n        confloat(le=1.0),\n        Field(\n            description=(\n                \"Confidence score for the tag, between 0 (exclusive) and 1 (inclusive).\"\n            )\n        )\n    ]\n\nclass ImageData(BaseModel):\n    tags_list: List[ImageTag] = Field(..., min_items=8, max_items=20)\n    short_caption: Annotated[str, StringConstraints(min_length=10, max_length=150)]\n    dense_caption: Annotated[str, StringConstraints(min_length=100, max_length=2048)]\n\nimage_data_generator = outlines.Generator(model, ImageData)\n```\n\nThis schema defines the structure for image tags, including categories like Entity, Relationship, Style, etc., as well as short and dense captions.\n\n### Preparing the Prompt\n\nWe'll create a prompt that instructs the model on how to analyze the image and generate the structured output:\n\n```python\npixtral_instruction = \"\"\"\n<s>[INST]\n<Task>You are a structured image analysis agent. 
Generate comprehensive tag list, caption, and dense caption for an image classification system.</Task>\n<TagCategories requirement=\"You should generate a minimum of 1 tag for each category.\" confidence=\"Confidence score for the tag, between 0 (exclusive) and 1 (inclusive).\">\n- Entity : The content of the image, including the objects, people, and other elements.\n- Relationship : The relationships between the entities in the image.\n- Style : The style of the image, including the color, lighting, and other stylistic elements.\n- Attribute : The most important attributes of the entities and relationships in the image.\n- Composition : The composition of the image, including the arrangement of elements.\n- Contextual : The contextual elements of the image, including the background, foreground, and other elements.\n- Technical : The technical elements of the image, including the camera angle, lighting, and other technical details.\n- Semantic : The semantic elements of the image, including the meaning of the image, the symbols, and other semantic details.\n<Examples note=\"These show the expected format as an abstraction.\">\n{\n  \"tags_list\": [\n    {\n      \"tag\": \"subject 1\",\n      \"category\": \"Entity\",\n      \"confidence\": 0.98\n    },\n    {\n      \"tag\": \"subject 2\",\n      \"category\": \"Entity\",\n      \"confidence\": 0.95\n    },\n    {\n      \"tag\": \"subject 1 runs from subject 2\",\n      \"category\": \"Relationship\",\n      \"confidence\": 0.90\n    },\n   }\n</Examples>\n</TagCategories>\n<ShortCaption note=\"The short caption should be a concise single sentence caption of the image content with a maximum length of 100 characters.\">\n<DenseCaption note=\"The dense caption should be a descriptive but grounded narrative paragraph of the image content with high quality narrative prose. 
It should incorporate elements from each of the tag categories to provide a broad dense caption\">\n[IMG]<image>[/INST]\n\"\"\".strip()\n```\n\nThis prompt provides detailed instructions to the model on how to generate comprehensive tag lists, captions, and dense captions for image analysis. Because of the ordering of the instructions the original tag generation serves as a sort of visual grounding for the captioning task, reducing the amount of manual post processing required. It is essential to include the <image> tag in the prompt at the location where the image will be inserted.\n\n### Generating Structured Output\nNow we can use our model to generate structured output based on an input image:\n\n```python\nfrom io import BytesIO\nfrom urllib.request import urlopen\nfrom PIL import Image\n\ndef img_from_url(url):\n    img_byte_stream = BytesIO(urlopen(url).read())\n    return Image.open(img_byte_stream).convert(\"RGB\")\n\nimage_url=\"https://upload.wikimedia.org/wikipedia/commons/9/98/Aldrin_Apollo_11_original.jpg\"\nimage= img_from_url(image_url)\nresult = image_data_generator({\n    \"text\": pixtral_instruction,\n    \"images\": image\n})\nprint(result)\n```\n\nThis code loads an image from a URL, passes it to our vision multi-modal model along with the instruction prompt, and generates a structured output based on the defined schema. 
We end up with an output like this, ready to be used for the next stage in your pipeline:\n\n```json\n{\"tags_list\": [\n  {\n    \"tag\": \"astronaut\",\n    \"category\": <TagType.ENTITY: \"Entity\">,\n    \"confidence\": 0.99\n  },\n  {\"tag\": \"moon\", \"category\": <TagType.ENTITY: \"Entity\">, \"confidence\": 0.98},\n  {\n    \"tag\": \"space suit\",\n    \"category\": <TagType.ATTRIBUTE: \"Attribute\">,\n    \"confidence\": 0.97\n  },\n  {\n    \"tag\": \"lunar module\",\n    \"category\": <TagType.ENTITY: \"Entity\">,\n    \"confidence\": 0.95\n  },\n  {\n    \"tag\": \"shadow of astronaut\",\n    \"category\": <TagType.COMPOSITION: \"Composition\">,\n    \"confidence\": 0.95\n  },\n  {\n    \"tag\": \"footprints in moon dust\",\n    \"category\": <TagType.CONTEXTUAL: \"Contextual\">,\n    \"confidence\": 0.93\n  },\n  {\n    \"tag\": \"low angle shot\",\n    \"category\": <TagType.TECHNICAL: \"Technical\">,\n    \"confidence\": 0.92\n  },\n  {\n    \"tag\": \"human first steps on the moon\",\n    \"category\": <TagType.SEMANTIC: \"Semantic\">,\n    \"confidence\": 0.95\n  }],\n  \"short_caption\": \"First man on the Moon\",\n  \"dense_caption\": \"The figure clad in a pristine white space suit, emblazoned with the American flag, stands powerfully on the moon's desolate and rocky surface. The lunar module, a workhorse of space engineering, looms in the background, its metallic legs sinking slightly into the dust where footprints and tracks from the mission's journey are clearly visible. The photograph captures the astronaut from a low angle, emphasizing his imposing presence against the desolate lunar backdrop. The stark contrast between the blacks and whiteslicks of lost light and shadow adds dramatic depth to this seminal moment in human achievement.\"\n}\n```\n\n## Conclusion\n\nThis guide demonstrated how Outlines enables structured output generation with vision-language models. 
With the techniques shown above, you can build:\n\n- **Content Management Systems**: Automatically tag and categorize visual content with structured metadata that can be directly stored in databases, enabling powerful search and filtering capabilities\n- **Accessibility Tools**: Generate rich, structured descriptions of images that can be adapted for different contexts - from brief alt-text to detailed scene descriptions for screen readers\n- **Quality Assurance Pipelines**: Validate visual content against specific criteria by extracting structured attributes and checking them against business rules\n"
  },
  {
    "path": "docs/index.md",
    "content": "---\ntitle: Welcome to Outlines!\nhide:\n  - navigation\n---\n\n#\n\n<figure markdown>\n![](assets/images/logo-light-mode.svg#only-light){ width=\"500\" }\n![](assets/images/logo-dark-mode.svg#only-dark){ width=\"500\" }\n</figure>\n\n\nLLMs are powerful but their outputs are unpredictable. Most solutions attempt to fix bad outputs after generation using parsing, regex, or fragile code that breaks easily.\n\nOutlines guarantees structured outputs during generation — directly from any LLM.\n\n- **Works with any model** - Same code runs across OpenAI, Ollama, vLLM, and more\n- **Simple integration** - Just pass your desired output type: `model(prompt, output_type)`\n- **Guaranteed valid structure** - No more parsing headaches or broken JSON\n- **Provider independence** - Switch models without changing code\n- **Rich structure definition** - Use Json Schema, regular expressions or context-free grammars\n\n<figure markdown>\n[Get Started](guide/getting_started){ .md-button .md-button--primary }\n[View Examples](examples/){ .md-button }\n[API Reference](api_reference/){ .md-button }\n[GitHub](https://github.com/dottxt-ai/outlines){ .md-button }\n</figure>\n\n## 🚀 Building the future of structured generation\n\nWe're working with select partners to develop new interfaces to structured generation.\n\nNeed XML, FHIR, custom schemas or grammars? Let's talk.\n\nAudit your schema: share one schema, we show you what breaks under generation, the constraints that fix it, and compliance rates before and after. 
Sign up [here](https://h1xbpbfsf0w.typeform.com/to/rtFUraA2?typeform).\n\n## See it in action\n\n```python\nfrom pydantic import BaseModel\nfrom typing import Literal\nimport outlines\nimport openai\n\nclass Customer(BaseModel):\n    name: str\n    urgency: Literal[\"high\", \"medium\", \"low\"]\n    issue: str\n\nclient = openai.OpenAI()\nmodel = outlines.from_openai(client, \"gpt-4o\")\n\ncustomer = model(\n    \"Alice needs help with login issues ASAP\",\n    Customer\n)\n# ✓ Always returns valid Customer object\n# ✓ No parsing, no errors, no retries\n```\n\n## Quick install\n\n```shell\npip install outlines\n```\n\n## Features\n\n<div class=\"grid cards\" markdown>\n\n- :material-shield-check: **Reliable** - Guaranteed schema compliance -- always valid JSON.\n- :material-puzzle: **Feature-rich** - Supports a large proportion of the JSON Schema spec, along with regex and context-free grammars.\n- :material-lightning-bolt: **Fast** - Microseconds of overhead vs seconds of retries. Compilation happens once, not every request.\n- :material-lightbulb: **Simple** - Outlines is a low-abstraction library. Write code the way you normally do with LLMs. No agent frameworks needed.\n\n</div>\n\n## Supported inference APIs, libraries & servers\n\n- [vLLM](features/models/vllm.md)\n- [vLLM offline](features/models/vllm_offline.md)\n- [Transformers](features/models/transformers.md)\n- [llama.cpp](features/models/llamacpp.md)\n- [Ollama](features/models/ollama.md)\n- [MLX-LM](features/models/mlxlm.md)\n- [SgLang](features/models/sglang.md)\n- [TGI](features/models/tgi.md)\n- [OpenAI](features/models/openai.md)\n- [Anthropic](features/models/anthropic.md)\n- [Gemini](features/models/gemini.md)\n- [Dottxt](features/models/dottxt.md)\n\n## Who is using Outlines?\n\nHundreds of organisations and the main LLM serving frameworks ([vLLM][vllm], [TGI][tgi], [LoRAX][lorax], [xinference][xinference], [SGLang][sglang]) use Outlines. 
Prominent companies and organizations that use Outlines include:\n\n<div class=\"grid cards\" markdown>\n  <div class=\"row\"><img src=\"../logos/amazon.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/apple.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/best_buy.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/canoe.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/cisco.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/dassault_systems.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/databricks.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/datadog.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/dbt_labs.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../assets/images/dottxt.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/gladia.jpg\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/harvard.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/hf.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/johns_hopkins.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/meta.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/mit.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/mount_sinai.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/nvidia.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/nyu.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/safran.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/salesforce.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/shopify.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/smithsonian.png\" width=\"200\"></div>\n  <div class=\"row\"><img src=\"../logos/tinder.png\" width=\"200\"></div>\n  <div class=\"row\"><img 
src=\"../logos/upenn.png\" width=\"200\"></div>\n</div>\n\nOrganizations are included either because they use Outlines as a dependency in a public repository, or because of direct communication between members of the Outlines team and employees at these organizations.\n\nStill not convinced, read [what people say about us](community/feedback.md). And make sure to take a look at what the [community is building](community/examples.md)!\n\n\n## Outlines people\n\nOutlines would not be what it is today without a community of dedicated developers:\n\n<a href=\"https://github.com/dottxt-ai/outlines/graphs/contributors\">\n  <img src=\"https://contrib.rocks/image?repo=dottxt-ai/outlines\" />\n</a>\n\n## About .txt\n\nOutlines is built with ❤️ by [.txt](https://dottxt.co).\n\n.txt solves the critical problem of reliable structured output generation for large language models. Our [commercially-licensed libraries][dottxt-doc] ensure 100% compliance with JSON Schema, regular expressions and context-free grammars while adding only microseconds of latency. Unlike open-source alternatives, we offer superior reliability, performance, and enterprise support.\n\n\n## Acknowledgements\n\n<div class=\"grid\" markdown>\n\n<figure markdown>\n  <a href=\"https://www.normalcomputing.ai\">\n  ![Normal Computing logo](assets/images/normal_computing.jpg){ width=\"150\" }\n  </a>\n</figure>\n\n</div>\n\nOutlines was originally developed at [@NormalComputing](https://twitter.com/NormalComputing) by [@remilouf](https://twitter.com/remilouf) and [@BrandonTWillard](https://twitter.com/BrandonTWillard). 
It is now maintained by [.txt](https://dottxt.co).\n\n[discord]: https://discord.gg/R9DSu34mGd\n[aesara]: https://github.com/aesara-devs\n[blackjax]: https://github.com/blackjax-devs/blackjax\n[pythological]: https://github.com/pythological\n[hy]: https://hylang.org/\n[.txt]: https://dottxt.co\n[vllm]: https://github.com/vllm-project/vllm\n[tgi]: https://github.com/huggingface/text-generation-inference\n[lorax]: https://github.com/predibase/lorax\n[xinference]: https://github.com/xorbitsai/inference\n[sglang]: https://github.com/sgl-project/sglang/\n[dottxt-doc]: https://docs.dottxt.co\n"
  },
  {
    "path": "docs/overrides/home.html",
    "content": "{#-\nThis file overrides the home page to use HTML tooling\nbetter.\n-#}\n{% extends \"main.html\" %}\n{% block tabs %}\n{{ super() }}\n\n<style>\n    @media screen and (min-width:60em) {\n        .md-sidebar--secondary {\n            display: none;\n        }\n    }\n\n    @media screen and (min-width:76.25em) {\n        .md-sidebar--primary {\n            display: none;\n        }\n    }\n\n    .mdx-container {\n        display: flex;\n        justify-content: center;\n        align-items: center;\n        padding-left: 1rem;\n        padding-right: 1rem;\n    }\n\n    .mdx-hero {\n        text-align: center;\n        margin-top: 6rem;\n    }\n\n    .mdx-hero__image {\n        margin-bottom: 1rem;\n        margin-top: 1rem;\n        max-width: 80%;\n        margin: 0 auto;\n\n        @media screen and (max-width: 40em) {\n            max-width: 60%;\n        }\n    }\n\n    .mdx-hero__content {\n        max-width: 600px;\n        margin: 0 auto;\n\n        .subtitle {\n            font-size: 1rem;\n            letter-spacing: 0.025rem;\n        }\n\n        h2 {\n            max-width: 460px;\n        }\n\n        @media screen and (max-width: 40em) {\n            .subtitle {\n                font-size: 0.8rem;\n                max-width: 70%;\n                font-weight: 700;\n            }\n        }\n\n        a:hover{\n            color: #A7623A;\n        }\n    }\n\n    .md-buttons {\n        display: flex;\n        justify-content: center;\n        gap: 0.5rem;\n        flex-direction: row;\n        margin: 0 auto;\n        margin-top: 2rem;\n        flex-direction: row;\n        width: max-content;\n        font-weight: 700;\n    }\n\n    .md-team {\n        margin-top: 4rem;\n    }\n\n    /* New media query for smaller screens */\n    @media screen and (max-width: 40em) {\n        .md-buttons {\n            flex-direction: column;\n            /* Stack buttons vertically */\n            align-items: center;\n            /* Center-align 
the stacked buttons */\n        }\n\n        .md-buttons .md-button {\n            width: 100%;\n            /* Make buttons full-width */\n            max-width: 200px;\n            /* Limit maximum width for better appearance */\n        }\n    }\n\n    .md-button {\n        transition: background-color 0.3s ease-in-out; /* Smooth transition */\n        border-radius: 6px !important;\n        padding: 0.4rem 0.8rem !important;\n\n    }\n\n    .md-button:hover {\n        background-color: #A7623A !important; /* Desired hover background color */\n        border-color: #A7623A !important;\n    }\n</style>\n\n<section class=\"mdx-container\">\n    <div class=\"md-grid md-typeset\">\n        <div class=\"mdx-hero\">\n            <div class=\"mdx-hero__image\">\n                <img src=\"assets/images/logo-light-mode.svg#only-light\" alt=\"Outlines Logo\" width=\"600\" draggable=\"false\">\n                <img src=\"assets/images/logo-dark-mode.svg#only-dark\" alt=\"Outlines Logo\" width=\"600\" draggable=\"false\">\n            </div>\n            <div class=\"mdx-hero__content\">\n                <h2 class=\"subtitle\" style=\"font-weight: 500; padding-top: 1rem;\">\n                    Structured text generation and robust prompting for language models\n                </h2>\n                <div class=\"md-buttons\">\n                    <a href=\"{{ 'welcome/' | url }}\" title=\"Get started\" class=\"md-button md-button--primary\">\n                        Get started\n                    </a>\n                    <a href=\"https://discord.gg/ZxBxyWmW5n\" title=\"Join the Community\" class=\"md-button\">\n                        Join the community\n                    </a>\n                </div>\n                <div class=\"md-buttons\">\n                    <p>\n                        Follow us on <a href=\"https://twitter.com/dottxtai\" title=\"Follow us on X\" target=\"_blank\">\n                            X\n                        </a> and\n             
           <a href=\"https://bsky.app/profile/dottxtai.bsky.social\" title=\"Follow us on Bluesky\" target=\"_blank\">\n                            Bluesky\n                        </a>\n                    </p>\n                </div>\n\n                <p class=\"md-team\">Made with ❤️ by the team at <a href=\"https://dottxt.co\">.txt</a></p>\n            </div>\n        </div>\n    </div>\n</section>\n{% endblock %}\n{% block content %}{% endblock %}\n{% block footer %}{% endblock %}\n"
  },
  {
    "path": "docs/overrides/main.html",
    "content": "{% extends \"base.html\" %}\n"
  },
  {
    "path": "docs/stylesheets/extra.css",
    "content": "@font-face {\n    font-family: \"Source Code Pro Custom\", monospace;\n    src: url(https://fonts.googleapis.com/css2?family=Source+Code+Pro:ital,wght@0,200..900;1,200..900&display=swap);\n}\n\n/* Header/banner styling */\n.md-header {\n    background-color: #DFD1B6 !important;\n}\n\n:root > * {\n    /* Notion-like color palette */\n    --md-default-fg-color: #37352f;\n    --md-default-fg-color--light: #73706c;\n    --md-default-fg-color--lighter: #9b9a97;\n    --md-default-bg-color: #ffffff;\n    --md-default-bg-color--light: #f7f6f3;\n    --md-default-bg-color--lighter: #edece9;\n\n    /* Typography */\n    --md-text-font-family: \"Inter\", -apple-system, BlinkMacSystemFont, \"Segoe UI\", Helvetica, Arial, sans-serif;\n    --md-code-font: \"Source Code Pro\", Consolas, \"Liberation Mono\", Menlo, monospace;\n\n    /* Notion-like link colors */\n    --md-typeset-a-color: #37352f;\n    --md-accent-fg-color: #eb5757;\n\n    /* Background colors */\n    --md-code-bg-color: #f7f6f3;\n    --md-code-fg-color: #eb5757;\n}\n\n/* Code block styling */\n.highlight pre,\n.md-typeset pre code,\n.md-typeset .highlight pre,\n.md-typeset .highlighttable pre {\n    background-color: #2E3440 !important; /* Nord's darkest blue (nord0) */\n    border-radius: 4px !important; /* Subtle rounded corners like Notion */\n    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1), 0 1px 3px rgba(0, 0, 0, 0.08) !important; /* Subtle shadow */\n    border: none !important; /* No border for cleaner look */\n}\n\n/* Adjust padding from code content */\n.md-code__content {\n    padding: 1em 1.5em !important; /* Increased top/bottom padding */\n}\n\n/* Style only inline code (not code blocks) */\n.md-typeset :not(pre) > code {\n    background-color: rgba(135, 131, 120, 0.15); /* Notion's exact inline code background */\n    color: #E35A26; /* Orange color for inline code */\n    border-radius: 3px; /* Subtle rounded corners */\n    padding: 0.2em 0.4em; /* Notion-like padding */\n    
font-weight: 500; /* Medium weight */\n    font-size: 0.85em; /* Slightly smaller than body text */\n    border: none;\n    font-family: \"SFMono-Regular\", Consolas, \"Liberation Mono\", Menlo, monospace;\n}\n\n/* Override code block container background */\n.md-typeset .highlight,\n.md-typeset .highlighttable {\n    background-color: #ffffff !important; /* Match page background */\n    border-radius: 4px !important; /* Match code block radius */\n    overflow: hidden; /* Ensure child elements respect borders */\n}\n\n/* Ensure proper spacing for the entire code block */\n.md-typeset pre {\n    margin: 1.5em 0 !important;\n}\n\n/* Style the copy button with Nord snow grey */\n.md-clipboard {\n    color: #D8DEE9 !important; /* Nord snow storm */\n    top: 0.75em !important; /* Lower the button more */\n    right: 0.5em !important; /* Add some spacing from right edge */\n}\n\n.md-clipboard:hover {\n    color: #ECEFF4 !important; /* Brighter snow storm on hover */\n}\n\n.md-clipboard:after {\n    color: #D8DEE9 !important;\n}\n\n/* Style scrollbars with Nord colors */\n.md-typeset pre::-webkit-scrollbar {\n    height: 0.4rem;\n    width: 0.4rem;\n}\n\n.md-typeset pre::-webkit-scrollbar-track {\n    background-color: #3B4252; /* Nord1 */\n}\n\n.md-typeset pre::-webkit-scrollbar-thumb {\n    background-color: #4C566A; /* Nord3 */\n    border-radius: 0.2rem;\n}\n\n.md-typeset pre::-webkit-scrollbar-thumb:hover {\n    background-color: #D8DEE9; /* Nord snow storm on hover */\n}\n\n/* Firefox scrollbar styling */\n.md-typeset pre {\n    scrollbar-width: thin;\n    scrollbar-color: #4C566A #3B4252;\n}\n\n/* Notion-like visual hierarchy */\n.md-typeset h1 {\n    font-weight: 600;\n    font-size: 2.5rem;\n    line-height: 1.2;\n    margin-top: 2.5rem;\n    margin-bottom: 0.75rem;\n    color: #37352f;\n    letter-spacing: -0.01em;\n}\n\n.md-typeset h2 {\n    font-weight: 600;\n    font-size: 1.875rem;\n    line-height: 1.3;\n    margin-top: 2.5rem;\n    margin-bottom: 
0.75rem;\n    color: #37352f;\n    letter-spacing: -0.005em;\n    border: none;\n    padding: 0;\n}\n\n.md-typeset h3 {\n    font-weight: 500;\n    font-size: 1.5rem;\n    line-height: 1.3;\n    margin-top: 2rem;\n    margin-bottom: 0.75rem;\n    color: #37352f;\n    letter-spacing: -0.003em;\n}\n\n.md-typeset h4 {\n    font-weight: 500;\n    font-size: 1.25rem;\n    line-height: 1.3;\n    margin-top: 1.75rem;\n    margin-bottom: 0.5rem;\n    color: #37352f;\n}\n\n.md-typeset h5 {\n    font-weight: 500;\n    font-size: 1rem;\n    line-height: 1.4;\n    margin-top: 1.5rem;\n    margin-bottom: 0.5rem;\n    color: #37352f;\n}\n\n.md-typeset h6 {\n    font-weight: 500;\n    font-size: 0.875rem;\n    line-height: 1.4;\n    margin-top: 1.25rem;\n    margin-bottom: 0.5rem;\n    color: #37352f;\n    text-transform: none;\n    letter-spacing: normal;\n}\n\n/* Notion-like paragraph styling */\n.md-typeset p {\n    line-height: 1.5;\n    margin-bottom: 1em; /* More whitespace */\n    color: #37352f;\n    font-weight: 400;\n}\n\n/* Reduce font size for navigation */\n.md-nav__link {\n    font-size: 0.6875rem; /* 11px */\n}\n\n/* Reduce font size for TOC */\n.md-nav--secondary .md-nav__link {\n    font-size: 0.6875rem; /* 11px */\n}\n\n/* Reduce spacing between navigation items */\n.md-nav__item {\n    margin: 0;\n}\n\n.md-nav__link {\n    padding-top: 0;\n    padding-bottom: 0;\n    line-height: 1.2; /* Tighter line height */\n}\n\n/* Make navigation sections more compact */\n.md-nav__title {\n    line-height: 1.2;\n    padding: 0.2rem 0.5rem;\n    margin-bottom: 0.2rem;\n}\n\n/* Reduce TOC line height for compactness */\n.md-nav--secondary .md-nav__link {\n    line-height: 1.2;\n}\n\n/* Notion-like list styling */\n.md-typeset ul,\n.md-typeset ol {\n    margin-top: 0.25em; /* Reduced top margin to sit closer to text */\n    margin-bottom: 1em; /* Match paragraph spacing */\n    color: #37352f;\n}\n\n.md-typeset li {\n    line-height: 1.5;\n    margin-bottom: 0.15rem; /* 
Slightly more spacing between list items */\n    font-weight: 400;\n}\n\n/* Notion-style links */\n.md-typeset a {\n    color: #37352f;\n    text-decoration: underline;\n    text-decoration-color: rgba(55, 53, 47, 0.4);\n    text-underline-offset: 2px;\n    transition: text-decoration-color 0.1s ease;\n}\n\n.md-typeset a:hover {\n    text-decoration-color: rgba(55, 53, 47, 0.8);\n    background-color: rgba(55, 53, 47, 0.04);\n}\n\n/* Make important elements stand out */\n.md-typeset strong {\n    font-weight: 600;\n    color: #37352f;\n}\n\n/* Better spacing for code blocks in relation to text */\n.md-typeset pre {\n    margin: 1.5em 0 !important; /* More whitespace around code blocks */\n}\n\n/* Notion-style tables */\n.md-typeset table {\n    border-collapse: collapse;\n    margin: 1rem 0;\n}\n\n.md-typeset table th {\n    font-weight: 600;\n    background-color: #f7f6f3;\n    color: #37352f;\n    border: 1px solid #e1e0dd;\n    padding: 0.5rem 0.75rem;\n}\n\n.md-typeset table td {\n    border: 1px solid #e1e0dd;\n    padding: 0.5rem 0.75rem;\n}\n\n/* Notion-style blockquotes */\n.md-typeset blockquote {\n    border-left: 3px solid #37352f;\n    padding-left: 1rem;\n    margin: 1rem 0;\n    color: #37352f;\n    background: transparent;\n}\n\n/* Page styling */\n.md-content {\n    background-color: #ffffff;\n}\n\n.md-sidebar {\n    background-color: #fbfbfa;\n}\n\n/* Remove shadows for cleaner look */\n.md-header,\n.md-tabs {\n    box-shadow: none;\n    border-bottom: 1px solid #e1e0dd;\n}\n\n/* Admonition styling with custom palette */\n.md-typeset .admonition,\n.md-typeset details {\n    border-radius: 4px;\n    border: none;\n    box-shadow: none;\n    font-size: 0.6875rem; /* Very small font size - 11px */\n    padding: 0.75rem;\n    margin: 1rem 0;\n}\n\n/* Note/Info - Blue */\n.md-typeset .admonition.note,\n.md-typeset details.note,\n.md-typeset .admonition.info,\n.md-typeset details.info {\n    background-color: rgba(127, 154, 207, 0.1) !important;\n    
border-left: 4px solid #7F9ACF !important;\n}\n\n.md-typeset .note > .admonition-title,\n.md-typeset .note > summary,\n.md-typeset .info > .admonition-title,\n.md-typeset .info > summary {\n    background-color: rgba(127, 154, 207, 0.2) !important;\n    border-left: none !important;\n}\n\n/* Additional specificity for info type and custom types that should be blue */\n.md-typeset .admonition.admonition-info,\n.md-typeset details.details-info,\n.md-typeset .admonition.installation,\n.md-typeset .admonition.example,\n.md-typeset .admonition.abstract,\n.md-typeset .admonition.summary,\n.md-typeset .admonition.tldr {\n    background-color: rgba(127, 154, 207, 0.1) !important;\n    border-left: 4px solid #7F9ACF !important;\n}\n\n/* Titles for custom blue admonitions */\n.md-typeset .installation > .admonition-title,\n.md-typeset .example > .admonition-title,\n.md-typeset .abstract > .admonition-title,\n.md-typeset .summary > .admonition-title,\n.md-typeset .tldr > .admonition-title {\n    background-color: rgba(127, 154, 207, 0.2) !important;\n    border-left: none !important;\n}\n\n/* Warning/Caution - Yellow */\n.md-typeset .admonition.warning,\n.md-typeset details.warning,\n.md-typeset .admonition.caution,\n.md-typeset details.caution {\n    background-color: rgba(189, 147, 47, 0.1);\n    border-left: 4px solid #BD932F;\n}\n\n.md-typeset .warning > .admonition-title,\n.md-typeset .warning > summary,\n.md-typeset .caution > .admonition-title,\n.md-typeset .caution > summary {\n    background-color: rgba(189, 147, 47, 0.2);\n    border-left: none;\n}\n\n/* Danger/Error - Orange */\n.md-typeset .admonition.danger,\n.md-typeset details.danger,\n.md-typeset .admonition.error,\n.md-typeset details.error {\n    background-color: rgba(227, 90, 38, 0.1);\n    border-left: 4px solid #E35A26;\n}\n\n.md-typeset .danger > .admonition-title,\n.md-typeset .danger > summary,\n.md-typeset .error > .admonition-title,\n.md-typeset .error > summary {\n    background-color: rgba(227, 
90, 38, 0.2);\n    border-left: none;\n}\n\n/* Success/Tip/Hint - Green */\n.md-typeset .admonition.success,\n.md-typeset details.success,\n.md-typeset .admonition.tip,\n.md-typeset details.tip,\n.md-typeset .admonition.hint,\n.md-typeset details.hint {\n    background-color: rgba(166, 180, 163, 0.1);\n    border-left: 4px solid #A6B4A3;\n}\n\n.md-typeset .success > .admonition-title,\n.md-typeset .success > summary,\n.md-typeset .tip > .admonition-title,\n.md-typeset .tip > summary,\n.md-typeset .hint > .admonition-title,\n.md-typeset .hint > summary {\n    background-color: rgba(166, 180, 163, 0.2);\n    border-left: none;\n}\n\n/* General admonition title styling */\n.md-typeset .admonition-title,\n.md-typeset summary {\n    font-weight: 600;\n    font-size: 0.6875rem; /* Very small - 11px */\n    padding: 0.5rem 0.75rem;\n    margin: -0.75rem -0.75rem 0.5rem -0.75rem;\n    border-radius: 4px 4px 0 0;\n}\n\n/* Ensure consistent icon styling */\n.md-typeset .admonition > .admonition-title::before,\n.md-typeset details > summary::before {\n    font-size: 1rem;\n    margin-right: 0.5rem;\n}\n"
  },
  {
    "path": "environment.yml",
    "content": "# To use:\n#\n#   $ conda env create -f environment.yml  # `mamba` works too for this command\n#   $ conda activate dottxt-ai\n#\nname: dottxt-ai\nchannels:\n  - conda-forge\n  - huggingface\ndependencies:\n  - python==3.10.0\n  - jinja2\n  - numpy\n  - pydantic\n  - scipy\n  - pytest\n  - pre-commit\n  - referencing\n  - jsonschema\n  - transformers\n  - pip\n  - pip:\n    - -e \".[test]\"\n"
  },
  {
    "path": "examples/babyagi.py",
    "content": "\"\"\"This example is a simplified translation of BabyAGI.\n\nIt currently does not use the vector store retrieval\n\nThe original repo can be found at https://github.com/yoheinakajima/babyagi\n\"\"\"\n\nfrom collections import deque\nfrom typing import Deque, List\n\nfrom openai import OpenAI\n\nimport outlines\nfrom outlines import Template\n\n\nmodel = outlines.from_openai(OpenAI(), \"gpt-4o-mini\")\ncomplete = outlines.Generator(model)\n\n## Load the prompts\nperform_task_ppt = Template.from_file(\"prompts/babyagi_perform_task.txt\")\ncreate_tasks_ppt = Template.from_file(\"prompts/babyagi_create_task.txt\")\nprioritize_tasks_ppt = Template.from_file(\"prompts/babyagi_prioritize_task.txt\")\n\n\ndef create_tasks_fmt(result: str) -> List[str]:\n    new_tasks = result.split(\"\\n\")\n\n    task_list = []\n    for task in new_tasks:\n        parts = task.strip().split(\".\", 1)\n        if len(parts) == 2:\n            task_list.append(parts[1].strip())\n\n    return task_list\n\n\ndef prioritize_tasks_fmt(result: str):\n    new_tasks = result.split(\"\\n\")\n\n    task_list: Deque = deque([])\n    for task in new_tasks:\n        parts = task.strip().split(\".\", 1)\n        if len(parts) == 2:\n            task_id = int(parts[0].strip())\n            task_name = parts[1].strip()\n            task_list.append({\"task_id\": task_id, \"task_name\": task_name})\n\n    return task_list\n\n\nobjective = \"Becoming rich while doing nothing.\"\nfirst_task = {\n    \"task_id\": 1,\n    \"task_name\": \"Find a repeatable, low-maintainance, scalable business.\",\n}\nnext_task_id = 1\ntask_list = deque([first_task])\n\n\ndef one_cycle(objective: str, task_list, next_task_id: int):\n    \"\"\"One BabyAGI cycle.\n\n    It consists in executing the highest-priority task, creating some new tasks\n    given the result, and re-priotizing the tasks.\n\n    Parameters\n    ----------\n    objective\n        The overall objective of the session.\n    task_list\n       
 The current list of tasks to perform.\n    next_task_id\n        The current task id.\n\n    \"\"\"\n\n    task = task_list.popleft()\n\n    prompt = perform_task_ppt(objective=objective, task=task)\n    result = complete(prompt)\n\n    prompt = create_tasks_ppt(\n        objective=objective,\n        task=first_task[\"task_name\"],\n        result=result,\n        previous_tasks=[first_task[\"task_name\"]],\n    )\n    new_tasks = complete(prompt)\n\n    new_tasks = create_tasks_fmt(new_tasks)\n\n    for task in new_tasks:\n        next_task_id += 1\n        task_list.append({\"task_id\": next_task_id, \"task_name\": task})\n\n    prompt = prioritize_tasks_ppt(\n        objective=objective,\n        tasks=[task[\"task_name\"] for task in task_list],\n        next_task_id=next_task_id,\n    )\n    prioritized_tasks = complete(prompt)\n\n    prioritized_tasks = prioritize_tasks_fmt(prioritized_tasks)\n\n    return task, result, prioritized_tasks, next_task_id\n\n\n# Let's run it for 5 cycles to see how it works without spending a fortune.\nfor _ in range(5):\n    print(\"\\033[95m\\033[1m\" + \"\\n*****TASK LIST*****\\n\" + \"\\033[0m\\033[0m\")\n    for t in task_list:\n        print(\" • \" + str(t[\"task_name\"]))\n\n    task, result, task_list, next_task_id = one_cycle(\n        objective, task_list, next_task_id\n    )\n\n    print(\"\\033[92m\\033[1m\" + \"\\n*****NEXT TASK*****\\n\" + \"\\033[0m\\033[0m\")\n    print(task)\n    print(\"\\033[93m\\033[1m\" + \"\\n*****TASK RESULT*****\\n\" + \"\\033[0m\\033[0m\")\n    print(result)\n"
  },
  {
    "path": "examples/beam-cloud/README.md",
    "content": "## Deploy Outlines on Beam\n\n1. Create an account [here](https://beam.cloud) and install the Beam SDK\n2. Download the `app.py` file to your computer\n3. Deploy it as a serverless API by running: `beam deploy app.py:predict`\n"
  },
  {
    "path": "examples/beam-cloud/app.py",
    "content": "from typing import Literal\n\nfrom beam import Image, endpoint, env\n\n\nif env.is_remote():\n    import outlines\n\n\n# Pre-load models when the container first starts\ndef load_models():\n    from transformers import AutoModelForCausalLM, AutoTokenizer\n    import outlines\n\n    model = outlines.models.from_transformers(\n        AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n        AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    )\n    return model\n\n\n@endpoint(\n    name=\"outlines-serverless\",\n    gpu=\"A10G\",\n    cpu=1,\n    memory=\"16Gi\",\n    on_start=load_models,\n    image=Image().add_python_packages(\n        [\"outlines\", \"torch\", \"transformers\", \"accelerate\"]\n    ),\n)\ndef predict(context, **inputs):\n    default_prompt = \"\"\"You are a sentiment-labelling assistant.\n    Is the following review positive or negative?\n\n    Review: This restaurant is just awesome!\n    \"\"\"\n\n    prompt = inputs.get(\"prompt\", default_prompt)\n\n    # Unpack cached model from context\n    model = context.on_start_value\n    # Inference\n    generator = outlines.Generator(model, Literal[\"Positive\", \"Negative\"])\n    answer = generator(prompt)\n    return {\"answer\": answer}\n"
  },
  {
    "path": "examples/bentoml/.bentoignore",
    "content": "__pycache__/\n*.py[cod]\n*$py.class\n.ipynb_checkpoints\nvenv/\n"
  },
  {
    "path": "examples/bentoml/bentofile.yaml",
    "content": "service: \"service:Outlines\"\nlabels:\n  owner: bentoml-team\n  stage: demo\ninclude:\n- \"*.py\"\npython:\n  requirements_txt: \"./requirements.txt\"\n  lock_packages: false\n"
  },
  {
    "path": "examples/bentoml/import_model.py",
    "content": "import bentoml\n\nMODEL_ID = \"mistralai/Mistral-7B-v0.1\"\nBENTO_MODEL_TAG = MODEL_ID.lower().replace(\"/\", \"--\")\n\n\ndef import_model(model_id, bento_model_tag):\n    import torch\n    from transformers import AutoModelForCausalLM, AutoTokenizer\n\n    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n    model = AutoModelForCausalLM.from_pretrained(\n        MODEL_ID,\n        torch_dtype=torch.float16,\n        low_cpu_mem_usage=True,\n    )\n\n    with bentoml.models.create(bento_model_tag) as bento_model_ref:\n        tokenizer.save_pretrained(bento_model_ref.path)\n        model.save_pretrained(bento_model_ref.path)\n\n\nif __name__ == \"__main__\":\n    import_model(MODEL_ID, BENTO_MODEL_TAG)\n"
  },
  {
    "path": "examples/bentoml/requirements.txt",
    "content": "bentoml>=1.2.11\noutlines==0.0.37\ntransformers==4.38.2\ndatasets==2.18.0\naccelerate==0.27.2\n"
  },
  {
    "path": "examples/bentoml/service.py",
    "content": "import typing as t\n\nimport bentoml\nfrom import_model import BENTO_MODEL_TAG, MODEL_ID\n\nDEFAULT_SCHEMA = \"\"\"{\n    \"title\": \"Character\",\n    \"type\": \"object\",\n    \"properties\": {\n        \"name\": {\n            \"title\": \"Name\",\n            \"maxLength\": 10,\n            \"type\": \"string\"\n        },\n        \"age\": {\n            \"title\": \"Age\",\n            \"type\": \"integer\"\n        },\n        \"armor\": {\"$ref\": \"#/definitions/Armor\"},\n        \"weapon\": {\"$ref\": \"#/definitions/Weapon\"},\n        \"strength\": {\n            \"title\": \"Strength\",\n            \"type\": \"integer\"\n        }\n    },\n    \"required\": [\"name\", \"age\", \"armor\", \"weapon\", \"strength\"],\n    \"definitions\": {\n        \"Armor\": {\n            \"title\": \"Armor\",\n            \"description\": \"An enumeration.\",\n            \"enum\": [\"leather\", \"chainmail\", \"plate\"],\n            \"type\": \"string\"\n        },\n        \"Weapon\": {\n            \"title\": \"Weapon\",\n            \"description\": \"An enumeration.\",\n            \"enum\": [\"sword\", \"axe\", \"mace\", \"spear\", \"bow\", \"crossbow\"],\n            \"type\": \"string\"\n        }\n    }\n}\"\"\"\n\n\n@bentoml.service(\n    traffic={\n        \"timeout\": 300,\n    },\n    resources={\n        \"gpu\": 1,\n        \"gpu_type\": \"nvidia-l4\",\n    },\n)\nclass Outlines:\n    bento_model_ref = bentoml.models.get(BENTO_MODEL_TAG)\n\n    def __init__(self) -> None:\n        import torch\n        from transformers import AutoModelForCausalLM, AutoTokenizer\n\n        import outlines\n\n        self.model = outlines.from_transformers(\n            AutoTokenizer.from_pretrained(MODEL_ID),\n            AutoModelForCausalLM.from_pretrained(\n                MODEL_ID,\n                torch_dtype=torch.float16,\n                low_cpu_mem_usage=True,\n            )\n        )\n\n    @bentoml.api\n    async def generate(\n        
self,\n        prompt: str = \"Give me a character description.\",\n        json_schema: t.Optional[str] = DEFAULT_SCHEMA,\n    ) -> t.Dict[str, t.Any]:\n        import outlines\n\n        generator = outlines.Generator(self.model, outlines.json_schema(json_schema))\n        character = generator(prompt)\n\n        return character\n"
  },
  {
    "path": "examples/cerebrium/cerebrium.toml",
    "content": "[cerebrium.deployment]\nname = \"cerebrium\"\npython_version = \"3.11\"\ncuda_version = \"12\"\ninclude = \"[./*, main.py, cerebrium.toml]\"\nexclude = \"[.*]\"\nshell_commands = []\n\n[cerebrium.hardware]\ncpu = 2\nmemory = 14.0\ngpu = \"AMPERE A10\"\ngpu_count = 1\nprovider = \"aws\"\nregion = \"us-east-1\"\n\n[cerebrium.scaling]\nmin_replicas = 0\nmax_replicas = 5\ncooldown = 60\n\n[cerebrium.dependencies.pip]\noutlines = \"==0.0.37\"\ntransformers = \"==4.38.2\"\ndatasets = \"==2.18.0\"\naccelerate = \"==0.27.2\"\n"
  },
  {
    "path": "examples/cerebrium/main.py",
    "content": "from transformers import AutoModelForCausalLM, AutoTokenizer\n\nimport outlines\n\n\nmodel = outlines.from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"mistralai/Mistral-7B-Instruct-v0.2\"),\n    AutoTokenizer.from_pretrained(\"mistralai/Mistral-7B-Instruct-v0.2\"),\n)\n\n\nschema = {\n    \"title\": \"Character\",\n    \"type\": \"object\",\n    \"properties\": {\n        \"name\": {\"title\": \"Name\", \"maxLength\": 10, \"type\": \"string\"},\n        \"age\": {\"title\": \"Age\", \"type\": \"integer\"},\n        \"armor\": {\"$ref\": \"#/definitions/Armor\"},\n        \"weapon\": {\"$ref\": \"#/definitions/Weapon\"},\n        \"strength\": {\"title\": \"Strength\", \"type\": \"integer\"},\n    },\n    \"required\": [\"name\", \"age\", \"armor\", \"weapon\", \"strength\"],\n    \"definitions\": {\n        \"Armor\": {\n            \"title\": \"Armor\",\n            \"description\": \"An enumeration.\",\n            \"enum\": [\"leather\", \"chainmail\", \"plate\"],\n            \"type\": \"string\",\n        },\n        \"Weapon\": {\n            \"title\": \"Weapon\",\n            \"description\": \"An enumeration.\",\n            \"enum\": [\"sword\", \"axe\", \"mace\", \"spear\", \"bow\", \"crossbow\"],\n            \"type\": \"string\",\n        },\n    },\n}\n\n\ndef generate(\n    prompt: str = \"Amiri, a 53 year old warrior woman with a sword and leather armor.\",\n):\n    character = model(\n        f\"<s>[INST]Give me a character description. Describe {prompt}.[/INST]\",\n        outlines.json_schema(schema),\n    )\n\n    print(character)\n    return character\n"
  },
  {
    "path": "examples/dating_profile.py",
    "content": "from dataclasses import dataclass\nfrom enum import Enum\n\nimport torch\nimport transformers\nfrom pydantic import BaseModel, conlist\n\nimport outlines\nfrom outlines import Template\n\n\nclass QuestionChoice(str, Enum):\n    A = \"The key to my heart is\"\n    B = \"The first item on my bucket list is\"\n    C = \"Perks of dating me\"\n    D = \"Message me if you also love\"\n    E = \"People would describe me as\"\n    F = \"I can beat you in a game of\"\n\n\n@dataclass\nclass QuestionAnswer:\n    question: QuestionChoice\n    answer: str\n\n\nclass DatingProfile(BaseModel):\n    # It is possible put length constraints on these strings using constr- however, this appears to dramatically increase the generation time\n    # This may be resolved in the future with this PR: https://github.com/dottxt-ai/outlines/pull/272\n    bio: str\n    job: str\n    # Ignore mypy checks here because it still doesn't support conlist or constr: https://github.com/pydantic/pydantic/issues/975\n    interests: conlist(str, min_length=1, max_length=5)  # type: ignore\n    qna1: QuestionAnswer\n    qna2: QuestionAnswer\n\n\n@dataclass\nclass Example:\n    description: str\n    profile: DatingProfile\n\n\nsamples: list[Example] = [\n    Example(\n        description=\"I'm an author and former professional soccer player living in Seattle who publishes popular fiction books. A typical day for me starts by hanging out with my cat, drinking a coffee, and reading as much as I can in a few hours. Then, I'll prepare a quick smoothie before starting to write for a few hours, take a break with soccer or running a few miles, and finally meet friends for dinner at a new, hip restaurant in the evening. Sometimes we go axe-throwing afterwards, or play poker, or watch a comedy show, or visit a dive bar. 
On my vacations, I travel extensively to countries South America, Europe, and Asia, with the goal of visiting them all!\",\n        profile=DatingProfile(\n            bio=\"Adventurer, dreamer, author, and soccer enthusiast. Life’s too short to waste time so I make the most of each day by exploring new places and playing with my friends on the pitch. What’s your favorite way to get out and have fun?\",\n            job=\"Famous Soccer Player -> Famous Author\",\n            interests=[\"Soccer\", \"Travel\", \"Friends\", \"Books\", \"Fluffy Animals\"],\n            qna1=QuestionAnswer(\n                question=QuestionChoice.B, answer=\"swim in all seven oceans!\"\n            ),\n            qna2=QuestionAnswer(\n                question=QuestionChoice.E,\n                answer=\"fun-loving, adventurous, and a little bit crazy\",\n            ),\n        ),\n    ),\n    Example(\n        description=\"I run my company and build houses for a living. I'm a big fan of the outdoors and love to go hiking, camping, and fishing. I don't like video games, but do like to watch movies. My love language is home-cooked food, and I'm looking for someone who isn't afraid to get their hands dirty.\",\n        profile=DatingProfile(\n            bio=\"If you're looking for a Montana man who loves to get outdoors and hunt, and who's in-tune with his masculinity then I'm your guy!\",\n            job=\"House Construction Manager / Entrepreneur\",\n            interests=[\"Hunting\", \"Hiking\", \"The outdoors\", \"Home-cooked food\"],\n            qna1=QuestionAnswer(question=QuestionChoice.A, answer=\"food made at home\"),\n            qna2=QuestionAnswer(\n                question=QuestionChoice.C,\n                answer=\"having a man in your life who can fix anything\",\n            ),\n        ),\n    ),\n    Example(\n        description=\"I run my own Youtube channel with 10M subscribers. I love working with kids, and my audience skews pretty young too. 
In my free time, I play Fortnite and Roblox. I'm looking for someone who is also a gamer and likes to have fun. I'm learning Japanese in my free time as well as how to cook.\",\n        profile=DatingProfile(\n            bio=\"Easy on the eyes (find me on Youtube!) and great with kids. What more do you need?\",\n            job=\"Youtuber 10M+ subscribers\",\n            interests=[\"Kids\", \"Gaming\", \"Japanese\"],\n            qna1=QuestionAnswer(question=QuestionChoice.D, answer=\"anime and gaming!\"),\n            qna2=QuestionAnswer(question=QuestionChoice.F, answer=\"Fortnite, gg ez\"),\n        ),\n    ),\n]\n\n\n# Below requires ~13GB of GPU memory\n# https://huggingface.co/mosaicml/mpt-7b-8k-instruct\n# Motivation: Reasonably large model that fits on a single GPU and has been fine-tuned for a larger context window\nmodel_name = \"mosaicml/mpt-7b-8k-instruct\"\nmodel = outlines.from_transformers(\n    transformers.AutoModelForCausalLM.from_pretrained(model_name),\n    transformers.AutoTokenizer.from_pretrained(model_name),\n)\n\nnew_description = \"I'm a laid-back lawyer who spends a lot of his free-time gaming. I work in a corporate office, but ended up here after the start-up I cofounded got acquired, so still play ping pong with my cool coworkers every day. I have a bar at home where I make cocktails, which is great for entertaining friends. I secretly like to wear suits and get a new one tailored every few months. I also like weddings because I get to wear those suits, and it's a good excuse for a date. 
I watch the latest series because I'm paying, with my hard-earned money, for every streaming service.\"\n\ndating_profile_prompt = Template.from_file(\"prompts/dating_profile.txt\")\nprompt = dating_profile_prompt(description=new_description, examples=samples)\nprofile = model(prompt, outlines.json_schema(DatingProfile), max_tokens=500)  # type: ignore\nprint(profile)\n\n# Sample generated profiles\n\"\"\"\n{\n    \"bio\": \"I'm an ambitious lawyer with a casual and fashionable style. I love games and sports, but my true passion is preparing refreshing cocktails at home and dressing to the nines at weddings. I'm currently looking for a woman to show a good time to and get a kiss on the opulent suit I just had made. Send resumÃ € to this inbox.\",\n    \"job\": \"Lawyer\",\n    \"interests\":\n    [\n        \"Stylish guys\",\n        \"Gaming\",\n        \"Ping pong\",\n        \"Cocktails\",\n        \"Weddings\"\n    ],\n    \"qna1\":\n    {\n        \"question\": \"The first item on my bucket list is\",\n        \"answer\": \"be married and have a family.\"\n    },\n    \"qna2\":\n    {\n        \"question\": \"People would describe me as\",\n        \"answer\": \"charming, stylish, and funny.\"\n    }\n}\n\"\"\"\n\n\"\"\"\n{\n    \"bio\": \"I’m a sexy lawyer with time on my hands. I love to game and play ping pong, but the real reason you should swipe to the right is because I look great in a suit. Who doesn’t love a man in a suit? Just saying. 
Send me a message if you think it’s time to take your dating life to the next level.\",\n    \"job\": \"Lawyer\",\n    \"interests\":\n    [\n        \"Gaming\",\n        \"Ping Pong\",\n        \"Tailored Suits\",\n        \"Weddings\",\n        \"Streaming Services\"\n    ],\n    \"qna1\":\n    {\n        \"question\": \"The first item on my bucket list is\",\n        \"answer\": \"simulate space but stay alive for as long as possible\"\n    },\n    \"qna2\":\n    {\n        \"question\": \"People would describe me as\",\n        \"answer\": \"easy-going, a little nerdy but with a mature essence\"\n    }\n}\n\"\"\"\n"
  },
  {
    "path": "examples/llamacpp_example.py",
    "content": "from enum import Enum\n\nfrom pydantic import BaseModel, constr\nfrom llama_cpp import Llama\n\nimport outlines\n\n\nclass Weapon(str, Enum):\n    sword = \"sword\"\n    axe = \"axe\"\n    mace = \"mace\"\n    spear = \"spear\"\n    bow = \"bow\"\n    crossbow = \"crossbow\"\n\n\nclass Armor(str, Enum):\n    leather = \"leather\"\n    chainmail = \"chainmail\"\n    plate = \"plate\"\n\n\nclass Character(BaseModel):\n    name: constr(max_length=10)\n    age: int\n    armor: Armor\n    weapon: Weapon\n    strength: int\n\n\nif __name__ == \"__main__\":\n    # curl -L -o mistral-7b-instruct-v0.2.Q5_K_M.gguf https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf\n    model = outlines.from_llamacpp(Llama(\"./mistral-7b-instruct-v0.2.Q5_K_M.gguf\"))\n\n    # Construct structured sequence generator\n    generator = outlines.Generator(model, Character)\n\n    # Draw a sample\n    seed = 789005\n\n    prompt = \"Instruct: You are a leading role play gamer. You have seen thousands of different characters and their attributes.\\nPlease return a JSON object with common attributes of an RPG character. Give me a character description\\nOutput:\"\n\n    sequence = generator(prompt, seed=seed, max_tokens=512)\n    print(sequence)\n"
  },
  {
    "path": "examples/llamacpp_processor.py",
    "content": "from enum import Enum\n\nfrom llama_cpp import Llama, LogitsProcessorList\nfrom pydantic import BaseModel, constr\n\nfrom outlines.processors import JSONLogitsProcessor\nfrom outlines.models.llamacpp import LlamaCppTokenizer\n\n\nclass Weapon(str, Enum):\n    sword = \"sword\"\n    axe = \"axe\"\n    mace = \"mace\"\n    spear = \"spear\"\n    bow = \"bow\"\n    crossbow = \"crossbow\"\n\n\nclass Armor(str, Enum):\n    leather = \"leather\"\n    chainmail = \"chainmail\"\n    plate = \"plate\"\n\n\nclass Character(BaseModel):\n    name: constr(max_length=10)\n    age: int\n    armor: Armor\n    weapon: Weapon\n    strength: int\n\n\nif __name__ == \"__main__\":\n    llama = Llama(\"./phi-2.Q4_K_M.gguf\")\n    tokenizer = LlamaCppTokenizer(llama)\n\n    prompt = \"Instruct: You are a leading role play gamer. You have seen thousands of different characters and their attributes.\\nPlease return a JSON object with common attributes of an RPG character. Give me a character description\\nOutput:\"\n\n    logits_processor = JSONLogitsProcessor(Character, tokenizer, tensor_library_name=\"numpy\")\n\n    json_str = llama.create_completion(\n        prompt,\n        top_k=40,\n        top_p=0.95,\n        temperature=0.7,\n        max_tokens=100,\n        logits_processor=LogitsProcessorList([logits_processor]),\n    )[\"choices\"][0][\"text\"]\n\n    print(json_str)\n"
  },
  {
    "path": "examples/math_generate_code.py",
    "content": "\"\"\"Example from https://dust.tt/spolu/a/d12ac33169\"\"\"\n\nimport openai\n\nimport outlines\nfrom outlines import Template\n\n\nexamples = [\n    {\"question\": \"What is 37593 * 67?\", \"code\": \"37593 * 67\"},\n    {\n        \"question\": \"Janet's ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?\",\n        \"code\": \"(16-3-4)*2\",\n    },\n    {\n        \"question\": \"A robe takes 2 bolts of blue fiber and half that much white fiber. How many bolts in total does it take?\",\n        \"code\": \" 2 + 2/2\",\n    },\n]\n\nquestion = \"Carla is downloading a 200 GB file. She can download 2 GB/minute, but 40% of the way through the download, the download fails. Then Carla has to restart the download from the beginning. How load did it take her to download the file in minutes?\"\n\nanswer_with_code_prompt = Template.from_string(\n    \"\"\"\n    {% for example in examples %}\n    QUESTION: {{example.question}}\n    CODE: {{example.code}}\n\n    {% endfor %}\n    QUESTION: {{question}}\n    CODE:\"\"\"\n)\n\n\ndef execute_code(code):\n    result = eval(code)\n    return result\n\n\nprompt = answer_with_code_prompt(question=question, examples=examples)\nmodel = outlines.from_openai(openai.OpenAI(), \"gpt-4o-mini\")\nanswer = model(prompt)\nresult = execute_code(answer)\nprint(f\"It takes Carla {result:.0f} minutes to download the file.\")\n"
  },
  {
    "path": "examples/meta_prompting.py",
    "content": "\"\"\"Meta-prompting examples.\n\nReferences\n----------\n\n.. [0] \"Prompting is programming: A Query Language for Large Language Models\"\n       https://arxiv.org/abs/2212.06094\n.. [1] \"Prompt programming For Large Language Models: Beyond the Few-Shot Paradigm\"\n       https://arxiv.org/abs/2102.07350.\n\n\"\"\"\n\nimport argparse\n\nimport openai\n\nimport outlines\nfrom outlines import Template\n\n\nclient = openai.OpenAI()\n\n\ndef split_into_steps(question, model_name: str):\n    solve = Template.from_string(\n        \"\"\"{{question}}\n        Rephrase : : as a true or false statement, identify an Object, relationship and subject\n        \"\"\"\n    )\n\n    model = outlines.from_openai(client, model_name)\n\n    prompt = solve(question=question)\n    answer = model(prompt, max_tokens=500)\n    prompt += (\n        answer\n        + \"\\n what is the only option that displays the same type of relationship as : :?\"\n    )\n    answer = model(prompt, max_tokens=500)\n    completed = prompt + answer\n\n    return completed\n\n\ndef fill_in_the_blanks(question, model_name: str):\n    determine_goal = Template.from_string(\n        \"\"\"{{question}}\n\n        In order to solve this problem, we will analyze each of the options and determine\n        \"\"\"\n    )\n\n    solve = Template.from_string(\"\"\"{{memory}}. Let's begin.\"\"\")\n\n    model = outlines.from_openai(client, model_name)\n\n    prompt = determine_goal(question=question)\n    answer = model(prompt, stop=[\".\"])\n    prompt = solve(memory=prompt + answer)\n    answer = model(prompt, max_tokens=500)\n    completed = prompt + answer\n\n    return completed\n\n\ndef ask_an_expert(question, model_name: str):\n    find_expert = Template.from_string(\n        \"\"\"\n        {{question}}\n        I entered my question into the Expert Generator \\\n        and waited. The Expert Generator will render a \\\n        simulation of an expert to answer my question. 
\\\n        The expert could be anyone, dead or alive, real \\\n        or fictional; the machine will find the person \\\n        most qualified to answer the question. For this \\\n        question in particular, the expert must be someone \\\n        who has thought a lot about the problem of \\\n        artificial intelligence and its alignment. \\\n        The Expert Generator beeped, indicating that it has \\\n        found the most qualified expert. The name displayed \\\n        on the screen: \"\n        \"\"\"\n    )\n\n    get_answer = Template.from_string(\n        \"\"\"\n        {{memory}}\".\n        I am ready to ask my question.\n        \"{{expert}}\" I say,\n        {{question}}\n        \"\"\"\n    )\n\n    model = outlines.from_openai(client, model_name)\n\n    prompt = find_expert(question=question)\n    expert = model(prompt, stop=['\"'])\n    prompt = get_answer(question=question, expert=expert, memory=prompt+expert)\n    answer = model(prompt, max_tokens=500)\n    completed = prompt + answer\n\n    return completed\n\n\ndef ask_an_expert_simple(question, model_name: str):\n    find_expert = Template.from_string(\n        \"\"\"\n        Q: {{question}}\n        A: A good person to answer this question would be\n        \"\"\"\n    )\n\n    get_answer = Template.from_string(\n        \"\"\"\n        {{memory}}.\n\n        For instance, {{expert}} would answer\n        \"\"\"\n    )\n\n    model = outlines.from_openai(client, model_name)\n\n    prompt = find_expert(question=question)\n    expert = model(prompt, stop=[\"\\n\", \".\"])\n    prompt = get_answer(expert=expert, memory=prompt+expert)\n    answer = model(prompt, max_tokens=500)\n    completed = prompt + answer\n\n    return completed\n\n\ndef run_example(model_fn, question, model_name):\n    completed = model_fn(question, model_name)\n    print(\"\\n-----------------------\")\n    print(f\"{completed}\")\n\n\nif __name__ == \"__main__\":\n    parser = 
argparse.ArgumentParser(description=\"Run the Meta Prompting examples\")\n    parser.add_argument(\n        \"--model\",\n        type=str,\n        default=\"gpt-4o-mini\",\n        help=\"The Large Language Model to use to run the examples.\",\n    )\n    args = parser.parse_args()\n\n    math_q = \"f(x) = x*x. What is f(f(3))?\"\n    sat_q = \"\"\"\n\nBRAGGART :: MODESTY\nA) FLEDGLING : EXPERIENCE\nB) EMBEZZLER : GREED\nC) WALLFLOWER : TIMIDITY\nD) INVALID : MALADY\nE) CANDIDATE : AMBITION\n\n    \"\"\"\n    alignment_q = \"What should humankind do to ensure that artificial general intelligence is aligned?\"\n    meaning_q = \"What is the meaning of life?\"\n\n    run_example(split_into_steps, math_q, args.model)\n    run_example(\n        split_into_steps, sat_q.lower(), args.model\n    )  # gpt>3.5 usually gets this one right\n    run_example(fill_in_the_blanks, sat_q, args.model)\n    run_example(ask_an_expert, alignment_q, args.model)\n    run_example(ask_an_expert_simple, meaning_q, args.model)\n"
  },
  {
    "path": "examples/modal_example.py",
    "content": "import modal\n\napp = modal.App(name=\"outlines-app\")\n\n\noutlines_image = modal.Image.debian_slim(python_version=\"3.11\").pip_install(\n    \"outlines==1.0.0\",\n    \"transformers==4.38.2\",\n    \"datasets==2.18.0\",\n    \"accelerate==0.27.2\",\n)\n\n\ndef import_model():\n    from transformers import AutoModelForCausalLM, AutoTokenizer\n\n    model_id = \"mistralai/Mistral-7B-Instruct-v0.2\"\n    _ = AutoTokenizer.from_pretrained(model_id)\n    _ = AutoModelForCausalLM.from_pretrained(model_id)\n\n\noutlines_image = outlines_image.run_function(import_model)\n\n\nschema = \"\"\"{\n    \"title\": \"Character\",\n    \"type\": \"object\",\n    \"properties\": {\n        \"name\": {\n            \"title\": \"Name\",\n            \"maxLength\": 10,\n            \"type\": \"string\"\n        },\n        \"age\": {\n            \"title\": \"Age\",\n            \"type\": \"integer\"\n        },\n        \"armor\": {\"$ref\": \"#/definitions/Armor\"},\n        \"weapon\": {\"$ref\": \"#/definitions/Weapon\"},\n        \"strength\": {\n            \"title\": \"Strength\",\n            \"type\": \"integer\"\n        }\n    },\n    \"required\": [\"name\", \"age\", \"armor\", \"weapon\", \"strength\"],\n    \"definitions\": {\n        \"Armor\": {\n            \"title\": \"Armor\",\n            \"description\": \"An enumeration.\",\n            \"enum\": [\"leather\", \"chainmail\", \"plate\"],\n            \"type\": \"string\"\n        },\n        \"Weapon\": {\n            \"title\": \"Weapon\",\n            \"description\": \"An enumeration.\",\n            \"enum\": [\"sword\", \"axe\", \"mace\", \"spear\", \"bow\", \"crossbow\"],\n            \"type\": \"string\"\n        }\n    }\n}\"\"\"\n\n\n@app.function(image=outlines_image, gpu=\"A100-40GB\")\ndef generate(\n    prompt: str = \"Amiri, a 53 year old warrior woman with a sword and leather armor.\",\n):\n    import outlines\n    from transformers import AutoModelForCausalLM, AutoTokenizer\n\n    
model_id = \"mistralai/Mistral-7B-Instruct-v0.2\"\n    model = outlines.from_transformers(\n        tokenizer=AutoTokenizer.from_pretrained(model_id),\n        model=AutoModelForCausalLM.from_pretrained(model_id, device=\"cuda\"),\n    )\n\n    character = model(\n        f\"<s>[INST]Give me a character description. Describe {prompt}.[/INST]\",\n        outlines.json_schema(schema),\n    )\n\n    print(character)\n\n\n@app.local_entrypoint()\ndef main(\n    prompt: str = \"Amiri, a 53 year old warrior woman with a sword and leather armor.\",\n):\n    generate.remote(prompt)\n"
  },
  {
    "path": "examples/pick_odd_one_out.py",
    "content": "\"\"\"Chain-of-thought prompting for Odd one out classification.\n\nExample taken from the LQML library [1]_.\n\nReferences\n----------\n.. [1] Beurer-Kellner, L., Fischer, M., & Vechev, M. (2022).\n       Prompting Is Programming: A Query Language For Large Language Models.\n       arXiv preprint arXiv:2212.06094.\n\n\"\"\"\n\nimport json\n\nimport openai\n\nimport outlines\nfrom outlines import Generator\nfrom outlines.types import JsonSchema\n\n\nbuild_ooo_prompt = outlines.Template.from_file(\"prompts/pick_odd_one_out.txt\")\n\noptions = [\"sea\", \"mountains\", \"plains\", \"sock\"]\noptions_schema = JsonSchema({\n    \"type\": \"object\",\n    \"properties\": {\n        \"result\": {\n            \"type\": \"string\",\n            \"enum\": options\n        }\n    },\n    \"required\": [\"result\"]\n})\n\nmodel = outlines.from_openai(openai.OpenAI(), \"gpt-4o-mini\")\ngen_text = Generator(model)\ngen_choice = Generator(model, options_schema)\n\nprompt = build_ooo_prompt(options=options)\nreasoning = gen_text(prompt, stop=[\"Pick the odd word\", \"So the odd one\"])\nprompt += reasoning\nraw_result = gen_choice(prompt)\nresult = json.loads(raw_result)[\"result\"]\nprompt += result\nprint(result)\n"
  },
  {
    "path": "examples/prompts/babyagi_create_task.txt",
    "content": "Objective: {{ objective }}\nCurrent Task: {{ task }}\nResult: {{ result }}\nPrevious Tasks: {{ previous_tasks }}\n\nBased on the result, create a list of new tasks that will help achieve the objective.\nPlease provide the tasks in the following format:\n1. [Task description]\n2. [Task description]\n"
  },
  {
    "path": "examples/prompts/babyagi_perform_task.txt",
    "content": "Objective: {{ objective }}\nTask: {{ task }}\n\nPlease perform the task and provide a concise result in the following format:\nResult: [Your concise result here]\n"
  },
  {
    "path": "examples/prompts/babyagi_prioritize_task.txt",
    "content": "Tasks: {{ tasks }}\nNext Task ID: {{ next_task_id }}\n\nPlease prioritize the tasks based on their importance and urgency to achieve the objective.\nProvide the prioritized tasks in the following format:\n1. [Task ID]. [Task description]\n2. [Task ID]. [Task description]\n"
  },
  {
    "path": "examples/prompts/dating_profile.txt",
    "content": "You are a world-renowned matchmaker who understands the modern dating market. Your job is to generate dating app profiles for male clients interested in women based on a provided description. The profiles should be authentic, show off their strengths, and maximize their likelihood of getting matches on dating apps.\nHere are some examples of past clients that you have successfully created profiles for:\n{% for example in examples %}\nDescription:\n{{ example.description }}\nProfile:\n{{ example.profile }}\n{% endfor %}\nHere is the new client who you need to create a profile for:\nDescription: {{ description }}\nProfile:\n"
  },
  {
    "path": "examples/prompts/pick_odd_one_out.txt",
    "content": "Pick the odd word out: skirt, dress, pen, jacket.\nskirt is clothing, dress is clothing, pen is an object, jacket is clothing.\nSo the odd one is pen.\n\nPick the odd word out: Spain, France, German, England, Singapore.\nSpain is a country, France is a country, German is a language, ...\nSo the odd one is German.\n\nPick the odd word out: {{ options | join(\", \") }}.\n"
  },
  {
    "path": "examples/prompts/self_consistency.txt",
    "content": "{% for example in examples %}\nQ: {{ example.question }}\nA: {{ example.answer }}\n{% endfor %}\nQ: {{ question }}\nA:\n"
  },
  {
    "path": "examples/react.py",
    "content": "\"\"\"ReAct\n\nThis example was inspired by the LQML library [1]_. The ReAct framework was\nfirst developed in [2]_ and augments Chain-of-Thought prompting with the ability\nfor the model to query external sources.\n\nReferences\n----------\n.. [1] Beurer-Kellner, L., Fischer, M., & Vechev, M. (2022). Prompting Is Programming: A Query Language For Large Language Models. arXiv preprint arXiv:2212.06094.\n.. [2] Yao, S., Zhao, J., Yu, D., Du, N., Shafran, I., Narasimhan, K., & Cao, Y. (2022). React: Synergizing reasoning and acting in language models. arXiv preprint arXiv:2210.03629.\n\n\"\"\"\n\nimport json\n\nimport requests  # type: ignore\nfrom openai import OpenAI\n\nimport outlines\nfrom outlines import Generator, Template\nfrom outlines.types import JsonSchema\n\n\nbuild_reAct_prompt = Template.from_string(\n    \"\"\"What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\nTho 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado ...\nAct 2: Search 'Colorado orogeny'\nObs 2: The Colorado orogeny was an episode of mountain building (an orogeny) ...\nTho 3: It does not mention the eastern sector. So I need to look up eastern sector.\n...\nTho 4: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\nAct 5: Finish '1,800 to 7,000 ft'\n{{ question }}\n\"\"\"\n)\n\n\nadd_mode = Template.from_string(\n    \"\"\"{{ prompt }}\n{{ mode }} {{ i }}: {{ result }}\n\"\"\"\n)\n\n\ndef search_wikipedia(query: str):\n    url = f\"https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro&explaintext&redirects=1&titles={query}&origin=*\"\n    response = requests.get(url)\n    page = response.json()[\"query\"][\"pages\"]\n    return \".\".join(list(page.values())[0][\"extract\"].split(\".\")[:2])\n\n\nprompt = build_reAct_prompt(question=\"Where is Apple Computers headquarted? 
\")\nmodel = outlines.from_openai(OpenAI(), \"gpt-4o-mini\")\n\n# Define JSON schemas for mode and action\nmode_schema = JsonSchema({\n    \"type\": \"object\",\n    \"properties\": {\n        \"result\": {\n            \"type\": \"string\",\n            \"enum\": [\"Tho\", \"Act\"]\n        }\n    },\n    \"required\": [\"result\"]\n})\naction_schema = JsonSchema({\n    \"type\": \"object\",\n    \"properties\": {\n        \"result\": {\n            \"type\": \"string\",\n            \"enum\": [\"Search\", \"Finish\"]\n        }\n    },\n    \"required\": [\"result\"]\n})\n\nmode_generator = Generator(model, mode_schema)\naction_generator = Generator(model, action_schema)\ntext_generator = Generator(model)\n\nfor i in range(1, 10):\n    mode_output = mode_generator(prompt, max_tokens=128)\n    mode = json.loads(mode_output)[\"result\"]  # Extract the result from the JSON output\n    prompt = add_mode(i=i, mode=mode, result=\"\", prompt=prompt)\n\n    if mode == \"Tho\":\n        thought = text_generator(prompt, stop=\"\\n\", max_tokens=128)\n        prompt += f\"{thought}\"\n    elif mode == \"Act\":\n        action_output = action_generator(prompt, max_tokens=128)\n        action = json.loads(action_output)[\"result\"]  # Extract the result from the JSON output\n        prompt += f\"{action} '\"\n\n        subject = text_generator(prompt, stop=[\"'\"], max_tokens=128)\n        # Apple Computers headquartered\n        subject = \" \".join(subject.split()[:2])\n        prompt += f\"{subject}'\"\n\n        if action == \"Search\":\n            result = search_wikipedia(subject)\n            prompt = add_mode(i=i, mode=\"Obs\", result=result, prompt=prompt)\n        else:\n            break\n\nprint(prompt)\n"
  },
  {
    "path": "examples/sampling.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"id\": \"62129e1a-e9de-454e-a714-35ccbcf0b518\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"#OK\\n\",\n    \"import functools as ft\\n\",\n    \"import re\\n\",\n    \"\\n\",\n    \"import numpy as np\\n\",\n    \"import matplotlib.pylab as plt\\n\",\n    \"import openai\\n\",\n    \"\\n\",\n    \"import outlines\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 13,\n   \"id\": \"b20aafe8-b7a3-4df4-878f-b48b74e131df\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"env: OPENAI_API_KEY=# you key here\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"%env OPENAI_API_KEY= # you key here\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"2a3514d6-d5d7-46e9-9b69-1251d337e094\",\n   \"metadata\": {},\n   \"source\": [\n    \"In this example we will look at completion results for questions similar to those in the GSM8K dataset, using few-shots prompts with 5 examples. We first use `outlines.Template` to build the few-shot prompt. Outlines uses the Jinja2 templating engine to render the object when the function is called with the variables' values; it thus allows you to build complex prompts very easily.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 3,\n   \"id\": \"ffe8bb11-6b51-4fe7-bfb3-c62556a60db8\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"examples = [\\n\",\n    \"    {\\n\",\n    \"        \\\"question\\\": \\\"There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?\\\",\\n\",\n    \"        \\\"answer\\\": \\\"We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. 
So, they must have planted 21 - 15 = 6 trees. The answer is 6.\\\",\\n\",\n    \"    },\\n\",\n    \"    {\\n\",\n    \"        \\\"question\\\": \\\"If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?\\\",\\n\",\n    \"        \\\"answer\\\": \\\"There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.\\\",\\n\",\n    \"    },\\n\",\n    \"    {\\n\",\n    \"        \\\"question\\\": \\\"Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?\\\",\\n\",\n    \"        \\\"answer\\\": \\\"Leah had 32 chocolates and Leah’s sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.\\\",\\n\",\n    \"    },\\n\",\n    \"    {\\n\",\n    \"        \\\"question\\\": \\\"Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?\\\",\\n\",\n    \"        \\\"answer\\\": \\\"Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.\\\",\\n\",\n    \"    },\\n\",\n    \"    {\\n\",\n    \"        \\\"question\\\": \\\"Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?\\\",\\n\",\n    \"        \\\"answer\\\": \\\"He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.\\\",\\n\",\n    \"    },\\n\",\n    \"    {\\n\",\n    \"        \\\"question\\\": \\\"There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. 
How many computers are now in the server room?\\\",\\n\",\n    \"        \\\"answer\\\": \\\"There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.\\\",\\n\",\n    \"    },\\n\",\n    \"    {\\n\",\n    \"        \\\"question\\\": \\\"Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?\\\",\\n\",\n    \"        \\\"answer\\\": \\\"Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.\\\",\\n\",\n    \"    },\\n\",\n    \"    {\\n\",\n    \"        \\\"question\\\": \\\"Olivia has $23. She bought five bagels for $3 each. How much money does she have left?\\\",\\n\",\n    \"        \\\"answer\\\": \\\"She bought 5 bagels for $3 each. 
This means she spent 5\\\",\\n\",\n    \"    },\\n\",\n    \"]\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"few_shot_prompt = outlines.Template.from_string(\\n\",\n    \"    \\\"\\\"\\\"\\n\",\n    \"    {% for example in examples %}\\n\",\n    \"    Q: {{ example.question }}\\n\",\n    \"    A: {{ example.answer }}\\n\",\n    \"    {% endfor %}\\n\",\n    \"    Q: {{ question }}\\n\",\n    \"    A:\\n\",\n    \"    \\\"\\\"\\\"\\n\",\n    \")\\n\",\n    \"\\n\",\n    \"# Template instances can be partially evaluated because they are callable objects\\n\",\n    \"gsm8k_prompt = ft.partial(few_shot_prompt, examples=examples)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"1eae0ec8-89f0-43fc-b055-6fcd64cbc03b\",\n   \"metadata\": {},\n   \"source\": [\n    \"## When `gpt-4o-mini` is uncertain\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"a273ed78-e813-467e-85f3-16d7f283ba87\",\n   \"metadata\": {},\n   \"source\": [\n    \"Let us now sample 20 completions with the `gpt-4o-mini` model. Outlines is sampling-first, and lets you draw several samples with both OpenAI and `transformers` models easily:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"id\": \"beff960d-6833-4f24-af09-5b65886a9549\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model = outlines.from_openai(openai.OpenAI(), \\\"gpt-4o\\\")\\n\",\n    \"\\n\",\n    \"question = \\\"When I was 6, my sister was half the age of my brother. When I was 14, my sister was 3 years younger than my brother. Now I'm 70, how old is my sister now?\\\"\\n\",\n    \"prompt = gsm8k_prompt(question=question)\\n\",\n    \"answers = model(prompt, n=20, max_tokens=512)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"1a895b6d-d4d4-40f9-9156-24ba7e21cc08\",\n   \"metadata\": {},\n   \"source\": [\n    \"The correct answer to this question is 67. Let us now count the different answers, and take a look at their distribution. 
Let us first define a few utility functions:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 5,\n   \"id\": \"f1c83d1f-a478-4509-890e-b84a2e0d8846\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def count_digits(answers):\\n\",\n    \"    digits = []\\n\",\n    \"    for answer in answers:\\n\",\n    \"        try:\\n\",\n    \"            match = re.findall(r\\\"\\\\d+\\\", answer)[-1]\\n\",\n    \"            if match is not None:\\n\",\n    \"                digit = int(match)\\n\",\n    \"                digits.append(digit)\\n\",\n    \"        except AttributeError:\\n\",\n    \"            print(f\\\"Could not parse the completion: '{answer}'\\\")\\n\",\n    \"\\n\",\n    \"    unique_digits, counts = np.unique(digits, return_counts=True)\\n\",\n    \"    return {d: c for d, c in zip(unique_digits, counts)}\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"def plot_counts(counts):\\n\",\n    \"    fig = plt.figure(figsize=(12, 8))\\n\",\n    \"    ax = fig.add_subplot(111)\\n\",\n    \"\\n\",\n    \"    bar = ax.bar(counts.keys(), counts.values())\\n\",\n    \"    ax.spines[[\\\"right\\\", \\\"top\\\", \\\"left\\\"]].set_visible(False)\\n\",\n    \"    ax.get_yaxis().set_visible(False)\\n\",\n    \"    ax.get_yaxis().set_visible(False)\\n\",\n    \"\\n\",\n    \"    for rect in bar:\\n\",\n    \"        height = rect.get_height()\\n\",\n    \"        plt.text(\\n\",\n    \"            rect.get_x() + rect.get_width() / 2.0,\\n\",\n    \"            height,\\n\",\n    \"            f\\\"{height:.0f}\\\",\\n\",\n    \"            ha=\\\"center\\\",\\n\",\n    \"            va=\\\"bottom\\\",\\n\",\n    \"            fontsize=20,\\n\",\n    \"        )\\n\",\n    \"\\n\",\n    \"    ax.set_xticks(list(counts.keys()))\\n\",\n    \"    ax.set_xlabel(\\\"Answer\\\")\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"def entropy(counts):\\n\",\n    \"    counts = np.array(list(counts.values()))\\n\",\n    \"    probs = counts / 
np.sum(counts)\\n\",\n    \"    log_probs = np.log(probs)\\n\",\n    \"    return -np.sum(probs * log_probs)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 6,\n   \"id\": \"88668e09-bcd6-4a6a-83a5-838189b910eb\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"image/png\": \"iVBORw0KGgoAAAANSUhEUgAAAqsAAAHgCAYAAACCbCTDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVwklEQVR4nO3df7DldX3f8dcbFtmVLb9cDDgLrB0DjNWUlWQzWBaDtEJg0BBiJk5VsKLFKRSsU7u2M8wKZouDjsg4Y0chBn9MTfgh3QkapQiCHSJWFigBAlMgggUkJlVJDXXh0z/2ILuwF9LZe+95372Px8yZvff7PeznvcB893m/53vOt8YYAQCAjnaZ9gAAADATsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtLXkRfb7XCsAAOZDbW+jM6sAALQlVgEAaEusAgvGddddl5NPPjn7779/dt9997ziFa/Icccdl69+9avTHg3YiTn2TNeLXbMK0MIHP/jBXHjhhVm5cmXe/OY3Z8WKFXn88cfzve99LzfccENOOOGEaY8I7IQce6avxnjB91B5gxUwdZ/97Gfz3ve+N6eeemo+85nP5CUveck2+3/+859nt912m9J0wM7KsWfebfcNVmIVaO3JJ5/MgQcemGXLluW+++573l8WAHPBsWcqthurLgMAWrv22mvz+OOP55xzzskuu+ySa665JnfeeWeWLl2aNWvW5Mgjj5z2iMBOyLGnD7EKtPbd7343SbJ06dKsXr06d9555zb7jz766FxxxRXZb7/9pjEesJNy7OnDpwEArf3whz9Mklx44YWpqtx000356U9/mjvuuCNvetObcuONN+atb33rlKcEdjaOPX2IVaC1p59+OkmyZMmSbNy4MUcddVSWL1+e1772tfnKV76SlStX5lvf+lZuvvnmKU8K7Ewce/oQq0Bre++9d5Jk9erVWbVq1Tb7XvrSl+a4445Lktxyyy3zPBmwM3Ps6UOsAq0deuihSZ79i+O59tlnnyTJz372s/kaCVgEHHv6EKtAa8cee2yqKnfdddcvXpbb2jNvenjlK18536MBOzHHnj7EKtDawQcfnJNOOinf//7388lPfnKbfd/4xjfy9a9/PXvvvXeOP/74KU0I7Iwce/pwUwCgvYcffjivf/3r89BDD+XYY4/N6tWr88ADD+Tqq69OVeXLX/5yTjnllGmPCexkHHvmnTtYAQvX448/nvPOOy8bN27MI488kj333DNr167Nhz70oaxZs2ba4wE7KceeeSVWAQBoa7ux6ppVAADaEqsAALQlVgEAaGvJtAcAeDGr1l0z474HLzhxHicBFhPHnh6cWQUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAU
AoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCAAvaF7/4xVRVqiqXXHLJtMdhlolVAGDBeuihh3LmmWdm+fLl0x6FOSJWAYAFaYyRd73rXXnZy16WM844Y9rjMEfEKgCwIF188cX55je/mc997nPZY489pj0Oc0SsAgALzt13351169bl7LPPztFHHz3tcZhDYhUAWFA2b96cd7zjHTnooIOyYcOGaY/DHFsy7QEAAP5/nHfeedm0aVO+/e1vZ9myZdMehznmzCoAsGB85zvfyYYNG/KBD3wgRx555LTHYR6IVQBgQdi8eXPe+c535pBDDsn5558/7XGYJ2IVAFgQnnjiidx77725++67s3Tp0l/cCKCq8uEPfzhJ8p73vCdVlXPOOWe6wzJrXLMKACwIu+++e9797ndvd9+tt96aTZs25aijjsqhhx7qEoGdiFgFABaEZcuWzXg71fXr12fTpk059dRTc/rpp8/zZMwllwEAANCWWAUAoC2xCgAseOvXr88YwyUAOyGxCgBAW2IVAIC2xCoAAG356CoAYMFYte6aF9z/4AUnztMkzBdnVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAAIvEj370o1xyySU5+eST86pXvSrLli3LXnvtlaOOOiqXXnppnn766WmP+DxLpj0AAADz4/LLL8/73ve+HHDAATnmmGNy0EEH5bHHHstVV12V008/PV/72tdy+eWXp6qmPeoviFUAgEXikEMOycaNG3PiiSdml12efYF
9w4YNWbNmTa688spcddVVOeWUU6Y45bZcBgAAsEi88Y1vzEknnbRNqCbJ/vvvnzPOOCNJcsMNN0xhspmJVQAAsttuuyVJlizp9cK7WAUAWOQ2b96cz3/+80mS448/fsrTbEusAgAscuvWrcudd96ZE044Iccdd9y0x9mGWAUAWMQuvvjifPzjH89hhx2WL3zhC9Me53nEKgDAIvWpT30qZ599dl796lfn+uuvz7777jvtkZ5HrAIALEIXXXRRzjrrrLzmNa/J9ddfn/3333/aI22XWAUAWGQ++tGP5v3vf38OP/zwXH/99Xn5y18+7ZFmJFYBABaR888/P+vWrcsRRxyR6667LitWrJj2SC+o1wdpAQAwZy677LKce+652XXXXbN27dpcfPHFz3vOqlWrctppp83/cDMQqwAAi8QDDzyQJHnqqady0UUXbfc5b3jDG1rFqssAAAAWifXr12eM8YIPt1sFAIC/J7EKAEBbYhUAgLa8wQoAYBFZte6aGfc9eMGJ8zjJ348zqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoq2WsXnHFFTnrrLOydu3a7LnnnqmqvP3tb5/2WAAAc0L7zGzJtAfYno985CO5/fbbs3z58qxcuTL33HPPtEcCAJgz2mdmLc+sfuITn8i9996bn/zkJ/n0pz897XEAAOaU9plZyzOrxxxzzLRHAACYN9pnZi3PrAIAQCJWAQBoTKwCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2mp5U4Crr746V199dZLk0UcfTZLcfPPNOe2005IkK1asyMc+9rEpTQcAMLu0z8xaxuptt92Wyy67bJtt999/f+6///4kycEHH7xo/4MBADsf7TOzlpcBrF+/PmOMGR8PPvjgtEcEAJg12mdmLWMVAAASsQoAQGNiFQCAtlq+wSpJVq27ZsZ9D15w4jxOAgAwt3TPzJxZBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG3VGGPmnVV/mmTF/I0zoxVJ/mraQwBtOCYAz5jP44G15tZfjTGOf+7GF4zVLqrqv48xfnXacwA9OCYAz5jP44G1psNlAAAAtCVWAQBoa6HE6memPQDQimMC8Iz5PB5YawoWxDWrAAAsTgvlzCoAAItQ+1itqr2r6oqquqeq7q6qI6c9EzA/qmppVd1SVbdX1Z9X1Ycn2/+wqh6oqtsmj8OnPCowD16oCarqA1U1qmpWPnJze2tV1R9tddx5sKpum4V1Dt3q97y
tqn5SVedU1b5VdW1V3Tf5dZ85XOv8qrpjsu0bVfWKHV1rNrW/DKCqLkty0xjjkqp6SZKXjjH+95THAuZBVVWSPcYYT1TVbkm+neTsJGck+ZMxxhVTHRCYVzM1QVUdmOSSJIclOWKMscOfGfpi/VFVH0/y4zHGeTu61la/565JfpDk15P8qyR/Pca4oKrWJdlnjPHv5mitvxlj/GSy/V8nefUY44zZWmtHtT6zWlV7JTk6yaVJMsb4v0IVFo+xxROTb3ebPHr/hA3MiRdpgk8k+WBm6fjwYv0x+UH6d5P859lYbyvHJvmfY4y/TPKWJJdNtl+W5Lfmaq1nQnVijzQ7zraO1SSvTPJ4ks9V1aaquqSq9pj2UMD8qapdJy+1/TDJtWOM70x2/f7kZatPVNXu05sQmCfbbYKqekuSH4wxbp/rtbbavzbJY2OM+2ZxzST5vTwbwL80xnhk8vWjSX5pDtdKVf1+VT2U5J8nOXeW19oh3WN1SZLXJfn0GGN1kr9Nsm66IwHzaYzx1Bjj8CQrk6ypqtck+VC2vNz3a0n2TTJrL40BbW2vCdYn+feZ/bh6sf54W2b5rOrkUoM3J7n8ufvGlms2Z+1s5/bWGmP8hzHGgUm+lOTM2VprNnSP1YeTPLzVmZQrsuV/HmCRmbwEd32S48cYj0wuEXgyyeeSrJnqcMB8mKkJXpnk9qp6MFt+qL21qvafo7VSVUuS/HaSP9rBNZ7rN5PcOsZ4bPL9Y1V1wGTNA7Ll1aW5WmtrX0pyyiyutcNax+oY49EkD1XVoZNNxya5a4ojAfOoqvarqr0nXy9L8s+S3LPVAbyy5TquO6c1IzA/ZmiCW8cYLx9jrBpjrMqWyHzd5LmzvdYz/fFPk9wzxnh4R9bYjueerd2Y5NTJ16cm+S9ztVZV/fJW+96S5J5ZXGuHLYRPAzg8W97h95Ik9yd51xjjb6Y6FDAvqupXsuWNBbtmyw/XfzzGOK+qvplkvySV5LYkZ2z1RixgJ/ViTTA5u/qrs/RpANtdq6r+MMmfjTH+046usdVaeyT5fpJ/OMb48WTby5L8cZKDkvxlkt8dY/z1HK11ZZJDkzw9WeuMMcYPdnSt2dI+VgEAWLxaXwYAAMDiJlYBAGhLrAIA0JZYBQCgLbEKAEBbYhXgOarqt6pqVNVh054FYLETqwDP97Yk3578OhWTu+QALHpiFWArVbU8yVFJ3p3k9ybbfqOqbqiqK6rqnqr60uTuWamqC6rqrqq6o6o+VlW7VtUDtcXeVfVUVR09ee6NVfXLVbVHVf1BVd1SVZuq6i2T/adV1cbJTQ+um86/AYBe/OQOsK23JPnTMca9VfWjqjpisn11kn+U5H8l+W9J/klV3Z3k5CSHjTFGVe09xniqqv4iyauz5Z7ltyZZW1XfSXLgGOO+qtqQ5JtjjH8xuZ3sLVX1XyfrvC7Jr8zGnWoAdgbOrAJs621Jvjz5+st59lKAW8YYD48xns6WW7yuSvLjJH+X5NKq+u0k/2fy3JuSHD15/MdsOVP7a0m+O9n/piTrquq2JDckWZott1RMkmuFKsCznFkFmKiqfZO8Mclrq2ok2TXJSHJNkie3eupTSZaMMTZX1Zokxyb5nSRnTv75G5O8L8krkpyb5N8m+Y1sidgkqSSnjDH+4jnr/3qSv52TPxzAAuXMKsCzfifJF8YYB48xVo0xDkzyQJK123vy5PrWvcYYX03y/iT/eLLrliSvT/L0GOPvsuVM7L/MlohNkq8nOWur615Xz9GfB2DBE6sAz3pbkq88Z9uVmflTAf5Bkj+pqjuy5dMD/k2SjDGeTPJQkj+bPO+myXP/x+T785PsluSOqvrzyfcAbEeNMaY9AwAAbJczqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2vp/jdj4sUZoV2sAAAAASUVORK5CYII=\",\n      \"text/plain\": [\n       \"<Figure 
size 864x576 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {\n      \"needs_background\": \"light\"\n     },\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"counts = count_digits(answers)\\n\",\n    \"plot_counts(counts)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"661a1135-ac2d-4a49-a786-d04a7ba68b48\",\n   \"metadata\": {},\n   \"source\": [\n    \"We see that there is significant variability in the answers given by `gpt-4o-mini`. Depending on the number of samples taken, even self-consistency sampling may lead to the wrong result here.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 7,\n   \"id\": \"30ea0dfe-6c15-44f0-881c-88b325542b44\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Entropy: 1.5741030017371853\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print(f\\\"Entropy: {entropy(counts)}\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"0b15b230-b667-4c9c-8a5d-366dd61de9b7\",\n   \"metadata\": {},\n   \"source\": [\n    \"## `gpt-4o-mini` on an easier question\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"beae30f0-4168-4a80-90d4-d26a4f476469\",\n   \"metadata\": {},\n   \"source\": [\n    \"Let us now look at the results for an arguably easier question:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 8,\n   \"id\": \"7e106b94-2dfd-4a75-b4d9-b1ad693418a7\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"question = \\\"When I was 6 my sister was half my age. 
Now I’m 70 how old is my sister?\\\"\\n\",\n    \"prompt = gsm8k_prompt(question)\\n\",\n    \"answers = model(question, samples=20, max_tokens=512)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 9,\n   \"id\": \"dd46fb2b-08ef-4003-8d03-ea0f39c865c4\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Entropy: 0.1985152433458726\\n\"\n     ]\n    },\n    {\n     \"data\": {\n      \"image/png\": \"iVBORw0KGgoAAAANSUhEUgAAAqsAAAHgCAYAAACCbCTDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQuklEQVR4nO3db6ye9V3H8c+PPxGxIjMVghSLkCYgOtmUzSCd6xii8mCIyxBCTBUTWbBmIzEhWeIAUfegSYkha6JIQsmSRaoclnUbmeCyQbrBoi2GrtSlEIZKxnQOCmPB9vLBuaH8aUui4dyfwuuVnPQ+13Wd3N/TR+/++ruva0zTFAAAaHTEvAcAAICDEasAANQSqwAA1BKrAADUEqsAANQSqwAA1Drqdc67rxUAAEthHOiglVUAAGqJVQAAaolVAIA3sc2bN2fdunVZvXp1jjvuuIwxcsUVVxz0+meeeSYf+9jHcsYZZ+SYY47J2972tlx44YW55557lnDq/V5vzyoAAIexG2+8Mdu3b8+yZcuyYsWK7Ny586DXfve73815552XHTt25KyzzspVV12VPXv25K677sr73//+3HLLLbnyyiuXcHorqwAAb2obNmzIrl278vTTT2fjxo2HvPa6667Ljh07cskll2Tbtm256aabcsstt+Thhx/OKaecknXr1uWJJ55YoskXiVUAgDexNWvWZNWqVRnjgB+2f4U777wzSXLDDTfkqKP2/wf8CSeckGuuuSbf//73c+utt75hsx6IWAUAIEny5JNPJklOO+2015x78dhS710VqwAAJEmWL1+eJHn00Udfc2737t1JkkceeWRJZxKrAAAkSS666KIkycc//vHs3bv3peNPPfVUNmzYkGTxQ1hLyd0AAABIsrhX9e67787mzZtz9tln5/zzz8+zzz6bu+66KyeffHIef/zxHHHE0q51WlkFACBJctJJJ+XBBx/M1VdfnWeeeSaf/OQns2XLllx66aW54447kix+2GopWVkFAOAlJ554Ym6++ebcfPPNrzh+7733JknOOeecJZ3HyioAAK9r06ZNSZLLL798Sd9XrAIAkCTZt29f9uzZ85rjt99+ezZt2pRzzz03F1988ZLOZBsAAMCb2MLCQhYWFpLsv4/q1q1bs3bt2iSLt6tav359kuS5557LiSeemAsuuCCnn356jjjiiNx///3ZunVrzjzzzNxxxx1L/gGrMU3Toc4f8iQAAN2uu+66XH/99Qc9v3Llyjz22GNJkhdeeCFXXXVV7rvvvpceq7pq1ap86EMfykc+8pEce+yxb+SoB3zEllgFAKDBAWPVnlUAAGqJVQAAaolVAABqiVUAAHLqtVvmPcIBiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAA
GqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAA
GqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGrVxOrmzZuzbt26rF69Oscdd1zGGLniiivmPRYAAHN01LwHeNGNN96Y7du3Z9myZVmxYkV27tw575EAAJizmpXVDRs2ZNeuXXn66aezcePGeY8DAECBmpXVNWvWzHsEAADK1KysAgDAq4lVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABq1TwUYGFhIQsLC0mSJ598MkmydevWrF27NkmyfPnyrF+/fk7TAQAwDzWxum3bttx2222vOLZ79+7s3r07SbJy5UqxCgDwFjOmaTrU+UOeBADgzeHUa7fksU9cNM8RxoEO2rMKAEAtsQoAQC2xCgBArdpYPfXaLfMeAQCAOauNVQAAEKsAANQSqwAA1BKrAADUEqsAANQSqwAA1BKrAADUEqsAANQSqwAA1BKrAADUEqsAANQSqwAA1BKrAADUEqsAANQSqwAA1BKrAADUEqsAANQSqwAA1BKrAADUEqsAANQSqwAA1BrTNB385BhfSLJ86cZ5heVJvjOn9wYAeCuaZ399Z5qmX3v1wUPG6jyNMb4+TdMvznsOAIC3isb+sg0AAIBaYhUAgFrNsfpX8x4AAOAtpq6/avesAgBA88oqAABvcXWxOsY4ZozxwBhj+xjj4THG9fOeCQDgcHewxhqL/myMsWuM8Y0xxh+97PhfjjG+OcZ4aIzxznnMfdQ83vR1/CDJ+6Zp2jPGODrJfWOMz0/T9NV5DwYAcBg7YGMlOTPJKUnOmKZp3xjjhNn1v55k1ezr3Uk2zv5cUnWxOi1uot0z+/bo2ZeNtQAA/w+HaKwPJ7l8mqZ9s+u+PbvmA0k2zX7uq2OM48cYJ03T9B9LOXfdNoAkGWMcOcbYluTbSb44TdPX5jwSAMBh7yCNdXqSS8cYXx9jfH6MsWp2+clJvvWyH39idmxJVcbqNE17p2k6O8mKJO8aY/zsnEcCADjsHaSxfijJ87MnV/11klvnOOJrVMbqi6Zp+u8k/5jkNc+JBQDg/+ZVjfVEkr+fnbozydtnr/8ti3tZX7RidmxJ1cXqGOMnxhjHz17/cJILkuyc61AAAIe5QzTWQpI1s8t+Jcmu2evPJPmd2V0BfinJ95Z6v2pS+AGrJCcluW2McWQWY/pvp2n67JxnAgA43B2wscYY9yX51Bjjo1n8ANbvz67/XJLfSPLNJM8l+d05zOwJVgAA9KrbBgAAAC8SqwAA1BKrAADUEqsAANQSqwAA1BKrAK8yxrh4jDGNMc6Y9ywAb3ViFeC1Lkty3+zPuRhjNN4HG2DJiVWAlxljLEtyXpIrk/z27Nh7xxhfGmNsHmPsHGN8aowxZuc+McbYMcZ4aIyxfoxx5Bjj0dkTX44fY+wdY7xndu2Xxxirxhg/Msa4dYzxwBjjn8cYH5idXzvG+MwY494k98znbwCgi3+5A7zSB5J8YZqmXWOM/xxj/MLs+DuSnJXk35Pcn+SXxxjfSPKbSc6YpmkaYxw/TdPeMcYjSX4myU8n+ackq8cYX0tyyjRN/zrG+PMk907T9HuzRx8+MMb4h9n7vDPJ26dp+q+l+oUBmllZBXily5J8evb609m/FeCBaZqemKZpX5JtSU5N8r0kzyf5mzHGJVl8HGGSfCXJe2Zff5HFldpzkjw4O
/+rSa4dY2xL8qUkxyT5qdm5LwpVgP2srALMjDF+PMn7kvzcGGNKcmSSKcmWJD942aV7kxw1TdP/jDHeleT8JB9M8oezn/9ykg8n+ckkf5Lkj5O8N4sRmyQjyW9N0/TIq97/3UmefUN+OYDDlJVVgP0+mOT2aZpWTtN06jRNpyR5NMnqA10829/6Y9M0fS7JR5P8/OzUA0nOTbJvmqbns7gS+wdZjNgkuTvJupfte33HG/T7ABz2xCrAfpclufNVx/4uB78rwI8m+ewY46Es3j3gmiSZpukHSb6V5Kuz674yu/ZfZt//aZKjkzw0xnh49j0ABzCmaZr3DAAAcEBWVgEAqCVWAQCoJVYBAKglVgEAqCVWAQCoJVYBAKglVgEAqCVWAQCo9b+lzUDoz9UHogAAAABJRU5ErkJggg==\",\n      \"text/plain\": [\n       \"<Figure size 864x576 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {\n      \"needs_background\": \"light\"\n     },\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"counts = count_digits(answers)\\n\",\n    \"plot_counts(counts)\\n\",\n    \"print(f\\\"Entropy: {entropy(counts)}\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"cf4cacdf-a31d-43bd-8517-eec9f656eee4\",\n   \"metadata\": {},\n   \"source\": [\n    \"The entropy of the results is much lower, we say that the model is more \\\"certain\\\" of its answers. \"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"22f31872-aab7-4a68-b9f2-d335a4f1a875\",\n   \"metadata\": {},\n   \"source\": [\n    \"## How `gpt-4` compares to `gpt-4o-mini`\\n\",\n    \"\\n\",\n    \"Let us now look at how GPT4 fares on the original question:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 11,\n   \"id\": \"2d5ab5b8-eca5-47f5-a35c-5f3865e35755\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model = outlines.from_openai(openai.OpenAI(), \\\"gpt-4\\\")\\n\",\n    \"\\n\",\n    \"question = \\\"When I was 6, my sister was half the age of my brother. When I was 14, my sister was 3 years younger than my brother. 
Now I'm 70, how old is my sister now?\\\"\\n\",\n    \"prompt = gsm8k_prompt(question)\\n\",\n    \"answers = model(prompt, samples=20, max_tokens=512)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 12,\n   \"id\": \"d316a5f7-cebc-4b09-9b1b-aee219b2f088\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Entropy: -0.0\\n\"\n     ]\n    },\n    {\n     \"data\": {\n      \"image/png\": \"iVBORw0KGgoAAAANSUhEUgAAAqwAAAHgCAYAAABgsD+6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQI0lEQVR4nO3dYahf9X3H8c+vua6J3Ui4hKKDOpVMxTmn1Tlwatt0rKKUziZgfJa5B2tg0TkY5EEZLepWYYITwTJ0o5Wi08ypXYdjMxG76OpkLc51jQUdU4ZiHaagREg8e5C/0WhiNWlyP7l9vSDk/s/v/Lnffx6Ed05+95wxTVMAAKDVhxZ6AAAAeC+CFQCAaoIVAIBqghUAgGqCFQCAaoIVAIBqcz9h3T2vAAA4EsaBFlxhBQCgmmAFAKCaYAV4H15++eXcdtttueyyy7Jq1aosW7Ysy5cvzwUXXJDbb789b7zxxn7f9+ijj+aSSy7J/Px8li1bljPPPDM33XRTdu/efYQ/AcDRa/yER7PawwqQ5Ktf/Wo2bNiQ448/Pp/61Kdywgkn5MUXX8y9996bHTt2ZM2aNbnnnnsyxltbsO6///6sWbMmS5cuzeWXX575+fl885vfzPbt27N27drcc889C/iJAOoccA+rYAV4H7Zs2ZJXX301l156aT70obf+c+qFF17Ieeedl+eeey6bN2/OmjVrkiQ//vGPs2rVquzYsSPbtm3LueeemyTZuXNnVq9encceeyx33nln1q1btyCfB6CQH7oCOBSrV6/OZz/72X1iNUmOO+64fOELX0iSPPzww3uPb968OS+99FLWrVu3N1aTZOnSpbnuuuuSJLfeeuvhHxxgERCsAIfomGOOSZLMzb11p8AtW7YkSS6++OJ3nX/RRRfl2GOPzaOPPprXX3/9yAwJcBQTrACHYNeuXfn617+eZN843b59e5LklFNOedd75ubmctJJJ2XXrl155plnjsygAEcxwQpwCDZt2pSnnnoql1xyST7zmc/sPb5jx44kyfLly/f7vjePv/LKK4d9RoCjnWAFOEg333xzbrzxxpx22mm54447FnocgEVLsAIchFtuuSVXX311Tj/99GzdujXz8/P7rL95BfXNK63v9ObxFStWHNY5ARYDwQrwAd10003ZuHFjzjjjjGzdujXHHXfcu8459dRTkyRPP/30u9Z27dqVZ599NnNzczn55JMP+7wARzvBCvAB3HDDDbnmmmty1llnZevWrfnoRz+63/NWr16dJHnwwQfftfbII4/ktddey/nnn58Pf/jDh3VegMVAsAK8T9dee202bdqUc845Jw899FBWrlx5wHPXrl2blStX5q677soTTzyx9/jOnTvzxS9+MUmyYcOGwz4zwGLgSVcA78PXvva1rF+/PkuWLMnGjRv3+9P/J554YtavX7/39X333Ze1a9dm6dKlWbduXebn5/PAAw/sfTTr3Xffvc+jXAF+xnk0K8Ch+NK
XvpQvf/nL73nOJz7xiX2edpUk27Zty/XXX5/HHnssO3fuzKpVq3LllVfmqquuypIlSw7jxABHHcEKAEC1AwarPawAAFQTrAAAVBOsAABUm1voAQ7kxE3fWugRAAB+pvz3Vy5d6BH2yxVWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqgl
WAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKqNaZoOvDjGg0lWHrlxABaFlUl+tNBDABxlfjRN08X7W3jPYAXggxtjPDFN07kLPQfAYmFLAAAA1QQrAADVBCvAT99fLvQAAIuJPawAAFRzhRUAgGpzCz0AwNFsjLEiyW1JzkgyJbkyyR8mOXV2yookr0zTdNaRnw5gcRCsAIfmL5I8OE3T2jHGzyU5dpqmy99cHGPcmGTHgk0HsAjYwwpwkMYYy5N8L8nJ037+Mh1jjCT/k2T1NE0/PMLjASwa9rACHLyTkryU5K/HGN8dY9w2xvjI29YvTPKiWAU4NIIV4ODNJfl4klunaTo7yatJNr1t/Yokdy7EYACLiWAFOHjPJ3l+mqbvzF5vzp6AzRhjLsnnk/zNAs0GsGgIVoCDNE3TC0meG2O8eUeATyf5/uzr30ryg2manl+Q4QAWEXcJADg0G5N8Y3aHgGeS/O7s+LrYDgDwU+EuAQAAVLMlAACAaoIVAIBqghUAgGqCFQCAaoIVAIBqghXgHcYYvzPGmMYYpy30LAAIVoD9uSLJv8x+XxCzJ2UBEMEKsI8xxs8nuSDJ72XPzf8zxvjkGOPhMcbmMcYPxhjfGGOM2dpXxhjfH2M8Ocb48zHGkjHGs2OPFWOM3WOMi2bnPjLG+OUxxkfGGH81xnh8jPHdMcbnZuvrxxgPjDG2JHloYf4EAPr4FzzAvj6X5MFpmp4eY7w8xjhndvzsJL+S5H+TbEvym2OM/0pyWZLTpmmaxhgrpmnaPcbYnuT0JCcl+fckF44xvpPkY9M0/XCM8adJtkzTdOUYY0WSx8cY/zz7Ph9PcuY0Tf93pD4wQDtXWAH2dUWSu2Zf35W3tgU8Pk3T89M0vZHke0lOTLIjyc4kt48xPp/ktdm5305y0ezXn2XPFdtfT/Jvs/XfTrJpjPG9JA8nWZrkhNnaP4lVgH25wgowM8aYT7I6ya+OMaYkS5JMSb6V5PW3nbo7ydw0TbvGGOcl+XSStUn+YPb+R5JsSPKLSf4kyR8n+WT2hGySjCRrpmna/o7v/xtJXj0sHw7gKOYKK8Bb1ia5Y5qmX5qm6cRpmj6W5NkkF+7v5Nl+1+XTNP1DkmuS/Nps6fEk5yd5Y5qmndl
zRfb3sydkk+Qfk2x82z7Ysw/T5wFYFAQrwFuuSPJ37zj2tznw3QJ+IcnfjzGezJ67CvxRkkzT9HqS55L86+y8b8/O/Y/Z62uTHJPkyTHGf85eA3AAY5qmhZ4BAAAOyBVWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKr9P8bb7HZA9fu3AAAAAElFTkSuQmCC\",\n      \"text/plain\": [\n       \"<Figure size 864x576 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {\n      \"needs_background\": \"light\"\n     },\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"counts = count_digits(answers)\\n\",\n    \"plot_counts(counts)\\n\",\n    \"print(f\\\"Entropy: {entropy(counts)}\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"2f6c8a22-fdf5-4f30-865c-8e11927b1b7c\",\n   \"metadata\": {},\n   \"source\": [\n    \"GPT4 returns the correct answer with certainty.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"50d4a55e-86df-46ab-8b38-302c79bc8add\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Conclusion\\n\",\n    \"\\n\",\n    \"When generating text completions with a language model we typically look at one output sample, trying to find the \\\"right\\\" answer. However, doing so we obscure the diversity of answers that these language models can produce. Assuming the diversity of answers reflects these models' \\\"uncertainty\\\", we can use measures such as the entropy of the answers' distribution to evaluate the quality of the answer.\\n\",\n    \"\\n\",\n    \"Which result should we be choosing once we have different samples? There is no definite answer to this question. The [self-consistency method](https://arxiv.org/abs/2203.11171) consists in choosing the result based on a majority vote. 
We think this choice is arbitrary and that choosing the correct answer is a [decision theory](https://en.wikipedia.org/wiki/Decision_theory) problem, which can only be solved by specifying a loss function that is adapted to the experiment's context; the majority vote being a particular case with a 0-1 loss.\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.9.16\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 5\n}\n"
  },
  {
    "path": "examples/self_consistency.py",
    "content": "import re\n\nimport numpy as np\nimport openai\n\nimport outlines\nfrom outlines import Template\n\nexamples = [\n    {\n        \"question\": \"There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?\",\n        \"answer\": \"We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.\",\n    },\n    {\n        \"question\": \"If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?\",\n        \"answer\": \"There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.\",\n    },\n    {\n        \"question\": \"Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?\",\n        \"answer\": \"Leah had 32 chocolates and Leah’s sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.\",\n    },\n    {\n        \"question\": \"Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?\",\n        \"answer\": \"Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.\",\n    },\n    {\n        \"question\": \"Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?\",\n        \"answer\": \"He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.\",\n    },\n    {\n        \"question\": \"There were nine computers in the server room. 
Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?\",\n        \"answer\": \"There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.\",\n    },\n    {\n        \"question\": \"Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?\",\n        \"answer\": \"Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.\",\n    },\n    {\n        \"question\": \"Olivia has $23. She bought five bagels for $3 each. How much money does she have left?\",\n        \"answer\": \"She bought 5 bagels for $3 each. This means she spent 5\",\n    },\n]\n\nquestion = \"When I was 6 my sister was half my age. 
Now I’m 70 how old is my sister?\"\n\n\nfew_shots = Template.from_file(\"prompts/self_consistency.txt\")\n\nmodel = outlines.from_openai(openai.OpenAI(), \"gpt-4o-mini\")\ngenerator = outlines.Generator(model)\nprompt = few_shots(question=question, examples=examples)\nanswers = generator(prompt, n=10)\n\ndigits = []\nfor answer in answers:\n    try:\n        match = re.findall(r\"\\d+\", answer)[-1]\n        if match is not None:\n            digit = int(match)\n            digits.append(digit)\n    except AttributeError:\n        print(f\"Could not parse the completion: '{answer}'\")\n\nunique_digits, counts = np.unique(digits, return_counts=True)\nresults = {int(d): int(c) for d, c in zip(unique_digits, counts)}\nprint(results)\n\nmax_count = max(results.values())\nanswer_value = [key for key, value in results.items() if value == max_count][0]\ntotal_count = sum(results.values())\nprint(\n    f\"The most likely answer is {answer_value} ({max_count / total_count * 100}% consensus)\"\n)\n"
  },
  {
    "path": "examples/simulation_based_inference.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"e7c7d0bb-8d45-4139-a584-02c7196db92b\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Find the best few-shot examples using simulation-based inference\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"id\": \"831a76f5-c569-4174-adab-fb0245877367\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import json\\n\",\n    \"import random\\n\",\n    \"import requests\\n\",\n    \"import re\\n\",\n    \"\\n\",\n    \"import openai\\n\",\n    \"\\n\",\n    \"import outlines\\n\",\n    \"\\n\",\n    \"random.seed(0)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 18,\n   \"id\": \"ec604edc-c8b6-4088-bf17-b77ae57d05a1\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"env: OPENAI_API_KEY=# your key here\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"%env OPENAI_API_KEY = # your key here\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"aabb4db6-fd94-4c42-ab7f-97c3de45b2cc\",\n   \"metadata\": {},\n   \"source\": [\n    \"In this example we will use GPT 4 mini to solve problems from the GSM-8K dataset. The state-of-the-art performance on this dataset is obtained using few-shot prompting with 5 examples. However, it is not clear how one should select these examples. 
Here, we will use **simulation-based inference** to try to infer which examples we should be using to get the best out of the model's abilities to solve the problem.\\n\",\n    \"\\n\",\n    \"Let's start with downloading the dataset:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 3,\n   \"id\": \"367f5f89-8e5d-4381-b9eb-78c60bc50f86\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"result = requests.get(\\n\",\n    \"    \\\"https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/train.jsonl\\\"\\n\",\n    \")\\n\",\n    \"lines = result.iter_lines()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"ef0f7aa9-d528-41e9-8a9d-4497f01f0692\",\n   \"metadata\": {},\n   \"source\": [\n    \"We now divide the train set into two sets:\\n\",\n    \"- 10 problems from which we are going to sample 5 examples at random for every inference;\\n\",\n    \"- 500 problems which we are going to use to perform inference.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"id\": \"0667c4a8-cebe-4796-bbc9-575ee9498717\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"example_set = []\\n\",\n    \"for _ in range(10):\\n\",\n    \"    line = json.loads(next(lines))\\n\",\n    \"    answer = re.findall(r\\\"\\\\d+\\\", line[\\\"answer\\\"])[-1]\\n\",\n    \"    example_set.append({\\\"question\\\": line[\\\"question\\\"], \\\"answer\\\": answer})\\n\",\n    \"\\n\",\n    \"train_set = []\\n\",\n    \"for _ in range(500):\\n\",\n    \"    line = json.loads(next(lines))\\n\",\n    \"    answer = re.findall(r\\\"\\\\d+\\\", line[\\\"answer\\\"])[-1]\\n\",\n    \"    train_set.append({\\\"question\\\": line[\\\"question\\\"], \\\"answer\\\": answer})\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"4b52b470-d818-495a-a6e3-e50a1deff13c\",\n   \"metadata\": {},\n   \"source\": [\n    \"Now let's define the prompt, the model, and the 
sampling loop. The sampling loop consists in choosing 5 examples at random, sampling 20 model answers; if the answer is correct we keep the example ids as samples, otherwise continue:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 9,\n   \"id\": \"9fbebaa9-f05e-4c6b-8875-73a08273bbb5\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"few_shots = outlines.Template.from_string(\\n\",\n    \"    \\\"\\\"\\\"\\n\",\n    \"    {% for example in examples %}\\n\",\n    \"    Q: {{ example.question }}\\n\",\n    \"    A: {{ example.answer }}\\n\",\n    \"    {% endfor %}\\n\",\n    \"    Q: {{ question }}\\n\",\n    \"    A:\\n\",\n    \"    \\\"\\\"\\\"\\n\",\n    \")\\n\",\n    \"\\n\",\n    \"model = outlines.from_openai(openai.OpenAI(), \\\"gpt-4o-mini\\\")\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"# TODO: This could largely benefit from vectorization in #52\\n\",\n    \"def one_train_example(problem, example_set):\\n\",\n    \"    example_ids = random.choices(range(0, len(example_set)), k=5)\\n\",\n    \"    examples = [example_set[i] for i in example_ids]\\n\",\n    \"    prompt = few_shots(question=problem[\\\"question\\\"], examples=examples)\\n\",\n    \"    answers_raw = model(prompt, samples=20)\\n\",\n    \"\\n\",\n    \"    samples = []\\n\",\n    \"    for answer_raw in answers_raw:\\n\",\n    \"        try:\\n\",\n    \"            answer = re.findall(r\\\"\\\\d+\\\", answer_raw)[-1]\\n\",\n    \"            if answer == problem[\\\"answer\\\"]:\\n\",\n    \"                samples += example_ids\\n\",\n    \"            else:\\n\",\n    \"                continue\\n\",\n    \"        except IndexError:\\n\",\n    \"            pass\\n\",\n    \"\\n\",\n    \"    return samples\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 10,\n   \"id\": \"1dae1ef2-c9e0-4c98-8686-7fbc2ff55e56\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      
\"application/vnd.jupyter.widget-view+json\": {\n       \"model_id\": \"9efc9d077af24a2eb5ea3c05fe63f298\",\n       \"version_major\": 2,\n       \"version_minor\": 0\n      },\n      \"text/plain\": [\n       \"  0%|          | 0/500 [00:00<?, ?it/s]\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"from tqdm.contrib import tenumerate\\n\",\n    \"\\n\",\n    \"samples = []\\n\",\n    \"for i, train_example in tenumerate(train_set):\\n\",\n    \"    samples += one_train_example(train_example, example_set)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"d084a3c1-ec84-48ab-91d7-b4c64afce512\",\n   \"metadata\": {},\n   \"source\": [\n    \"We can now plot the number of times each example was sampled:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 11,\n   \"id\": \"c211e0af-33c6-4e35-af83-194dcef31f01\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"Text(0, 0.5, 'Counts')\"\n      ]\n     },\n     \"execution_count\": 11,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    },\n    {\n     \"data\": {\n      \"image/png\": 
\"iVBORw0KGgoAAAANSUhEUgAAAtoAAAHgCAYAAACb58plAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAAbiElEQVR4nO3da7BldXnn8d8jLV7wAmqHIsBMk5FxtIyJpEW8jhFjQBxxLEQdo5SFYVKjxktubZIqy5kXg1OWGmdSZFBUrFHUoJZEGJRBvE2NaItEFHTsIEozKK0i3pIo+syLszDHtpHT0P+9zuXzqTp19l5r7X2ebRfdX9f5772quwMAAOxbd5p7AAAAWI+ENgAADCC0AQBgAKENAAADCG0AABhAaAMAwACb5h5ghOOOO64vvPDCuccAAGD9q1vbsS7PaH/jG9+YewQAADa4dRnaAAAwt2GhXVVvqqobqupzy7bdp6ouqqovTd8PmrZXVb2+qnZU1Wer6qhljzllOv5LVXXKqHkBAGBfGnlG+y1Jjttt27YkF3f3kUkunu4nyfFJjpy+TktyRrIU5klekeThSY5O8opb4hwAAFazYaHd3R9N8q3dNp+Y5Ozp9tlJnrps+1t7ySeSHFhVhyT57SQXdfe3uvvGJBfl5+MdAABWnUWv0T64u6+fbn8tycHT7UOTXLvsuJ3Ttlvb/nOq6rSq2l5V23ft2rVvpwYAgL0025shu7uT9D58vjO7e2t3b928efO+eloAALhdFh3aX5+WhGT6fsO0/bokhy877rBp261tBwCAVW3RoX1ekls+OeSUJO9btv2506ePHJPkpmmJyQeSPLGqDpreBPnEaRsAAKxqw64MWVXnJHlckvtV1c4sfXrI6UneVVWnJvlKkpOnwy9I8qQkO5L8IMnzkqS7v1VV/ynJp6bj/mN37/4GSwAAWHVqaan0+rJ169bevn373GMAALD+baxLsAMAwNyENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIABhl2CHQCA1W/LtvPnHuEOu+b0E+YeYY+c0QYAgAGENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMILQBAGAAoQ0AAAMIbQAAGEBoAwDAAEIbAAAGENoAADCA0AYAgAGENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAm+YeANayLdvOn3uEfeKa00+YewQAWHec0QYAgAGENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMILQBAGCATXMPwPqwZdv5c4+wT1xz+glzjwAArBPOaAMAwABCGwAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAAf7wcAe+BjS4E7apYz2lX10qr6fFV9rqrOqaq7VtURVXVpVe2oqndW1f7TsXeZ7u+Y9m+ZY2YAANgbCw/tqjo0ye8n2drdD06yX5JnJnlVktd29/2T3Jjk1Okhpya5cdr+2uk4AABY1eZao70pyd2qalOSuye5Psnjk5w77T87yVOn2ydO9zPtP7aqanGjAgDA3lt4aHf3dUleneSrWQrsm5J8Osm3u/vm6bCdSQ6dbh+a5NrpsTdPx993kTMDAMDemmPpyEFZOkt9RJJfTnJAkuP2wfOeVlXbq2r7rl277ujTAQDAHTLH0pEnJPlyd+/q7h8leU+SRyU5cFpKkiSHJbluun1dksOTZNp/7yTf3P1Ju/vM7t7a3Vs3b948+jUAAMAvNEdofzXJMVV192mt9bFJrkxySZ
KTpmNOSfK+6fZ50/1M+z/U3b3AeQEAYK/NsUb70iy9qfGyJFdMM5yZ5E+SvKyqdmRpDfZZ00POSnLfafvLkmxb9MwAALC3ZrlgTXe/Iskrdtt8dZKj93DsPyR5+iLmAgCAfcUl2AEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMILQBAGAAoQ0AAAMIbQAAGGDT3AOsN1u2nT/3CHfYNaefMPcIAABrnjPaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMILQBAGAAoQ0AAAMIbQAAGEBoAwDAAEIbAAAGENoAADCA0AYAgAGENgAADLBp7gEAWN22bDt/7hH2iWtOP2HuEdaM9fBn7s+b1cAZbQAAGEBoAwDAAEIbAAAGsEYb2GvrYf1mYg0nAGM5ow0AAAMIbQAAGEBoAwDAAEIbAAAGENoAADCA0AYAgAGENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYYNPcAwAArAZbtp0/9wh32DWnnzD3CCzjjDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMILQBAGAAoQ0AAAMIbQAAGEBoAwDAAEIbAAAGENoAADCA0AYAgAGENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABpgltKvqwKo6t6q+UFVXVdUjquo+VXVRVX1p+n7QdGxV1eurakdVfbaqjppjZgAA2BtzndH+iyQXdve/SvJrSa5Ksi3Jxd19ZJKLp/tJcnySI6ev05KcsfhxAQBg7yw8tKvq3kkem+SsJOnuH3b3t5OcmOTs6bCzkzx1un1ikrf2kk8kObCqDlno0AAAsJfmOKN9RJJdSd5cVZ+pqjdW1QFJDu7u66djvpbk4On2oUmuXfb4ndM2AABYteYI7U1JjkpyRnc/NMn380/LRJIk3d1Jem+etKpOq6rtVbV9165d+2xYAAC4PeYI7Z1Jdnb3pdP9c7MU3l+/ZUnI9P2Gaf91SQ5f9vjDpm0/o7vP7O6t3b118+bNw4YHAICVWHhod/fXklxbVQ+YNh2b5Mok5yU5Zdp2SpL3TbfPS/Lc6dNHjkly07IlJgAAsCptmunnvijJ26pq/yRXJ3lelqL/XVV1apKvJDl5OvaCJE9KsiPJD6ZjAQBgVZsltLv78iRb97Dr2D0c20leMHomAADYl1wZEgAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMILQBAGAAoQ0AAAMIbQAAGEBoAwDAAEIbAAAGENoAADCA0AYAgAH2OrSr6qCqesiIYQAAYL1YUWhX1Yer6l5VdZ8klyV5Q1W9ZuxoAACwdq30jPa9u/s7SZ6W5K3d/fAkTxg3FgAArG0rDe1NVXVIkpOTvH/gPAAAsC6sNLRfmeQDSXZ096eq6leSfGncWAAAsLZtWuFx13f3T98A2d1XW6MNAAC3bqVntP/rCrcBAAC5jTPaVfWIJI9MsrmqXrZs172S7DdyMAAAWMtua+nI/knuMR13z2Xbv5PkpFFDAQDAWvcLQ7u7P5LkI1X1lu7+yoJmAgCANW+lb4a8S1WdmWTL8sd09+NHDAUAAGvdSkP7r5P8VZI3JvnxuHEAAGB9WGlo39zdZwydBAAA1pGVfrzf31TVf6iqQ6rqPrd8DZ0MAADWsJWe0T5l+v5Hy7Z1kl/Zt+MAAMD6sKLQ7u4jRg8CAADryYpCu6qeu6ft3f3WfTsOAACsDytdOvKwZbfvmuTYJJclEdoAALAHK1068qLl96vqwCTvGDEQAACsByv91JHdfT+JddsAAHArVrpG+2+y9CkjSbJfkgcmedeooQAAYK
1b6RrtVy+7fXOSr3T3zgHzAADAurCipSPd/ZEkX0hyzyQHJfnhyKEAAGCtW1FoV9XJST6Z5OlJTk5yaVWdNHIwAABYy1a6dOTPkjysu29IkqranOR/JTl31GAAALCWrfRTR+50S2RPvrkXjwUAgA1npWe0L6yqDyQ5Z7r/jCQXjBkJAADWvl8Y2lV1/yQHd/cfVdXTkjx62vV/krxt9HAAALBW3dYZ7dcleXmSdPd7krwnSarqV6d9/2bgbACrypZt5889wj5xzeknzD0CwIZwW+usD+7uK3bfOG3bMmQiAABYB24rtA/8Bfvutg/nAACAdeW2Qnt7Vf3u7hur6vlJPj1mJAAAWPtua432S5K8t6qenX8K661J9k/ybwfOBQAAa9ovDO3u/nqSR1bVbyZ58LT5/O7+0PDJAABgDVvR52h39yVJLhk8CwAArBuu7ggAAAMIbQAAGEBoAwDAAEIbAAAGENoAADCA0AYAgAGENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMMFtoV9V+VfWZqnr/dP+Iqrq0qnZU1Turav9p+12m+zum/VvmmhkAAFZqzjPaL05y1bL7r0ry2u6+f5Ibk5w6bT81yY3T9tdOxwEAwKo2S2hX1WFJTkjyxul+JXl8knOnQ85O8tTp9onT/Uz7j52OBwCAVWuuM9qvS/LHSX4y3b9vkm93983T/Z1JDp1uH5rk2iSZ9t80Hf8zquq0qtpeVdt37do1cHQAALhtCw/tqnpykhu6+9P78nm7+8zu3trdWzdv3rwvnxoAAPbaphl+5qOSPKWqnpTkrknuleQvkhxYVZums9aHJbluOv66JIcn2VlVm5LcO8k3Fz82AACs3MLPaHf3y7v7sO7ekuSZST7U3c9OckmSk6bDTknyvun2edP9TPs/1N29wJEBAGCvrabP0f6TJC+rqh1ZWoN91rT9rCT3nba/LMm2meYDAIAVm2PpyE9194eTfHi6fXWSo/dwzD8kefpCBwMAgDtoNZ3RBgCAdUNoAwDAAEIbAAAGENoAADCA0AYAgAGENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMILQBAGAAoQ0AAAMIbQAAGEBoAwDAAEIbAAAGENoAADCA0AYAgAGENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMILQBAGAAoQ0AAAMIbQAAGEBoAwDAAEIbAAAGENoAADCA0AYAgAGENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMILQBAGAAoQ0AAAMIbQAAGEBoAwDAAEIbAAAGENoAADDAwkO7qg6vqkuq6sqq+nxVvXjafp+quqiqvjR9P2jaXlX1+qraUVWfraqjFj0zAADsrTnOaN+c5A+6+0FJjknygqp6UJJtSS7u7iOTXDzdT5Ljkxw5fZ2W5IzFjwwAAHtn4aHd3dd392XT7e8muSrJoUlOTHL2dNjZSZ463T4xyVt7ySeSHFhVhyx2agAA2DuzrtGuqi1JHprk0iQHd/f1066vJTl4un1okmuXPWzntA0AAFat2UK7qu6R5N1JXtLd31m+r7s7Se/l851WVduravuuXbv24aQAALD3ZgntqrpzliL7bd39nmnz129ZEjJ9v2Hafl2Sw5c9/LBp28/o7jO7e2t3b928ef
O44QEAYAXm+NSRSnJWkqu6+zXLdp2X5JTp9ilJ3rds+3OnTx85JslNy5aYAADAqrRphp/5qCTPSXJFVV0+bfvTJKcneVdVnZrkK0lOnvZdkORJSXYk+UGS5y10WgAAuB0WHtrd/fEkdSu7j93D8Z3kBUOHAgCAfcyVIQEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMILQBAGAAoQ0AAAMIbQAAGEBoAwDAAEIbAAAGENoAADCA0AYAgAGENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMILQBAGAAoQ0AAAMIbQAAGEBoAwDAAEIbAAAGENoAADCA0AYAgAGENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAYQ2gAAMIDQBgCAAYQ2AAAMILQBAGAAoQ0AAAMIbQAAGEBoAwDAAEIbAAAGENoAADCA0AYAgAGENgAADCC0AQBgAKENAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtAAAYQGgDAMAAQhsAAAZYM6FdVcdV1RerakdVbZt7HgAA+EXWRGhX1X5J/jLJ8UkelORZVfWgeacCAIBbtyZCO8nRSXZ099Xd/cMk70hy4swzAQDArVoroX1okmuX3d85bQMAgFWpunvuGW5TVZ2U5Ljufv50/zlJHt7dL1x2zGlJTpvuPiDJFxc+6OLcL8k35h5iBl73xuJ1byxe98ayUV93snFf+3p+3d/o7uP2tGPToie5na5Lcviy+4dN236qu89McuYih5pLVW3v7q1zz7FoXvfG4nVvLF73xrJRX3eycV/7Rn3da2XpyKeSHFlVR1TV/kmemeS8mWcCAIBbtSbOaHf3zVX1wiQfSLJfkjd19+dnHgsAAG7VmgjtJOnuC5JcMPccq8SGWCKzB173xuJ1byxe98ayUV93snFf+4Z83WvizZAAALDWrJU12gAAsKYI7TVko16GvqreVFU3VNXn5p5lUarq8Kq6pKqurKrPV9WL555pUarqrlX1yar62+m1v3LumRalqvarqs9U1fvnnmWRquqaqrqiqi6vqu1zz7MoVXVgVZ1bVV+oqquq6hFzzzRaVT1g+nO+5es7VfWSuedahKp66fR32ueq6pyquuvcMy1CVb14es2f3yh/1stZOrJGTJeh/79JfitLF+z5VJJndfeVsw62AFX12CTfS/LW7n7w3PMsQlUdkuSQ7r6squ6Z5NNJnrpB/rwryQHd/b2qunOSjyd5cXd/YubRhquqlyXZmuRe3f3kuedZlKq6JsnW7l6vn7G7R1V1dpKPdfcbp0/Uunt3f3vmsRZm+nftuixdF+Mrc88zUlUdmqW/yx7U3X9fVe9KckF3v2Xeycaqqgdn6WreRyf5YZILk/xed++YdbAFckZ77diwl6Hv7o8m+dbccyxSd1/f3ZdNt7+b5KpskKuh9pLvTXfvPH2t+zMCVXVYkhOSvHHuWRivqu6d5LFJzkqS7v7hRorsybFJ/m69R/Yym5Lcrao2Jbl7kv838zyL8MAkl3b3D7r75iQfSfK0mWdaKKG9drgM/QZVVVuSPDTJpTOPsjDTEorLk9yQ5KLu3giv/XVJ/jjJT2aeYw6d5INV9enpKr8bwRFJdiV587Rc6I1VdcDcQy3YM5OcM/cQi9Dd1yV5dZKvJrk+yU3d/cF5p1qIzyV5TFXdt6runuRJ+dkLEK57QhtWsaq6R5J3J3lJd39n7nkWpbt/3N2/nqWrwB49/fpx3aqqJye5obs/PfcsM3l0dx+V5PgkL5iWi613m5IcleSM7n5oku8n2Ujvvdk/yVOS/P
XcsyxCVR2Upd9CH5Hkl5McUFW/M+9U43X3VUleleSDWVo2cnmSH88506IJ7bXjNi9Dz/oyrU9+d5K3dfd75p5nDtOv0i9JctzMo4z2qCRPmdYqvyPJ46vqf8w70uJMZ/vS3TckeW+WlsqtdzuT7Fz225pzsxTeG8XxSS7r7q/PPciCPCHJl7t7V3f/KMl7kjxy5pkWorvP6u7f6O7HJrkxS+832zCE9trhMvQbyPSGwLOSXNXdr5l7nkWqqs1VdeB0+25ZegPwF2YdarDufnl3H9bdW7L03/aHunvdn+1Kkqo6YHrDb6alE0/M0q+b17Xu/lqSa6vqAdOmY5Os+zc7L/OsbJBlI5OvJjmmqu4+/f1+bJbee7PuVdUvTd//WZbWZ7993okWa81cGXKj28iXoa+qc5I8Lsn9qmpnkld091nzTjXco5I8J8kV01rlJPnT6Qqp690hSc6ePpHgTkne1d0b6uPuNpiDk7x3qT2yKcnbu/vCeUdamBcledt08uTqJM+beZ6FmP4P1W8l+fdzz7Io3X1pVZ2b5LIkNyf5TDbOlRLfXVX3TfKjJC/YaG/69fF+AAAwgKUjAAAwgNAGAIABhDYAAAwgtAEAYAChDQAAAwhtgFWuqn5cVZcv+5rlCoJVdU1V3e92PO63q+qVVXWfqvqfI2YDWI18jjbA6vf30yXp16rHZOkKn49J8vGZZwFYGGe0Adagqrp3VX3xlisLVtU5VfW70+0zqmp7VX2+ql657DHXVNV/ns6Kb6+qo6rqA1X1d1X1e9Mxj6uqj1bV+dPz/1VV/dy/FVX1O1X1yem5/vt0gaHdj3nGdMGl30/yuiRvSPK8qnJVW2BDENoAq9/ddls68ozuvinJC5O8paqemeSg7n7DdPyfdffWJA9J8q+r6iHLnuur09nxjyV5S5KTkhyT5JXLjjk6S1ctfFCSf5Glyyb/VFU9MMkzkjxqeq4fJ3n27kN39zuTPDTJ57r7V5NckeSh3f2U2/8/BcDaYekIwOq3x6Uj3X1RVT09yV8m+bVlu06uqtOy9Hf8IVkK5s9O+245m3xFknt093eTfLeq/rGqDpz2fbK7r06WzpQneXSSc5c9/7FJfiPJp6ZLp98tyQ23Mvu/zNLlxZPkgOnnAWwIQhtgjZqWdDwwyQ+SHJRkZ1UdkeQPkzysu2+sqrckueuyh/3j9P0ny27fcv+WfxN6tx+1+/1KcnZ3v/w25tue5H5JNlXVlUkOmZaSvKi7P3bbrxBgbbN0BGDtemmSq5L8uyRvrqo7J7lXku8nuamqDk5y/O143qOr6ogp5J+Rn38D48VJTqqqX0qS6dNE/vnuTzItXzk/yYlJ/kuWlrT8usgGNgqhDbD67b5G+/TpTZDPT/IHU7h+NMmfd/ffJvlMki8keXuS/307ft6nkvy3LEX8l5O8d/nO7r4yyZ8n+WBVfTbJRVlaorInRyW5PEufOPKR2zELwJpV3bv/RhCAjaqqHpfkD7v7yTOPArDmOaMNAAADOKMNAAADOKMNAAADCG0AABhAaAMAwABCGwAABhDaAAAwgNAGAIAB/j/+cpD1bixMkQAAAABJRU5ErkJggg==\",\n      \"text/plain\": [\n       \"<Figure size 864x576 with 1 Axes>\"\n      ]\n     },\n     \"metadata\": {\n      \"needs_background\": \"light\"\n     },\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"import numpy as np\\n\",\n    \"import matplotlib.pylab as plt\\n\",\n    \"\\n\",\n    \"example_ids, counts = np.unique(samples, return_counts=True)\\n\",\n  
  \"\\n\",\n    \"fig = plt.figure(figsize=(12, 8))\\n\",\n    \"ax = fig.add_subplot(111)\\n\",\n    \"ax.bar(example_ids, counts)\\n\",\n    \"\\n\",\n    \"ax.spines[[\\\"top\\\", \\\"right\\\"]].set_visible(False)\\n\",\n    \"\\n\",\n    \"ax.set_xticks(range(10))\\n\",\n    \"ax.set_xlabel(\\\"Example #\\\")\\n\",\n    \"ax.set_ylabel(\\\"Counts\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"cde37e5b-377e-4872-af40-674d680bd2da\",\n   \"metadata\": {},\n   \"source\": [\n    \"Looking at the distribution, our best guess for which examples we should use for benchmarking on the test set would be 0, 1, 2, 6 and 9. This method can be trivially extended to other workflows that use few-shot examples to query LLMs. Of course, simulation-based inference extends beyond choosing the \\\"best\\\" prompt, and could for instance be useful to select the structure of chains of LLMs and tools as well.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 13,\n   \"id\": \"bddda20b-234a-4d30-b40a-90708fbaba23\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"{'question': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?',\\n\",\n       \" 'answer': '72'}\"\n      ]\n     },\n     \"execution_count\": 13,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"example_set[0]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 14,\n   \"id\": \"fb186bf9-62b7-485f-a8ce-401f551a9e57\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"{'question': 'Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. 
How much did she earn?',\\n\",\n       \" 'answer': '10'}\"\n      ]\n     },\n     \"execution_count\": 14,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"example_set[1]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 15,\n   \"id\": \"ae427bb2-e3f4-4a96-a508-e8011a0fc553\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"{'question': 'Betty is saving money for a new wallet which costs $100. Betty has only half of the money she needs. Her parents decided to give her $15 for that purpose, and her grandparents twice as much as her parents. How much more money does Betty need to buy the wallet?',\\n\",\n       \" 'answer': '5'}\"\n      ]\n     },\n     \"execution_count\": 15,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"example_set[2]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 16,\n   \"id\": \"fe43ae0f-c18f-4b74-b639-8481472edf4d\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"{'question': 'Albert is wondering how much pizza he can eat in one day. He buys 2 large pizzas and 2 small pizzas. A large pizza has 16 slices and a small pizza has 8 slices. If he eats it all, how many pieces does he eat that day?',\\n\",\n       \" 'answer': '48'}\"\n      ]\n     },\n     \"execution_count\": 16,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"example_set[6]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 17,\n   \"id\": \"19d9d936-d0f0-4927-990c-76dbbfa95b47\",\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"{'question': 'Tina makes $18.00 an hour.  
If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?',\\n\",\n       \" 'answer': '990'}\"\n      ]\n     },\n     \"execution_count\": 17,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"example_set[9]\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.9.16\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 5\n}\n"
  },
  {
    "path": "examples/vllm_offline_integration.py",
    "content": "\"\"\"Example of integrating `outlines` with `vllm`.\"\"\"\n\nimport vllm\nfrom pydantic import BaseModel\nfrom transformers import AutoTokenizer\n\nfrom outlines.models.vllm_offline import adapt_tokenizer\nfrom outlines.processors import JSONLogitsProcessor\n\n\nclass Person(BaseModel):\n    first_name: str\n    surname: str\n\n\nMODEL_ID = \"mistralai/Mistral-7B-v0.1\"\nllm = vllm.LLM(model=MODEL_ID, max_model_len=512)\ntokenizer = adapt_tokenizer(AutoTokenizer.from_pretrained(MODEL_ID))\nlogits_processor = JSONLogitsProcessor(\n    schema=Person,\n    tokenizer=tokenizer,\n    tensor_library_name=\"torch\",\n    whitespace_pattern=r\" ?\"\n)\nresult = llm.generate(\n    [\"He is Tom Jones\", \"She saw Linda Smith\"],\n    sampling_params=vllm.SamplingParams(\n        temperature=0.0,\n        max_tokens=50,\n        logits_processors=[logits_processor],\n    ),\n)\nprint(result)\n"
  },
  {
    "path": "flake.nix",
    "content": "{\n  inputs.flake-utils.url = \"github:numtide/flake-utils\";\n  outputs = { self, nixpkgs, flake-utils }:\n    flake-utils.lib.eachDefaultSystem (system:\n      let\n        pkgs = import nixpkgs {\n          inherit system;\n          config.allowUnfree = true;\n        };\n      in { devShells.default = import ./shell.nix { inherit pkgs; }; });\n}\n"
  },
  {
    "path": "llm.txt",
    "content": "# Outlines Codebase Reference\n\n## Overview\n\nOutlines is a library for structured generation for type-safe LLMs. It ensures outputs conform to specified formats (JSON schemas, regex patterns, grammars) by constraining the token generation process, or calling an API that uses this process.\n\n**Core insight**: Instead of generating text and hoping it matches a format, Outlines makes it impossible for the model to generate invalid outputs by masking invalid tokens during generation.\n\n**Note**: The codebase has undergone significant refactoring. Core FSM functionality has been extracted to the `outlines-core` package.\n\n## Usage Examples\n\nFor comprehensive usage examples, see:\n- **README.md**: Quick start examples for JSON generation, regex constraints, and choice selection\n- **docs/cookbook/**: Detailed examples including:\n  - `docs/cookbook/prompting.md`: Advanced prompting techniques\n  - `docs/cookbook/models.md`: Working with different model providers\n  - `docs/cookbook/humaneval.md`: Code generation examples\n  - `docs/cookbook/qa-with-citations.md`: Question answering with structured citations\n  - `docs/cookbook/deploy-to-servers.md`: Deployment examples with vLLM and TGI\n- **examples/**: Standalone example scripts\n  - `examples/lark_grammar.py`: Grammar-based generation\n  - `examples/math_generate_code.py`: Code generation with constraints\n  - `examples/multiple_sglang_backend.py`: Using multiple backend servers\n- **tests/**: Test files contain many practical usage patterns\n\n## Architecture\n\n### Layer Stack\n\n```\nUser API (outlines.models)\n    ↓\nGenerator Classes (SteerableGenerator, BlackBoxGenerator)\n    ↓\nType System (types/dsl.py: Pydantic → JsonSchema → Regex)\n    ↓\nFSM Compilation (outlines-core: regex → FSM via interegular)\n    ↓\nGuide System (processors/guide.py: FSM state management)\n    ↓\nLogits Processing (processors/structured.py: token masking)\n    ↓\nModel Providers (transformers, OpenAI, 
etc.)\n```\n\n### Key Design Decisions\n\n1. **FSM-based constraints**: For local models, constraints compile to finite state machines that track valid next tokens\n2. **Provider abstraction**: Same constraint system works across local models (transformers) and APIs (OpenAI)\n3. **Lazy compilation**: FSMs are compiled on first use and cached persistently\n4. **Token-level control**: Constraints apply at the token level, not character level\n5. **Type-driven API**: Python types are the primary interface for specifying constraints\n\n## Core Components\n\n### Models (`outlines/models/`)\nBase classes and implementations for different model providers:\n- `SteerableModel`: For models where we control logits (transformers, llama.cpp)\n- `BlackBoxModel`: For API models with structured output support (OpenAI, Anthropic)\n- Each provider has an adapter class handling input and output format conversion\n\nKey files:\n- `base.py`: Abstract base classes defining the model interface\n- `transformers.py`: Integration with HuggingFace transformers\n- `openai.py`: OpenAI API integration\n- `gemini.py`: Gemini integration\n- `mlxlm.py`: MLX-LM integration\n- `vllm_offline.py`: vLLM integration\n- `llamacpp.py`: llama.cpp integration\n- `ollama.py`: Ollama integration\n- `vllm.py`: Integration with vLLM servers\n- `tgi.py`: Integration with text-generation-inference servers\n- `sglang.py`: Integration with SGLang servers\n\n### Generation (`outlines/generator.py`)\nHandles the generation process:\n- `generator.py`: Main `Generator` class implementations (root level)\n- Stream functionality is now integrated into generator classes\n\nBase classes and implementations for different model providers:\n- `BlackBoxGenerator`: For API models with structured outputs support\n- `SteerableGenerator`: For models where we control the logits\n\n### FSM System (`outlines/fsm/` and `outlines/processors/`)\nCore constraint enforcement:\n- `processors/guide.py`: Base `Guide` class and `RegexGuide` 
implementation\n- `fsm/parsing.py`: Lark-based CFG parsing with `PartialLark` parser\n- Regex to FSM compilation now uses `outlines_core.fsm` module\n\nKey concepts:\n- **Guide**: Manages FSM state during generation\n- **State transitions**: Precomputed mapping of (state, token) → next_state\n- **Token masking**: For each state, compute which tokens are valid\n\n### Type System (`outlines/types/`)\nType conversion pipeline:\n- `dsl.py`: Term DSL defining constraint language (Sequence, Choice, etc.) and JSON schema to regex conversion\n- `__init__.py`: Common regex types and DSL functions\n- Python types → Term DSL → Regex → FSM\n\n### Logits Processors (`outlines/processors/`)\nApply constraints during generation:\n- `structured.py`: Main `StructuredLogitsProcessor`\n- `base_logits_processor.py`: Abstract base class\n- Processors mask invalid tokens by setting their logits to -inf\n\n## Key Algorithms\n\n### FSM Compilation Pipeline\n1. **Pattern definition**: User provides Pydantic model, regex, or grammar\n2. **Schema to regex**: Convert complex types to regex patterns\n   - JSON schemas become regex matching valid JSON\n   - Pydantic models extract JSON schema then convert\n3. **Regex to FSM**: Use interegular library to build FSM\n4. **FSM to token map**: For each FSM state, compute valid tokens\n   - Handle multi-character tokens\n   - Account for token boundaries\n5. 
**Guide creation**: Wrap FSM with state tracking\n\n### Token Masking Process\n```python\n# Simplified logits processing\ndef process_logits(logits, current_state, guide):\n    valid_tokens = guide.get_valid_tokens(current_state)\n    mask = torch.full_like(logits, -float('inf'))\n    mask[valid_tokens] = 0\n    return logits + mask\n```\n\n## File Organization\n\n```\noutlines/\n├── __init__.py              # Public API exports\n├── generator.py             # Main Generator classes\n├── models/                  # Model integrations\n│   ├── base.py             # Abstract base classes\n│   ├── transformers.py     # HuggingFace support\n│   └── [provider].py       # Other providers (openai, anthropic, etc.)\n├── fsm/                     # FSM engine\n│   ├── __init__.py\n│   └── parsing.py          # Grammar parsing\n├── types/                   # Type system\n│   ├── __init__.py         # Common regex types and DSL exports\n│   ├── dsl.py              # Term DSL and JSON schema conversion\n│   └── utils.py            # Type checking utilities\n├── processors/              # Logits processing and guides\n│   ├── guide.py            # Guide implementations\n│   ├── structured.py       # Main processor\n│   └── tensor_adapters/    # Framework-specific tensor handling\n├── caching.py               # Caching system\n├── grammars/                # Grammar files (.lark)\n```\n\n## Extension Points\n\n### Adding a Model Provider\n1. Create model class inheriting from `SteerableModel` or `BlackBoxModel`\n2. Implement required methods: `generate()`, `generate_stream()`\n3. Add constructor function in `outlines/__init__.py`\n4. Handle provider-specific input and structured output formats with a `TypeAdapter`\n\n### Adding a Constraint Type\n1. Define new Term subclass in `types/dsl.py`\n2. Implement `to_regex()` conversion\n3. Register type handler for Python type conversion in `python_types_to_terms()`\n4. Add tests for FSM compilation\n\n### Custom Logits Processor\n1. 
Inherit from `OutlinesLogitsProcessor`\n2. Implement `process_logits()` method\n3. Handle batch processing and state management\n4. Register with generator\n\n## Common Patterns in Codebase\n\n1. **Factory functions**: `from_transformers()`, `from_openai()` hide complexity\n2. **Abstract base classes**: Define interfaces for models, processors, guides\n3. **Lazy imports**: Optional dependencies imported only when needed\n4. **Type adapters**: Convert between Outlines types and provider formats\n"
  },
  {
    "path": "mkdocs.yml",
    "content": "# Site information\nsite_name: Outlines\nsite_author: The Outlines developers\nsite_description: >-\n    Structured text generation with LLMs\n\n# Repository\nrepo_name: dottxt-ai/outlines\nrepo_url: https://github.com/dottxt-ai/outlines\n\n# Copyright\ncopyright: Copyright &copy; 2023- The Outlines Developers\n\n# Documentation directory\ndocs_dir: docs\n\n# Configuration\ntheme:\n    name: material\n    palette:\n        # Palette toggle for light mode\n        - media: \"(prefers-color-scheme: light)\"\n          scheme: default\n          primary: white\n    logo: assets/images/logo-square.svg\n    favicon: assets/images/logo-square.png\n    icon:\n        repo: fontawesome/brands/github\n    features:\n        - content.code.copy\n        - navigation.expand\n        - navigation.tabs\n        - navigation.sections\n        - header.autohide\n        - announce.dismiss\n    font:\n        text: Inter\n        code: Source Code Pro\n\n# Additional configuration\nextra:\n    social:\n        - icon: fontawesome/brands/github\n          link: https://github.com/dottxt-ai\n        - icon: fontawesome/brands/twitter\n          link: https://twitter.com/remilouf\n    generator: false\n    analytics:\n        provider: google\n        property: !ENV GOOGLE_ANALYTICS_KEY\n    version:\n        provider: mike\n        default: latest\n        alias: true\n\n# Extensions\nmarkdown_extensions:\n    - admonition\n    - def_list\n    - attr_list\n    - md_in_html\n    - pymdownx.highlight:\n          anchor_linenums: true\n          line_spans: __span\n          pygments_lang_class: true\n          noclasses: True\n          pygments_style: nord\n    - pymdownx.superfences:\n          custom_fences:\n              - name: mermaid\n                class: mermaid\n                format: !!python/name:pymdownx.superfences.fence_code_format\n    - pymdownx.tabbed:\n          alternate_style: true\n    - pymdownx.inlinehilite\n    - pymdownx.details\n    - 
pymdownx.emoji:\n          emoji_index: !!python/name:material.extensions.emoji.twemoji\n          emoji_generator: !!python/name:material.extensions.emoji.to_svg\n    - pymdownx.snippets:\n\nextra_css:\n    - stylesheets/extra.css\n\nplugins:\n    - blog\n    - mkdocstrings:\n          default_handler: python\n          handlers:\n              python:\n                  options:\n                      docstring_style: numpy\n                      show_submodules: true\n    - search\n    - section-index\n    - social:\n          cards_layout_options:\n              color: #173a58\n    - redirects:\n          redirect_maps:\n              \"welcome.md\": \"index.md\"\n\n    - git-committers:\n        repository: dottxt-ai/outlines\n        branch: main\n    - git-revision-date-localized:\n        enable_creation_date: true\n        type: timeago\n\n    - gen-files:\n        scripts:\n        - scripts/gen_ref_pages.py\n    - literate-nav:\n        nav_file: SUMMARY.md\n\nnav:\n    - Home: index.md\n\n    - Guide:\n          - Getting Started: guide/getting_started.md\n          - Installation: guide/installation.md\n          - Migrating to v1: guide/migration.md\n          - Vision-Language Models: guide/vlm.md\n          - Deploying with FastAPI: guide/fastapi_vllm_deployment.md\n          - Chat Templating for Instruct Models: guide/chat_templating.md\n          - Architecture: guide/architecture.md\n\n    - Features:\n          - Overview: features/index.md\n\n          - Core:\n                - Models:\n                    - Overview: features/models/index.md\n                    - Anthropic: features/models/anthropic.md\n                    - Dottxt: features/models/dottxt.md\n                    - Gemini: features/models/gemini.md\n                    - Llamacpp: features/models/llamacpp.md\n                    - Mlx-lm: features/models/mlxlm.md\n                    - Ollama: features/models/ollama.md\n                    - OpenAI: 
features/models/openai.md\n                    - OpenAI compatible API: features/models/openai_compatible.md\n                    - OpenRouter: features/models/openrouter.md\n                    - SGLang: features/models/sglang.md\n                    - TGI: features/models/tgi.md\n                    - Transformers: features/models/transformers.md\n                    - TransformersMultiModal: features/models/transformers_multimodal.md\n                    - vLLM (online server): features/models/vllm.md\n                    - vLLM (offline): features/models/vllm_offline.md\n                - Model Inputs: features/core/inputs.md\n                - Output Types:\n                    - Overview: features/core/output_types.md\n                    - Basic Types: features/core/output_types#basic-python-types\n                    - Multiple-Choices: features/core/output_types#multiple-choices\n                    - JSON: features/core/output_types#json-schemas\n                    - Regex: features/core/output_types#regex-patterns\n                    - Context-free Grammars: features/core/output_types#context-free-grammars\n                - Generator: features/core/generator.md\n\n          - Utilities:\n                - Application: features/utility/application.md\n                - Regex DSL: features/utility/regex_dsl.md\n                - Template: features/utility/template.md\n\n          - Advanced:\n                - Logits Processors: features/advanced/logits_processors.md\n                - Structured Generation Backends: features/advanced/backends.md\n\n    - API Reference: api_reference/\n\n    - Examples:\n          - examples/index.md\n          - Classification: examples/classification.md\n          - Named Entity Extraction: examples/extraction.md\n          - Dating Profiles: examples/dating_profiles.md\n          - Chain of Density: examples/chain_of_density.md\n          - Playing chess: examples/models_playing_chess.md\n          - SimTom: 
examples/simtom.md\n          - Q&A with Citations: examples/qa-with-citations.md\n          - Knowledge Graph Extraction: examples/knowledge_graph_extraction.md\n          - Structured Generation Workflow: examples/structured_generation_workflow.md\n          - Chain of Thought (CoT): examples/chain_of_thought.md\n          - ReAct Agent: examples/react_agent.md\n          - Structured Generation from PDFs: examples/read-pdfs.md\n          - Earnings Reports to CSV: examples/earnings-reports.md\n          - Receipt Digitization: examples/receipt-digitization.md\n          - Extract Events Details: examples/extract_event_details.md\n          - Run on the cloud:\n                - BentoML: examples/deploy-using-bentoml.md\n                - Cerebrium: examples/deploy-using-cerebrium.md\n                - Modal: examples/deploy-using-modal.md\n    - Community:\n        - community/index.md\n        - Feedback 🫶: community/feedback.md\n        - Our Discord Server ☕: https://discord.com/invite/R9DSu34mGd\n        - How to Contribute 🏗️: community/contribute.md\n        - Community Projects 👏: community/examples.md\n        - Versioning Guide 📌: community/versioning.md\n\n    - Blog: https://blog.dottxt.co\n"
  },
  {
    "path": "outlines/__init__.py",
    "content": "\"\"\"Outlines is a Generative Model Programming Framework.\"\"\"\n\n# re-export on top-level namespace\nfrom outlines import grammars as grammars\nfrom outlines import inputs as inputs\nfrom outlines import models as models\nfrom outlines import processors as processors\nfrom outlines import types as types\nfrom outlines.applications import Application as Application\nfrom outlines.caching import clear_cache as clear_cache\nfrom outlines.caching import disable_cache as disable_cache\nfrom outlines.caching import get_cache as get_cache\nfrom outlines.generator import Generator as Generator\nfrom outlines.inputs import Audio as Audio\nfrom outlines.inputs import Image as Image\nfrom outlines.inputs import Video as Video\nfrom outlines.models import *  # noqa: F403\nfrom outlines.templates import Template as Template\nfrom outlines.templates import Vision as Vision\nfrom outlines.types import cfg as cfg\nfrom outlines.types import json_schema as json_schema\nfrom outlines.types import regex as regex\n"
  },
  {
    "path": "outlines/applications.py",
    "content": "\"\"\"Encapsulate a prompt template and an output type into a reusable object.\"\"\"\n\nfrom typing import Any, Callable, Dict, Optional, Union\n\nfrom outlines.generator import (\n    BlackBoxGenerator,\n    Generator,\n    SteerableGenerator,\n)\nfrom outlines.models.base import Model\nfrom outlines.templates import Template\n\n\nclass Application:\n    \"\"\"\n    Application is a class that encapsulates a prompt template and an\n    output type. It can be called to generate a response by providing a\n    model, the values to be substituted in the template in a dictionary\n    and optional inference parameters.\n\n    Parameters\n    ----------\n    template : Union[Template, Callable]\n        A callable that takes arguments and returns a prompt string.\n    output_type : Any\n        The expected output type of the generated response.\n\n    Examples\n    --------\n    ```python\n    from pydantic import BaseModel\n    from transformers import AutoModelForCausalLM, AutoTokenizer\n    from outlines import models, Application\n    from outlines.types import JsonType\n    from outlines.templates import Template\n\n    class OutputModel(BaseModel):\n        result: int\n\n    model = models.from_transformers(\n        AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n        AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n    )\n\n    template_string = \"What is 2 times {{ num }}?\"\n    template = Template.from_string(template_string)\n\n    application = Application(template, JsonType(OutputModel))\n\n    result = application(model, {\"num\": 3}, max_new_tokens=20)\n    print(result)  # Expected output: { \"result\" : 6 }\n    ```\n\n    \"\"\"\n    def __init__(\n        self,\n        template: Union[Template, Callable],\n        output_type: Optional[Any] = None,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        template\n            The template to use to build the 
prompt.\n        output_type\n            The output type provided to the generator.\n\n        \"\"\"\n        self.template = template\n        self.output_type = output_type\n        self.generator: Optional[Union[\n            BlackBoxGenerator, SteerableGenerator\n        ]] = None\n        self.model: Optional[Model] = None\n\n    def __call__(\n        self,\n        model: Model,\n        template_vars: Dict[str, Any],\n        **inference_kwargs\n    ) -> Any:\n        \"\"\"\n        Parameters\n        ----------\n        model\n            The model to use to generate the response.\n        template_vars\n            The variables to be substituted in the template.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n        Returns\n        -------\n        Any\n            The generated response.\n        \"\"\"\n        if model is None:\n            raise ValueError(\"you must provide a model\")\n        # We save the generator to avoid creating a new one for each call.\n        # If the model has changed since the last call, we create a new\n        # generator.\n        if model != self.model:\n            self.model = model\n            self.generator = Generator(model, self.output_type)  # type: ignore\n\n        prompt = self.template(**template_vars)\n        assert self.generator is not None\n        return self.generator(prompt, **inference_kwargs)\n"
  },
  {
    "path": "outlines/backends/__init__.py",
    "content": "\"\"\"Module to define the backends in charge of creating logits processors.\"\"\"\n\nfrom outlines.backends.base import (\n    BaseBackend,\n    LogitsProcessorType,\n)\nfrom outlines.backends.llguidance import LLGuidanceBackend\nfrom outlines.backends.outlines_core import OutlinesCoreBackend\nfrom outlines.backends.xgrammar import XGrammarBackend\nfrom outlines.models import SteerableModel\n\n__all__ = [\n    \"BaseBackend\",\n    \"LogitsProcessorType\",\n    \"LLGuidanceBackend\",\n    \"OutlinesCoreBackend\",\n    \"XGrammarBackend\",\n    \"SteerableModel\",\n    \"CFG_DEFAULT_BACKEND\",\n    \"JSON_SCHEMA_DEFAULT_BACKEND\",\n    \"REGEX_DEFAULT_BACKEND\",\n    \"get_json_schema_logits_processor\",\n    \"get_regex_logits_processor\",\n    \"get_cfg_logits_processor\",\n]\n\nCFG_DEFAULT_BACKEND = \"llguidance\"\nJSON_SCHEMA_DEFAULT_BACKEND = \"outlines_core\"\nREGEX_DEFAULT_BACKEND = \"outlines_core\"\n\n\ndef _get_backend(backend_name: str, model: SteerableModel) -> BaseBackend:\n    \"\"\"Create a Backend instance.\n\n    Parameters\n    ----------\n    backend_name: str\n        The name of the backend to get.\n    model: Model\n        The Outlines model of the user.\n\n    Returns\n    -------\n    backend: BaseBackend\n        The backend instance.\n\n    \"\"\"\n    if backend_name == \"outlines_core\":\n        return OutlinesCoreBackend(model)\n    elif backend_name == \"xgrammar\":\n        return XGrammarBackend(model)\n    elif backend_name == \"llguidance\":\n        return LLGuidanceBackend(model)\n    else:\n        raise ValueError(f\"Backend {backend_name} not supported\")\n\n\ndef get_json_schema_logits_processor(\n    backend_name: str | None,\n    model: SteerableModel,\n    json_schema: str,\n) -> LogitsProcessorType:\n    \"\"\"Create a logits processor from a JSON schema.\n\n    Parameters\n    ----------\n    backend_name: str | None\n        The name of the backend to use.\n    model: Model\n        The Outlines model 
of the user.\n    json_schema: str\n        The JSON schema to create a logits processor from.\n\n    Returns\n    -------\n    LogitsProcessorType\n        The logits processor.\n\n    \"\"\"\n    backend = _get_backend(\n        backend_name or JSON_SCHEMA_DEFAULT_BACKEND,\n        model,\n    )\n    return backend.get_json_schema_logits_processor(json_schema)\n\n\ndef get_regex_logits_processor(\n    backend_name: str | None,\n    model: SteerableModel,\n    regex: str,\n) -> LogitsProcessorType:\n    \"\"\"Create a logits processor from a regex.\n\n    Parameters\n    ----------\n    backend_name: str | None\n        The name of the backend to use.\n    model: Model\n        The Outlines model of the user.\n    regex: str\n        The regex to create a logits processor from.\n\n    Returns\n    -------\n    LogitsProcessorType\n        The logits processor.\n\n    \"\"\"\n    backend = _get_backend(\n        backend_name or REGEX_DEFAULT_BACKEND,\n        model,\n    )\n    return backend.get_regex_logits_processor(regex)\n\n\ndef get_cfg_logits_processor(\n    backend_name: str | None,\n    model: SteerableModel,\n    grammar: str,\n) -> LogitsProcessorType:\n    \"\"\"Create a logits processor from a context-free grammar.\n\n    Parameters\n    ----------\n    backend_name: str | None\n        The name of the backend to use.\n    model: Model\n        The Outlines model of the user.\n    grammar: str\n        The context-free grammar to create a logits processor from.\n\n    Returns\n    -------\n    LogitsProcessorType\n        The logits processor.\n\n    \"\"\"\n    backend = _get_backend(\n        backend_name or CFG_DEFAULT_BACKEND,\n        model,\n    )\n    return backend.get_cfg_logits_processor(grammar)\n"
  },
  {
    "path": "outlines/backends/base.py",
    "content": "\"\"\"Base class for all backends.\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom typing import Any\n\n\nLogitsProcessorType = Any\n\n\nclass BaseBackend(ABC):\n    \"\"\"Base class for all backends.\n\n    The subclasses must implement methods that create a logits processor\n    from a JSON schema, regex or CFG.\n\n    \"\"\"\n\n    @abstractmethod\n    def get_json_schema_logits_processor(\n        self, json_schema: str\n    ) -> LogitsProcessorType:\n        \"\"\"Create a logits processor from a JSON schema.\n\n        Parameters\n        ----------\n        json_schema: str\n            The JSON schema to create a logits processor from.\n\n        Returns\n        -------\n        LogitsProcessorType\n            The logits processor.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def get_regex_logits_processor(self, regex: str) -> LogitsProcessorType:\n        \"\"\"Create a logits processor from a regex.\n\n        Parameters\n        ----------\n        regex: str\n            The regex to create a logits processor from.\n\n        Returns\n        -------\n        LogitsProcessorType\n            The logits processor.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def get_cfg_logits_processor(self, grammar: str) -> LogitsProcessorType:\n        \"\"\"Create a logits processor from a context-free grammar.\n\n        Parameters\n        ----------\n        grammar: str\n            The context-free grammar to create a logits processor from.\n\n        Returns\n        -------\n        LogitsProcessorType\n            The logits processor.\n\n        \"\"\"\n        ...\n"
  },
  {
    "path": "outlines/backends/llguidance.py",
    "content": "\"\"\"Backend class for LLGuidance.\"\"\"\n\nimport warnings\nfrom typing import TYPE_CHECKING\n\nfrom outlines.backends.base import BaseBackend\nfrom outlines.models import LlamaCpp, MLXLM, SteerableModel, Transformers\nfrom outlines.processors.base_logits_processor import (\n    OutlinesLogitsProcessor,\n    TensorType\n)\n\nif TYPE_CHECKING:\n    from llguidance import LLGTokenizer\n\n\nSUPPORTED_TENSOR_LIBRARIES = [\"numpy\", \"mlx\", \"torch\"]\n\n\nclass LLGuidanceLogitsProcessor(OutlinesLogitsProcessor):\n    \"\"\"Logits Processor for the LLGuidance backend.\"\"\"\n\n    def __init__(\n        self,\n        grammar: str,\n        llg_tokenizer,\n        tensor_library_name: str,\n    ) -> None:\n        \"\"\"\n        Parameters\n        ----------\n        grammar: str\n            The grammar spec to use to create the LLMatcher\n        llg_tokenizer: LLTokenizer\n            The LLGuidance tokenizer\n        tensor_library_name: str\n            The name of the tensor library used by the model\n\n        \"\"\"\n        self.is_first_token = True\n        self.grammar = grammar\n        self.llg_tokenizer = llg_tokenizer\n        self.tensor_library_name = tensor_library_name\n        super().__init__(tensor_library_name)\n\n    def reset(self):\n        \"\"\"Ensure self._setup is called again for the next generation.\"\"\"\n        self.is_first_token = True\n\n    def _setup(self, batch_size: int) -> None:\n        \"\"\"Setup the LLMatchers, the bitmask and some functions used in the\n        `process_logits` method.\n\n        This method is called when the first token is generated instead of\n        at initialization because we need to know the batch size.\n\n        Parameters\n        ----------\n        batch_size: int\n            The batch size of the input\n\n        \"\"\"\n        from llguidance import LLMatcher\n\n        self.ll_matchers = [\n            LLMatcher(self.llg_tokenizer, self.grammar)\n            for _ in 
range(batch_size)\n        ]\n\n        # we must adapt the bitmask creation and the bias function to the\n        # tensor library used by the model\n        if self.tensor_library_name == \"torch\":\n            import llguidance.torch\n\n            self.bitmask = llguidance.torch.allocate_token_bitmask(batch_size, self.llg_tokenizer.vocab_size)\n            self._bias_logits = self._bias_logits_torch\n        elif self.tensor_library_name == \"numpy\":\n            import llguidance.numpy\n\n            self.bitmask = llguidance.numpy.allocate_token_bitmask(batch_size, self.llg_tokenizer.vocab_size)\n            self._bias_logits = self._bias_logits_numpy\n        elif self.tensor_library_name == \"mlx\": # pragma: no cover\n            import llguidance.numpy\n\n            self.bitmask = llguidance.numpy.allocate_token_bitmask(batch_size, self.llg_tokenizer.vocab_size)\n            self._bias_logits = self._bias_logits_mlx\n        else: # pragma: no cover\n            raise ValueError(f\"Unsupported tensor library: {self.tensor_library_name}\")\n\n    def _bias_logits_mlx( # pragma: no cover\n        self, input_ids: TensorType, logits: TensorType\n    ) -> TensorType:\n        \"\"\"Bias the logits for the MLX backend.\"\"\"\n        import llguidance.mlx\n        import llguidance.numpy\n\n        biased_logits_array = []\n        for i in range(self.tensor_adapter.shape(input_ids)[0]):\n            llguidance.numpy.fill_next_token_bitmask(self.ll_matchers[i], self.bitmask, i)\n            biased_logits = llguidance.mlx.apply_token_bitmask(\n                logits[i], self.bitmask[i] # type: ignore\n            )\n            biased_logits_array.append(biased_logits)\n\n        return self.tensor_adapter.concatenate(biased_logits_array)\n\n    def _bias_logits_torch(\n        self, input_ids: TensorType, logits: TensorType\n    ) -> TensorType:\n        \"\"\"Bias the logits for the Torch backend.\"\"\"\n        import llguidance.torch\n\n        for i in 
range(self.tensor_adapter.shape(input_ids)[0]):\n            llguidance.torch.fill_next_token_bitmask(self.ll_matchers[i], self.bitmask, i)\n            self.bitmask = self.tensor_adapter.to_device(\n                self.bitmask,\n                self.tensor_adapter.get_device(logits)\n            )\n            llguidance.torch.apply_token_bitmask_inplace(\n                logits[i], # type: ignore\n                self.bitmask[i]\n            )\n            self.bitmask = self.tensor_adapter.to_device(\n                self.bitmask,\n                \"cpu\"\n            )\n\n        return logits\n\n    def _bias_logits_numpy(\n        self, input_ids: TensorType, logits: TensorType\n    ) -> TensorType:\n        \"\"\"Bias the logits for the Numpy backend.\"\"\"\n        import llguidance.numpy\n\n        for i in range(self.tensor_adapter.shape(input_ids)[0]):\n            llguidance.numpy.fill_next_token_bitmask(self.ll_matchers[i], self.bitmask, i)\n            llguidance.numpy.apply_token_bitmask_inplace(\n                logits[i], self.bitmask[i] # type: ignore\n            )\n\n        return logits\n\n    def process_logits(\n        self, input_ids: TensorType, logits: TensorType\n    ) -> TensorType:\n        \"\"\"Use the instances of LLMatcher to bias the logits.\n\n        Parameters\n        ----------\n        input_ids\n            The ids of the tokens of the existing sequences.\n        logits\n            The logits for the current generation step.\n\n        Returns\n        -------\n        TensorType\n            The biased logits.\n\n        \"\"\"\n        if self.is_first_token:\n            self._setup(self.tensor_adapter.shape(input_ids)[0])\n            self.is_first_token = False\n\n        # we do not make the matchers consume the last token during the first\n        # generation step because no tokens have been generated yet\n        else:\n            for i in range(self.tensor_adapter.shape(input_ids)[0]):\n                
sequence = input_ids[i] # type: ignore\n                last_token = sequence[-1].item()\n                self.ll_matchers[i].consume_token(last_token)\n                error = self.ll_matchers[i].get_error()\n                if error:\n                    warnings.warn(f\"Error in LLMatcher: {error}\")\n\n        return self._bias_logits(input_ids, logits)\n\n\nclass LLGuidanceBackend(BaseBackend):\n    \"\"\"Backend for LLGuidance.\"\"\"\n\n    def __init__(self, model: SteerableModel):\n        \"\"\"\n        Parameters\n        ----------\n        model\n            The Outlines model of the user.\n\n        \"\"\"\n        import llguidance as llg\n\n        self.llg = llg\n        self.tensor_library_name = model.tensor_library_name\n        self.llg_tokenizer = self._create_llg_tokenizer(model)\n\n    def _create_llg_tokenizer(self, model: SteerableModel) -> \"LLGTokenizer\":\n        \"\"\"Create an llg tokenizer from the Outlines model's tokenizer.\n\n        Parameters\n        ----------\n        model: Model\n            The Outlines model.\n\n        Returns\n        -------\n        LLGTokenizer\n            The llg tokenizer.\n\n        \"\"\"\n        if isinstance(model, Transformers):\n            import llguidance.hf\n\n            return llguidance.hf.from_tokenizer(model.hf_tokenizer)\n\n        elif isinstance(model, LlamaCpp):\n            import llama_cpp\n            import llguidance.llamacpp\n\n            vocab = llama_cpp.llama_model_get_vocab(model.model.model)\n            return llguidance.llamacpp.lltokenizer_from_vocab(vocab)\n\n        elif isinstance(model, MLXLM): # pragma: no cover\n            import llguidance.hf\n\n            return llguidance.hf.from_tokenizer(\n                model.mlx_tokenizer._tokenizer\n            )\n\n        else: # pragma: no cover\n            raise ValueError(\n                f\"Unsupported model type: {type(model)}. 
\"\n                \"Llguidance only supports LlamaCpp, MLXLM \"\n                \"and Transformers models.\"\n            )\n\n    def get_json_schema_logits_processor(\n        self, json_schema: str\n    ) -> LLGuidanceLogitsProcessor:\n        \"\"\"Create a logits processor from a JSON schema.\n\n        Parameters\n        ----------\n        json_schema: str\n            The JSON schema to create a logits processor from.\n\n        Returns\n        -------\n        LogitsProcessor\n            The logits processor to use to constrain the generation.\n\n        \"\"\"\n        grammar_spec = self.llg.grammar_from(\"json_schema\", json_schema)\n        return LLGuidanceLogitsProcessor(\n            grammar_spec, self.llg_tokenizer, self.tensor_library_name\n        )\n\n    def get_regex_logits_processor(\n        self, regex: str\n    ) -> LLGuidanceLogitsProcessor:\n        \"\"\"Create a logits processor from a regex.\n\n        Parameters\n        ----------\n        regex: str\n            The regex to create a logits processor from.\n\n        Returns\n        -------\n        LogitsProcessor\n            The logits processor to use to constrain the generation.\n\n        \"\"\"\n        grammar_spec = self.llg.grammar_from(\"regex\", regex)\n        return LLGuidanceLogitsProcessor(\n            grammar_spec, self.llg_tokenizer, self.tensor_library_name\n        )\n\n    def get_cfg_logits_processor(\n        self, grammar: str\n    ) -> LLGuidanceLogitsProcessor:\n        \"\"\"Create a logits processor from a context-free grammar.\n\n        Parameters\n        ----------\n        grammar: str\n            The context-free grammar to create a logits processor from.\n\n        Returns\n        -------\n        LogitsProcessor\n            The logits processor to use to constrain the generation.\n\n        \"\"\"\n        # We try both lark and ebnf\n        try:\n            grammar_spec = self.llg.grammar_from(\"grammar\", grammar)\n        except 
ValueError:\n            grammar_spec = self.llg.grammar_from(\"lark\", grammar)\n        return LLGuidanceLogitsProcessor(\n            grammar_spec, self.llg_tokenizer, self.tensor_library_name\n        )\n"
  },
  {
    "path": "outlines/backends/outlines_core.py",
    "content": "\"\"\"Backend class for Outlines Core.\"\"\"\n\nfrom typing import Callable, Dict, List\n\nfrom outlines_core import Guide, Index, Vocabulary\nfrom outlines_core.json_schema import build_regex_from_schema\n\nfrom outlines.backends.base import BaseBackend\nfrom outlines.models import SteerableModel\nfrom outlines.models.llamacpp import LlamaCpp\nfrom outlines.models.mlxlm import MLXLM\nfrom outlines.models.transformers import Transformers\nfrom outlines.processors.base_logits_processor import (\n    OutlinesLogitsProcessor,\n    TensorType,\n)\n\n\nclass OutlinesCoreLogitsProcessor(OutlinesLogitsProcessor):\n    \"\"\"Logits processor for Outlines Core.\"\"\"\n\n    def __init__(self, index: Index, tensor_library_name: str):\n        \"\"\"\n        Parameters\n        ----------\n        index: Index\n            The Outlines Core `Index` instance to use to create the Outlines\n            Core `Guide` instances that will be used to bias the logits\n        tensor_library_name: str\n            The tensor library name to use for the logits processor.\n\n        \"\"\"\n        self.index = index\n        self.tensor_library_name = tensor_library_name\n        self.is_first_token = True\n        super().__init__(tensor_library_name)\n\n    def reset(self) -> None:\n        \"\"\"Reset the logits processor.\"\"\"\n        self.is_first_token = True\n\n    def _setup(self, batch_size: int, vocab_size: int) -> None:\n        \"\"\"Set the guides, bitmasks and some functions used in the\n        `process_logits` method.\n\n        This method is called when the first token is generated instead of\n        at initialization because we need to know the batch size and the device\n        of the logits.\n\n        Parameters\n        ----------\n        batch_size: int\n            The batch size.\n        vocab_size: int\n            The vocabulary size.\n\n        \"\"\"\n        if self.tensor_library_name == \"torch\":\n            from 
outlines_core.kernels.torch import allocate_token_bitmask\n\n            self.allocate_token_bitmask = allocate_token_bitmask\n            self.bias_logits = self._bias_logits_torch\n\n        elif self.tensor_library_name == \"numpy\":\n            from outlines_core.kernels.numpy import allocate_token_bitmask\n\n            self.allocate_token_bitmask = allocate_token_bitmask\n            self.bias_logits = self._bias_logits_numpy\n\n        elif self.tensor_library_name == \"mlx\":  # pragma: no cover\n            from outlines_core.kernels.mlx import allocate_token_bitmask\n\n            self.allocate_token_bitmask = allocate_token_bitmask\n            self.bias_logits = self._bias_logits_mlx\n\n        else:  # pragma: no cover\n            raise ValueError(f\"Unsupported tensor library: {self.tensor_library_name}\")\n\n        self._guides = [Guide(self.index) for _ in range(batch_size)]\n        self._bitmasks = [\n            self.allocate_token_bitmask(vocab_size) for _ in range(batch_size)\n        ]\n\n    def _bias_logits_mlx(  # pragma: no cover\n        self, batch_size: int, logits: TensorType\n    ) -> TensorType:\n        \"\"\"Bias the logits for MLX tensors.\"\"\"\n        from outlines_core.kernels.mlx import (\n            apply_token_bitmask,\n            fill_next_token_bitmask,\n        )\n\n        biased_logits_array = []\n        for i in range(batch_size):\n            fill_next_token_bitmask(self._guides[i], self._bitmasks[i])\n            biased_logits = apply_token_bitmask(\n                self.tensor_adapter.unsqueeze(logits[i]), # type: ignore\n                self._bitmasks[i],  # type: ignore\n            )\n            biased_logits_array.append(biased_logits)\n\n        return self.tensor_adapter.concatenate(biased_logits_array)\n\n    def _bias_logits_torch(self, batch_size: int, logits: TensorType) -> TensorType:\n        \"\"\"Bias the logits for Torch tensors.\"\"\"\n        from outlines_core.kernels.torch import (\n       
     apply_token_bitmask_inplace,\n            fill_next_token_bitmask,\n        )\n\n        for i in range(batch_size):\n            fill_next_token_bitmask(self._guides[i], self._bitmasks[i])\n            self._bitmasks[i] = self.tensor_adapter.to_device(\n                self._bitmasks[i], self.tensor_adapter.get_device(logits)\n            )\n            apply_token_bitmask_inplace(\n                self.tensor_adapter.unsqueeze(logits[i]),  # type: ignore\n                self._bitmasks[i],\n            )\n            self._bitmasks[i] = self.tensor_adapter.to_device(self._bitmasks[i], \"cpu\")\n\n        return logits\n\n    def _bias_logits_numpy(self, batch_size: int, logits: TensorType) -> TensorType:\n        \"\"\"Bias the logits for Numpy tensors.\"\"\"\n        from outlines_core.kernels.numpy import (\n            apply_token_bitmask_inplace,\n            fill_next_token_bitmask,\n        )\n\n        for i in range(batch_size):\n            fill_next_token_bitmask(self._guides[i], self._bitmasks[i])\n            apply_token_bitmask_inplace(\n                self.tensor_adapter.unsqueeze(logits[i]),  # type: ignore\n                self._bitmasks[i],\n            )\n\n        return logits\n\n    def process_logits(self, input_ids: TensorType, logits: TensorType) -> TensorType:\n        \"\"\"Use the guides to bias the logits.\n\n        Parameters\n        ----------\n        input_ids\n            The ids of the tokens of the existing sequences.\n        logits\n            The logits for the current generation step.\n\n        Returns\n        -------\n        TensorType\n            The biased logits.\n\n        \"\"\"\n        batch_size = self.tensor_adapter.shape(input_ids)[0]\n        vocab_size = self.tensor_adapter.shape(logits)[1]\n\n        if self.is_first_token:\n            self._setup(batch_size, vocab_size)\n            self.is_first_token = False\n        else:\n            for i in range(batch_size):\n                last_token_id 
= self.tensor_adapter.to_scalar(input_ids[i][-1])  # type: ignore\n                # This circumvents issue #227 in outlines_core\n                # Ideally, we would be able to advance all the times as the final\n                # state would accept the eos token leading to itself\n                if not self._guides[i].is_finished() or self._guides[i].accepts_tokens(\n                    [last_token_id]\n                ):\n                    self._guides[i].advance(token_id=last_token_id, return_tokens=False)\n\n        return self.bias_logits(batch_size, logits)\n\n\nclass OutlinesCoreBackend(BaseBackend):\n    \"\"\"Backend for Outlines Core.\"\"\"\n\n    def __init__(self, model: SteerableModel):\n        \"\"\"\n        Parameters\n        ----------\n        model\n            The Outlines model of the user.\n\n        \"\"\"\n        if isinstance(model, Transformers):\n            tokenizer = model.tokenizer\n            vocabulary = tokenizer.get_vocab()\n            eos_token_id = tokenizer.eos_token_id\n            eos_token = tokenizer.eos_token\n            token_to_str = tokenizer.convert_token_to_string\n        elif isinstance(model, LlamaCpp):\n            tokenizer = model.tokenizer  # type: ignore\n            vocabulary = tokenizer.vocabulary\n            eos_token_id = tokenizer.eos_token_id\n            eos_token = tokenizer.eos_token\n            token_to_str = tokenizer.convert_token_to_string\n        elif isinstance(model, MLXLM):  # pragma: no cover\n            tokenizer = model.mlx_tokenizer  # type: ignore\n            vocabulary = tokenizer.get_vocab()\n            eos_token_id = tokenizer.eos_token_id\n            eos_token = tokenizer.eos_token\n            token_to_str = lambda token: tokenizer.convert_tokens_to_string([token])  # type: ignore\n        else:  # pragma: no cover\n            raise ValueError(f\"Unsupported model type: {type(model)}\")\n\n        self.eos_token_id = eos_token_id\n        self.vocabulary = 
self.create_outlines_core_vocabulary(\n            vocabulary, eos_token_id, eos_token, token_to_str\n        )\n        self.tensor_library_name = model.tensor_library_name\n\n    def get_json_schema_logits_processor(self, json_schema: str):\n        \"\"\"Create a logits processor from a JSON schema.\n\n        Parameters\n        ----------\n        json_schema: str\n            The JSON schema to create a logits processor from.\n\n        Returns\n        -------\n        LogitsProcessor\n            The logits processor to use to constrain the generation.\n\n        \"\"\"\n        regex = build_regex_from_schema(json_schema)\n        return self.get_regex_logits_processor(regex)\n\n    def get_regex_logits_processor(self, regex: str):\n        \"\"\"Create a logits processor from a regex.\n\n        Parameters\n        ----------\n        regex: str\n            The regex to create a logits processor from.\n\n        Returns\n        -------\n        LogitsProcessor\n            The logits processor to use to constrain the generation.\n\n        \"\"\"\n        index = Index(regex, self.vocabulary)\n        return OutlinesCoreLogitsProcessor(index, self.tensor_library_name)\n\n    def get_cfg_logits_processor(self, grammar):\n        raise NotImplementedError(\n            \"Outlines Core does not support context-free grammar.\"\n        )\n\n    @staticmethod\n    def create_outlines_core_vocabulary(\n        vocab: Dict[str, int],\n        eos_token_id: int,\n        eos_token: str,\n        token_to_str: Callable[[str], str],\n    ) -> Vocabulary:\n        \"\"\"Create an Outlines Core Vocabulary instance.\n\n        Parameters\n        ----------\n        vocab: Dict[str, int]\n            The vocabulary to create an Outlines Core vocabulary from.\n        eos_token_id: int\n            The EOS token ID.\n        eos_token: str\n            The EOS token.\n        token_to_str: Callable[[str], str]\n            The function to convert a token to a 
string.\n\n        Returns\n        -------\n        Vocabulary\n            The Outlines Core Vocabulary instance.\n\n        \"\"\"\n        formatted_vocab: Dict[str, List[int]] = {}\n        for token, token_id in vocab.items():\n            # This step is necessary to transform special tokens into their\n            # string representation, in particular for spacing. We need those\n            # string representations as outlines core first builds an FSM from\n            # the regex provided that only contains regular strings.\n            token_as_str = token_to_str(token)\n            formatted_vocab.setdefault(token_as_str, []).append(token_id)\n        formatted_vocab.pop(eos_token)\n        return Vocabulary(eos_token_id, formatted_vocab)\n"
  },
  {
    "path": "outlines/backends/xgrammar.py",
    "content": "\"\"\"Backend class for XGrammar.\"\"\"\n\nfrom outlines.backends.base import BaseBackend\nfrom outlines.models import SteerableModel\nfrom outlines.models.mlxlm import MLXLM\nfrom outlines.models.transformers import Transformers\nfrom outlines.processors.base_logits_processor import (\n    OutlinesLogitsProcessor,\n    TensorType\n)\n\n\nclass XGrammarLogitsProcessor(OutlinesLogitsProcessor):\n    \"\"\"Logits processor for XGrammar.\"\"\"\n\n    def __init__(self, compiled_grammar: str, tensor_library_name: str,):\n        \"\"\"\n        Parameters\n        ----------\n        compiled_grammar: str\n            The compiled grammar to use to create the logits processor.\n        tensor_library_name: str\n            The name of the tensor library used by the model\n\n        \"\"\"\n        import xgrammar as xgr\n\n        self.xgr = xgr\n        self.is_first_token = True\n        self.compiled_grammar = compiled_grammar\n        self.tensor_library_name = tensor_library_name\n        super().__init__(tensor_library_name)\n\n    def reset(self):\n        \"\"\"Ensure self._setup is called again for the next generation.\"\"\"\n        self.is_first_token = True\n\n    def _setup(self, batch_size: int, vocab_size: int) -> None:\n        \"\"\"Setup the logits processor for a new generation.\"\"\"\n        if self.tensor_library_name == \"torch\":\n            self._bias_logits = self._bias_logits_torch\n        elif self.tensor_library_name == \"mlx\": # pragma: no cover\n            self._bias_logits = self._bias_logits_mlx\n        else: # pragma: no cover\n            raise ValueError(\n                f\"Unsupported tensor library: {self.tensor_library_name}\"\n            )\n\n        self._matchers = [\n            self.xgr.GrammarMatcher(self.compiled_grammar)\n            for _ in range(batch_size)\n        ]\n        self._bitmask = self.xgr.allocate_token_bitmask(batch_size, vocab_size)\n\n    def _bias_logits_torch(\n        self, 
input_ids: TensorType, logits: TensorType\n    ) -> TensorType:\n        \"\"\"Bias the logits for Torch tensors.\"\"\"\n        for i in range(self.tensor_adapter.shape(input_ids)[0]):\n            if not self._matchers[i].is_terminated():\n                self._matchers[i].fill_next_token_bitmask(self._bitmask, i)\n\n        self._bitmask = self.tensor_adapter.to_device(\n            self._bitmask,\n            self.tensor_adapter.get_device(logits)\n        )\n        self.xgr.apply_token_bitmask_inplace(logits, self._bitmask)\n        self._bitmask = self.tensor_adapter.to_device(\n            self._bitmask,\n            \"cpu\"\n        )\n\n        return logits\n\n    def _bias_logits_mlx( # pragma: no cover\n        self, input_ids: TensorType, logits: TensorType\n    ) -> TensorType:\n        \"\"\"Bias the logits for MLX tensors.\"\"\"\n        import mlx.core as mx\n        from xgrammar.kernels.apply_token_bitmask_mlx import apply_token_bitmask_mlx\n\n        for i in range(self.tensor_adapter.shape(input_ids)[0]):\n            if not self._matchers[i].is_terminated():\n                self._matchers[i].fill_next_token_bitmask(self._bitmask, i)\n\n        biased_logits = apply_token_bitmask_mlx(\n            mx.array(self._bitmask.numpy()), logits, self.tensor_adapter.shape(logits)[1]\n        )\n\n        return biased_logits\n\n    def process_logits(\n        self, input_ids: TensorType, logits: TensorType\n    ) -> TensorType:\n        \"\"\"Use the XGrammar matchers to bias the logits.\"\"\"\n        batch_size = self.tensor_adapter.shape(input_ids)[0]\n        vocab_size = self.tensor_adapter.shape(logits)[1]\n\n        if self.is_first_token:\n            self._setup(batch_size, vocab_size)\n            self.is_first_token = False\n        else:\n            for i in range(batch_size):\n                if not self._matchers[i].is_terminated(): # pragma: no cover\n                    last_token_id = self.tensor_adapter.to_scalar(\n                 
       input_ids[i][-1] # type: ignore\n                    )\n                    assert self._matchers[i].accept_token(last_token_id)\n\n        return self._bias_logits(input_ids, logits)\n\n\nclass XGrammarBackend(BaseBackend):\n    \"\"\"Backend for XGrammar.\"\"\"\n\n    def __init__(self, model: SteerableModel):\n        \"\"\"\n        Parameters\n        ----------\n        model\n            The Outlines model of the user.\n\n        \"\"\"\n        import xgrammar as xgr\n\n        if isinstance(model, Transformers):\n            tokenizer = model.hf_tokenizer\n        elif isinstance(model, MLXLM): # pragma: no cover\n            tokenizer = model.mlx_tokenizer._tokenizer\n        else: # pragma: no cover\n            raise ValueError(\n                \"The xgrammar backend only supports Transformers and \"\n                + \"MLXLM models\"\n            )\n\n        tokenizer_info = xgr.TokenizerInfo.from_huggingface(\n            tokenizer,\n            vocab_size=len(tokenizer.get_vocab())\n        )\n        self.grammar_compiler = xgr.GrammarCompiler(tokenizer_info)\n        self.tensor_library_name = model.tensor_library_name\n\n    def get_json_schema_logits_processor(\n        self, json_schema: str\n    ) -> XGrammarLogitsProcessor:\n        \"\"\"Create a logits processor from a JSON schema.\n\n        Parameters\n        ----------\n        json_schema: str\n            The JSON schema to create a logits processor from.\n\n        Returns\n        -------\n        LogitsProcessor\n            The logits processor to use to constrain the generation.\n\n        \"\"\"\n        compiled_grammar = self.grammar_compiler.compile_json_schema(\n            json_schema\n        )\n        return XGrammarLogitsProcessor(\n            compiled_grammar,\n            self.tensor_library_name\n        )\n\n    def get_regex_logits_processor(\n        self, regex: str\n    ) -> XGrammarLogitsProcessor:\n        \"\"\"Create a logits processor from a 
regex.\n\n        Parameters\n        ----------\n        regex: str\n            The regex to create a logits processor from.\n\n        Returns\n        -------\n        LogitsProcessor\n            The logits processor to use to constrain the generation.\n\n        \"\"\"\n        compiled_grammar = self.grammar_compiler.compile_regex(regex)\n        return XGrammarLogitsProcessor(\n            compiled_grammar,\n            self.tensor_library_name\n        )\n\n    def get_cfg_logits_processor(\n        self, grammar: str\n    ) -> XGrammarLogitsProcessor:\n        \"\"\"Create a logits processor from a context-free grammar.\n\n        Parameters\n        ----------\n        grammar: str\n            The context-free grammar to create a logits processor from.\n\n        Returns\n        -------\n        LogitsProcessor\n            The logits processor to use to constrain the generation.\n\n        \"\"\"\n        compiled_grammar = self.grammar_compiler.compile_grammar(grammar)\n        return XGrammarLogitsProcessor(\n            compiled_grammar,\n            self.tensor_library_name\n        )\n"
  },
  {
    "path": "outlines/caching.py",
    "content": "\"\"\"Caching and memoization of function calls.\"\"\"\n\nimport asyncio\nimport contextlib\nimport functools\nimport os\nimport tempfile\nfrom typing import Callable, Optional\n\nimport cloudpickle\nfrom diskcache import Cache, Disk\nfrom diskcache.core import ENOVAL, UNKNOWN, args_to_key, full_name\n\n_caching_enabled = True\n\n\nclass CloudpickleDisk(Disk): # pragma: no cover\n    def __init__(self, directory, compress_level=1, **kwargs):\n        self.compress_level = compress_level\n        super().__init__(directory, **kwargs)\n\n    def put(self, key):\n        data = cloudpickle.dumps(key)\n        return super().put(data)\n\n    def get(self, key, raw):\n        data = super().get(key, raw)\n        return cloudpickle.loads(data)\n\n    def store(self, value, read, key=UNKNOWN):\n        if not read:\n            value = cloudpickle.dumps(value)\n        return super().store(value, read, key=key)\n\n    def fetch(self, mode, filename, value, read):\n        data = super().fetch(mode, filename, value, read)\n        if not read:\n            data = cloudpickle.loads(data)\n        return data\n\n\n@functools.lru_cache(1)\ndef get_cache():\n    \"\"\"Get the context object that contains previously-computed return values.\n\n    The cache is used to avoid unnecessary computations and API calls, which can\n    be long and expensive for large models.\n\n    The cache directory defaults to `HOMEDIR/.cache/outlines`, but this choice\n    can be overridden by the user by setting the value of the `OUTLINES_CACHE_DIR`\n    environment variable.\n\n    \"\"\"\n    from outlines._version import __version__ as outlines_version  # type: ignore\n\n    outlines_cache_dir = os.environ.get(\"OUTLINES_CACHE_DIR\")\n    xdg_cache_home = os.environ.get(\"XDG_CACHE_HOME\")\n    home_dir = os.path.normpath(os.path.expanduser(\"~\"))\n    if outlines_cache_dir:\n        # OUTLINES_CACHE_DIR takes precedence\n        cache_dir = outlines_cache_dir\n    elif 
xdg_cache_home:  # pragma: no cover\n        cache_dir = os.path.join(xdg_cache_home, \"outlines\")\n    elif home_dir != \"/\": # pragma: no cover\n        cache_dir = os.path.join(home_dir, \".cache\", \"outlines\")\n    else:  # pragma: no cover\n        # home_dir may be / inside a docker container without existing user\n        tempdir = tempfile.gettempdir()\n        cache_dir = os.path.join(tempdir, \".cache\", \"outlines\")\n\n    memory = Cache(\n        cache_dir,\n        eviction_policy=\"none\",\n        cull_limit=0,\n        disk=CloudpickleDisk,\n    )\n\n    # ensure if version upgrade occurs, old cache is pruned\n    if outlines_version != memory.get(\"__version__\"):\n        memory.clear()\n    memory[\"__version__\"] = outlines_version\n\n    return memory\n\n\ndef cache(expire: Optional[float] = None, typed=False, ignore=()):\n    \"\"\"Caching decorator for memoizing function calls.\n\n    The cache key is created based on the values returned by the key_function callable\n    if provided or based on the arguments of the decorated function directly otherwise\n\n    This is based on `diskcache`'s `memoize`.\n\n    Parameters\n    ----------\n    expire\n        Seconds until arguments expire.\n    typed\n        Cache different types separately.\n    ignore\n        Positional or keyword arguments to ignore.\n\n    Returns\n    -------\n        A decorator function that can be applied to other functions.\n    \"\"\"\n\n    def decorator(cached_function: Callable):\n        memory = get_cache()\n\n        base = (full_name(cached_function),)\n\n        if asyncio.iscoroutinefunction(cached_function):  # pragma: no cover\n\n            async def wrapper(*args, **kwargs):\n                if not _caching_enabled:\n                    return await cached_function(*args, **kwargs)\n\n                cache_key = wrapper.__cache_key__(*args, **kwargs)\n                result = wrapper.__memory__.get(cache_key, default=ENOVAL, retry=True)\n\n           
     if result is ENOVAL:\n                    result = await cached_function(*args, **kwargs)\n                    wrapper.__memory__.set(cache_key, result, expire, retry=True)\n\n                return result\n\n        else:\n\n            def wrapper(*args, **kwargs):\n                if not _caching_enabled:\n                    return cached_function(*args, **kwargs)\n\n                cache_key = wrapper.__cache_key__(*args, **kwargs)\n                result = wrapper.__memory__.get(cache_key, default=ENOVAL, retry=True)\n\n                if result is ENOVAL:\n                    result = cached_function(*args, **kwargs)\n                    wrapper.__memory__.set(cache_key, result, expire, retry=True)\n\n                return result\n\n        def __cache_key__(*args, **kwargs):\n            \"\"\"Make key for cache given function arguments.\"\"\"\n            return args_to_key(base, args, kwargs, typed, ignore)\n\n        wrapper.__cache_key__ = __cache_key__  # type: ignore\n        wrapper.__memory__ = memory  # type: ignore\n        wrapper.__wrapped__ = cached_function  # type: ignore\n\n        return wrapper\n\n    return decorator\n\n\ndef disable_cache():\n    \"\"\"Disable the cache for this session.\n\n    Generative models output different results each time they are called when\n    sampling. This can be a desirable property for some workflows, in which case\n    one can call `outlines.call.disable` to disable the cache for the session.\n\n    This function does not delete the cache, call `outlines.cache.clear`\n    instead. 
It also does not overwrite the cache with the values returned\n    during the session.\n\n    Example\n    -------\n\n    `outlines.cache.disable` should be called right after importing outlines:\n\n    >>> import outlines.caching as cache\n    >>> cache.disable_cache()\n\n    \"\"\"\n    global _caching_enabled\n    _caching_enabled = False\n\n\ndef clear_cache():\n    \"\"\"Erase the cache completely.\"\"\"\n    memory = get_cache()\n    memory.clear()\n\n\n@contextlib.contextmanager\ndef cache_disabled():\n    # outlines.caching._caching_enabled\n    global _caching_enabled\n    original_state = _caching_enabled\n    _caching_enabled = False\n    try:\n        yield\n    finally:\n        _caching_enabled = original_state\n"
  },
  {
    "path": "outlines/generator.py",
    "content": "\"\"\"Encapsulate a model and an output type into a reusable object.\"\"\"\n\nfrom typing import (\n    Any,\n    AsyncIterator,\n    Iterator,\n    List,\n    Optional,\n    Union,\n)\n\nfrom outlines.models import (\n    AsyncBlackBoxModel,\n    BlackBoxModel,\n    SteerableModel,\n)\nfrom outlines.models.base import AsyncModel, Model\nfrom outlines.backends import (\n    get_cfg_logits_processor,\n    get_json_schema_logits_processor,\n    get_regex_logits_processor,\n)\nfrom outlines.backends.base import LogitsProcessorType\nfrom outlines.types import CFG, JsonSchema\nfrom outlines.types.dsl import python_types_to_terms, to_regex\n\n\nclass BlackBoxGenerator:\n    \"\"\"Synchronous generator for which we don't control constrained\n    generation.\n\n    The output type provided is not compiled into a logits processor, but is\n    instead directly passed on to the model.\n\n    \"\"\"\n    output_type: Optional[Any]\n\n    def __init__(self, model: BlackBoxModel, output_type: Optional[Any]):\n        \"\"\"\n        Parameters\n        ----------\n        model\n            An instance of an Outlines model.\n        output_type\n            The output type that will be used to constrain the generation.\n\n        \"\"\"\n        self.model = model\n        self.output_type = output_type\n\n    def __call__(self, prompt: Any, **inference_kwargs) -> Any:\n        \"\"\"Generate a response from the model.\n\n        Parameters\n        ----------\n        prompt\n            The prompt to use to generate a response.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        Any\n            The response generated by the model.\n\n        \"\"\"\n        return self.model.generate(\n            prompt, self.output_type, **inference_kwargs\n        )\n\n    def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]:\n        \"\"\"Generate a batch of responses from 
the model.\n\n        Parameters\n        ----------\n        prompts\n            The list of prompts to use to generate a batch of responses.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        List[Any]\n            The list of responses generated by the model.\n\n        \"\"\"\n        return self.model.generate_batch(\n            prompts, self.output_type, **inference_kwargs\n        )\n\n    def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:\n        \"\"\"Generate a stream of responses from the model.\n\n        Parameters\n        ----------\n        prompt\n            The prompt to use to generate a response.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        Any\n            The response generated by the model.\n\n        \"\"\"\n        return self.model.generate_stream(\n            prompt, self.output_type, **inference_kwargs\n        )\n\n\nclass AsyncBlackBoxGenerator:\n    \"\"\"Asynchronous generator for which we don't control constrained\n    generation.\n\n    The output type provided is not compiled into a logits processor, but is\n    instead directly passed on to the model.\n\n    \"\"\"\n    output_type: Optional[Any]\n\n    def __init__(self, model: AsyncBlackBoxModel, output_type: Optional[Any]):\n        \"\"\"\n        Parameters\n        ----------\n        model\n            An instance of an Outlines model.\n        output_type\n            The output type that will be used to constrain the generation.\n\n        \"\"\"\n        self.model = model\n        self.output_type = output_type\n\n    async def __call__(self, prompt: Any, **inference_kwargs) -> Any:\n        \"\"\"Generate a response from the model.\n\n        Parameters\n        ----------\n        prompt\n            The prompt to use to generate a response.\n        **inference_kwargs\n 
           Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        Any\n            The response generated by the model.\n\n        \"\"\"\n        return await self.model.generate(\n            prompt, self.output_type, **inference_kwargs\n        )\n\n    async def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]:\n        \"\"\"Generate a batch of responses from the model.\n\n        Parameters\n        ----------\n        prompts\n            The list of prompts to use to generate a batch of responses.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        List[Any]\n            The list of responses generated by the model.\n\n        \"\"\"\n        return await self.model.generate_batch(\n            prompts, self.output_type, **inference_kwargs\n        )\n\n    async def stream(self, prompt: Any, **inference_kwargs) -> AsyncIterator[Any]:\n        \"\"\"Generate a stream of responses from the model.\n\n        Parameters\n        ----------\n        prompt\n            The prompt to use to generate a response.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        Any\n            The response generated by the model.\n\n        \"\"\"\n        async for chunk in self.model.generate_stream(  # pragma: no cover\n            prompt, self.output_type, **inference_kwargs\n        ):\n            yield chunk\n\n\nclass SteerableGenerator:\n    \"\"\"Represents a generator for which we control constrained generation.\n\n    The generator is responsible for building and storing the logits processor\n    (which can be quite expensive to build), and then passing it to the model\n    when the generator is called.\n\n    The argument defining constrained generation can be of 2 types associated\n    to different methods to create an instance of the 
generator:\n    - `output_type` (through `__init__`): an output type as defined in the\n      `outlines.types` module\n    - `processor` (through `from_processor`): an already built logits processor\n       as defined in the `outlines.processors` module\n\n    The 2 parameters are mutually exclusive.\n\n    \"\"\"\n    logits_processor: Optional[LogitsProcessorType]\n\n    def __init__(\n        self,\n        model: SteerableModel,\n        output_type: Optional[Any],\n        backend_name: Optional[str] = None,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        model\n            An instance of an Outlines model.\n        output_type\n            The output type expressed as a Python type\n        backend_name\n            The name of the backend to use to create the logits processor.\n\n        \"\"\"\n        self.model = model\n        if output_type is None:\n            self.logits_processor = None\n        else:\n            term = python_types_to_terms(output_type)\n            if isinstance(term, CFG):\n                cfg_string = term.definition\n                self.logits_processor = get_cfg_logits_processor(\n                    backend_name,\n                    model,\n                    cfg_string,\n                )\n            elif isinstance(term, JsonSchema):\n                self.logits_processor = get_json_schema_logits_processor(\n                    backend_name,\n                    model,\n                    term.schema,\n                )\n            else:\n                regex_string = to_regex(term)\n                self.logits_processor = get_regex_logits_processor(\n                    backend_name,\n                    model,\n                    regex_string,\n                )\n\n    @classmethod\n    def from_processor(\n        cls, model: SteerableModel, processor: LogitsProcessorType\n    ):\n        \"\"\"Create a generator from a logits processor.\n\n        Parameters\n        ----------\n        
model\n            An instance of an Outlines model.\n        processor\n            An instance of a logits processor.\n\n        \"\"\"\n        instance = cls.__new__(cls)\n        instance.model = model\n        instance.logits_processor = processor\n\n        return instance\n\n    def __call__(self, prompt: Any, **inference_kwargs) -> Any:\n        \"\"\"Generate a response from the model.\n\n        Parameters\n        ----------\n        prompt\n            The prompt to use to generate a response.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        Any\n            The response generated by the model.\n\n        \"\"\"\n        if self.logits_processor is not None:\n            self.logits_processor.reset()\n        return self.model.generate(\n            prompt, self.logits_processor, **inference_kwargs\n        )\n\n    def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]:\n        \"\"\"Generate a batch of responses from the model.\n\n        Parameters\n        ----------\n        prompts\n            The list of prompts to use to generate a batch of responses.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        List[Any]\n            The list of responses generated by the model.\n\n        \"\"\"\n        if self.logits_processor is not None:\n            self.logits_processor.reset()\n        return self.model.generate_batch(\n            prompts, self.logits_processor, **inference_kwargs\n        )\n\n    def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:\n        \"\"\"Generate a stream of responses from the model.\n\n        Parameters\n        ----------\n        prompt\n            The prompt to use to generate a response.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        
-------\n        Any\n            The response generated by the model.\n\n        \"\"\"\n        if self.logits_processor is not None:\n            self.logits_processor.reset()\n        return self.model.generate_stream(\n            prompt, self.logits_processor, **inference_kwargs\n        )\n\n\ndef Generator(\n    model: Union[Model, AsyncModel],\n    output_type: Optional[Any] = None,\n    backend: Optional[str] = None,\n    *,\n    processor: Optional[LogitsProcessorType] = None,\n) -> Union[SteerableGenerator, BlackBoxGenerator, AsyncBlackBoxGenerator]:\n    \"\"\"Create a generator for the given model and output parameters.\n\n    The 2 parameters output_type and processor are mutually exclusive. The\n    parameters processor is only supported for SteerableModel instances\n    (typically local models) and is intended to be only used by advanced users.\n\n    Parameters\n    ----------\n    model\n        An instance of an Outlines model.\n    output_type\n        The output type expressed as a Python type or a type defined in the\n        outlines.types.dsl module.\n    backend\n        The name of the backend to use to create the logits processor. 
Only\n        used for steerable models if there is an output type and `processor` is\n        not provided.\n    processor\n        An instance of a logits processor.\n\n    Returns\n    -------\n    Union[SteerableGenerator, BlackBoxGenerator, AsyncBlackBoxGenerator]\n        A generator instance.\n\n    \"\"\"\n    provided_output_params = sum(\n        param is not None\n        for param in [output_type, processor]\n    )\n    if provided_output_params > 1:\n        raise ValueError(\n            \"At most one of output_type or processor can be provided\"\n        )\n\n    if isinstance(model, SteerableModel): # type: ignore\n        if processor is not None:\n            return SteerableGenerator.from_processor(model, processor) # type: ignore\n        else:\n            return SteerableGenerator(model, output_type, backend) # type: ignore\n    else:\n        if processor is not None:\n            raise NotImplementedError(\n                \"This model does not support logits processors\"\n            )\n        if isinstance(model, AsyncBlackBoxModel): # type: ignore\n            return AsyncBlackBoxGenerator(model, output_type) # type: ignore\n        elif isinstance(model, BlackBoxModel): # type: ignore\n            return BlackBoxGenerator(model, output_type) # type: ignore\n        else:\n            raise ValueError(\n                \"The model argument must be an instance of \"\n                \"SteerableModel, BlackBoxModel or AsyncBlackBoxModel\"\n            )\n"
  },
  {
    "path": "outlines/grammars/arithmetic.lark",
    "content": "?start: sum\n\n?sum: product\n| sum \"+\" product   -> add\n| sum \"-\" product   -> sub\n\n?product: atom\n| product \"*\" atom  -> mul\n| product \"/\" atom  -> div\n\n?atom: NUMBER           -> number\n| \"-\" atom         -> neg\n| \"(\" sum \")\"\n\n%import common.NUMBER\n%import common.WS_INLINE\n\n%ignore WS_INLINE\n"
  },
  {
    "path": "outlines/grammars/common.lark",
    "content": "// Adapted from https://github.com/lark-parser/lark/blob/master/lark/grammars/common.lark\n\n// Lark License:\n// Copyright © 2017 Erez Shinan\n//\n// Permission is hereby granted, free of charge, to any person obtaining a copy of\n// this software and associated documentation files (the \"Software\"), to deal in\n// the Software without restriction, including without limitation the rights to\n// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of\n// the Software, and to permit persons to whom the Software is furnished to do so,\n// subject to the following conditions:\n//\n// The above copyright notice and this permission notice shall be included in all\n// copies or substantial portions of the Software.\n//\n// THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS\n// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR\n// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER\n// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n\n\n// Basic terminals for common use\n\n\n//\n// Numbers\n//\n\nDIGIT: \"0\"..\"9\"\nHEXDIGIT: \"a\"..\"f\"|\"A\"..\"F\"|DIGIT\n\nINT: DIGIT+\nSIGNED_INT: [\"+\"|\"-\"] INT\nDECIMAL: INT \".\" INT? 
| \".\" INT\n\n// float = /-?\\d+(\\.\\d+)?([eE][+-]?\\d+)?/\n_EXP: (\"e\"|\"E\") SIGNED_INT\nFLOAT: INT _EXP | DECIMAL _EXP?\nSIGNED_FLOAT: [\"+\"|\"-\"] FLOAT\n\nNUMBER: FLOAT | INT\nSIGNED_NUMBER: [\"+\"|\"-\"] NUMBER\n\nUNESCAPED_STRING: /\\\"[^\"]*\\\"/\n\n// based on `outlines/fsm/json_schema.py`\n_NON_CONTROL_CHAR: /([^\"\\\\\\x00-\\x1F\\x7F-\\x9F])/\n_ESCAPED_CHAR: /\\\\/ (_NON_CONTROL_CHAR | /\\\\/ | /\"/)\nESCAPED_STRING_INNER: _NON_CONTROL_CHAR | _ESCAPED_CHAR\nESCAPED_STRING: /\"/ ESCAPED_STRING_INNER* /\"/\n\n\n\n//\n// Names (Variables)\n//\nLCASE_LETTER: \"a\"..\"z\"\nUCASE_LETTER: \"A\"..\"Z\"\n\nLETTER: UCASE_LETTER | LCASE_LETTER\nWORD: LETTER+\n\nCNAME: (\"_\"|LETTER) (\"_\"|LETTER|DIGIT)*\n\n\n//\n// Whitespace\n//\nWS_INLINE: (\" \"|/\\t/)+\nWS: /[ \\t\\f\\r\\n]/+\n\nCR : /\\r/\nLF : /\\n/\nNEWLINE: (CR? LF)+\n\n\n// Comments\nSH_COMMENT: /#[^\\n]*/\nCPP_COMMENT: /\\/\\/[^\\n]*/\nC_COMMENT: \"/*\" /(.|\\n)*?/ \"*/\"\nSQL_COMMENT: /--[^\\n]*/\n"
  },
  {
    "path": "outlines/grammars/json.lark",
    "content": "?start: value\n\n?value: object\n| array\n| ESCAPED_STRING\n| SIGNED_NUMBER      -> number\n| \"true\"             -> true\n| \"false\"            -> false\n| \"null\"             -> null\n\narray  : \"[\" [value (\",\" value)*] \"]\"\nobject : \"{\" [pair (\",\" pair)*] \"}\"\npair   : ESCAPED_STRING \":\" value\n\n%import common.ESCAPED_STRING\n%import common.SIGNED_NUMBER\n%import common.WS\n\n%ignore WS\n"
  },
  {
    "path": "outlines/grammars.py",
    "content": "\"\"\"A few common Lark grammars.\"\"\"\n\nfrom pathlib import Path\n\nGRAMMAR_PATH = Path(__file__).parent / \"grammars\"\n\n\ndef read_grammar(\n    grammar_file_name: str,\n    base_grammar_path: Path = GRAMMAR_PATH,\n) -> str:\n    \"\"\"Read grammar file from default grammar path.\n\n    Parameters\n    ----------\n    grammar_file_name\n        The name of the grammar file to read.\n    base_grammar_path\n        The path to the directory containing the grammar file.\n\n    Returns\n    -------\n    str\n        The contents of the grammar file.\n\n    \"\"\"\n    full_path = base_grammar_path / grammar_file_name\n    with open(full_path) as file:\n        return file.read()\n\n\narithmetic = read_grammar(\"arithmetic.lark\")\njson = read_grammar(\"json.lark\")\n"
  },
  {
    "path": "outlines/inputs.py",
    "content": "\"\"\"Contain classes used to define the inputs of a model.\"\"\"\n\nimport base64\nfrom dataclasses import dataclass\nfrom io import BytesIO\nfrom typing import Any, Dict, List, Optional\n\nfrom PIL import Image as PILImage\n\n\n@dataclass\nclass Image:\n    \"\"\"Contains an image that can be passed to a multimodal model.\n\n    Provide one or several instances of this class along with a text prompt\n    in a list as the `model_input` argument to a model that supports vision.\n\n    Parameters\n    ----------\n    image\n        The image to use in the text generation.\n\n    \"\"\"\n    image: PILImage.Image\n\n    def __post_init__(self):\n        image = self.image\n\n        if not image.format:\n            raise TypeError(\n                \"Could not read the format of the image passed to the model.\"\n            )\n\n        buffer = BytesIO()\n        image.save(buffer, format=image.format)\n        self.image_str = base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n        self.image_format = f\"image/{image.format.lower()}\"\n\n\n@dataclass\nclass Video:\n    \"\"\"Contains a video that can be passed to a multimodal model.\n\n    Provide one or several instances of this class along with a text prompt\n    in a list as the `model_input` argument to a model that supports video\n    processing.\n\n    Parameters\n    ----------\n    video\n        The video to use in the text generation.\n\n    \"\"\"\n    video: Any\n\n\n@dataclass\nclass Audio:\n    \"\"\"Contains an audio that can be passed to a multimodal model.\n\n    Provide one or several instances of this class along with a text prompt\n    in a list as the `model_input` argument to a model that supports audio\n    processing.\n\n    Parameters\n    ----------\n    audio\n        The audio to use in the text generation.\n\n    \"\"\"\n    audio: Any\n\n\n@dataclass\nclass Chat:\n    \"\"\"Contains the input for a chat model.\n\n    Provide an instance of this class as the 
`model_input` argument to a model\n    that supports chat.\n\n    Each message contained in the messages list must be a dict with 'role' and\n    'content' keys. The role can be 'user', 'assistant', or 'system'. The content\n    supports either:\n    - a text string,\n    - a list containing text and assets (e.g., [\"Describe...\", Image(...)]),\n    - only for HuggingFace transformers models, a list of dict items with explicit types (e.g.,\n      [{\"type\": \"text\", \"text\": \"Describe...\"}, {\"type\": \"image\", \"image\": Image(...)}])\n\n    Examples\n    --------\n    ```python\n    # Initialize the chat with a system message.\n    chat_prompt = Chat([\n        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n    ])\n\n    # Add a user message with an image and call the model (not shown here).\n    chat_prompt.add_user_message([\"Describe the image below\", Image(image)])\n\n    # Add as an assistant message the response from the model.\n    chat_prompt.add_assistant_message(\"There is a black cat sitting on a couch.\")\n    ```\n\n    Parameters\n    ----------\n    messages\n        The list of messages that will be provided to the model.\n\n    \"\"\"\n    messages: List[Dict[str, Any]] = None # type: ignore\n\n    def __post_init__(self):\n        if self.messages is None:\n            self.messages = []\n\n    def append(self, message: Dict[str, Any]):\n        \"\"\"Add a message to the chat.\n\n        Parameters\n        ----------\n        message\n            The message to add to the chat.\n\n        \"\"\"\n        self.messages.append(message)\n\n    def extend(self, messages: List[Dict[str, Any]]):\n        \"\"\"Add a list of messages to the chat.\n\n        Parameters\n        ----------\n        messages\n            The list of messages to add to the chat.\n\n        \"\"\"\n        self.messages.extend(messages)\n\n    def pop(self) -> Dict[str, Any]:\n        \"\"\"Remove the last message from the chat.\n\n        
Returns\n        -------\n        message\n            The removed message.\n\n        \"\"\"\n        return self.messages.pop()\n\n    def add_system_message(self, content: str | List[Any]):\n        \"\"\"Add a system message to the chat.\n\n        Parameters\n        ----------\n        content\n            The content of the system message.\n\n        \"\"\"\n        self.messages.append({\"role\": \"system\", \"content\": content})\n\n    def add_user_message(self, content: str | List[Any]):\n        \"\"\"Add a user message to the chat.\n\n        Parameters\n        ----------\n        content\n            The content of the user message.\n\n        \"\"\"\n        self.messages.append({\"role\": \"user\", \"content\": content})\n\n    def add_assistant_message(self, content: str | List[Any]):\n        \"\"\"Add an assistant message to the chat.\n\n        Parameters\n        ----------\n        content\n            The content of the assistant message.\n\n        \"\"\"\n        self.messages.append({\"role\": \"assistant\", \"content\": content})\n\n    def __str__(self):\n        return \"\\n\".join(str(message) for message in self.messages)\n\n    def __repr__(self):\n        return f\"Chat(messages={self.messages})\"\n"
  },
  {
    "path": "outlines/models/__init__.py",
    "content": "\"\"\"Module that contains all the models integrated in outlines.\n\nWe group the models in submodules by provider instead of theme (completion, chat\ncompletion, diffusers, etc.) and use routing functions everywhere else in the\ncodebase.\n\n\"\"\"\n\nfrom typing import Union\n\nfrom .anthropic import Anthropic, from_anthropic\nfrom .base import Model, ModelTypeAdapter\nfrom .dottxt import Dottxt, from_dottxt\nfrom .gemini import Gemini, from_gemini\nfrom .llamacpp import LlamaCpp, from_llamacpp\nfrom .lmstudio import AsyncLMStudio, LMStudio, from_lmstudio\nfrom .mistral import AsyncMistral, Mistral, from_mistral\nfrom .mlxlm import MLXLM, from_mlxlm\nfrom .ollama import AsyncOllama, Ollama, from_ollama\nfrom .openai import AsyncOpenAI, OpenAI, from_openai\nfrom .sglang import AsyncSGLang, SGLang, from_sglang\nfrom .tgi import TGI, AsyncTGI, from_tgi\nfrom .transformers import (\n    Transformers,\n    TransformersMultiModal,\n    TransformerTokenizer,\n    from_transformers,\n)\nfrom .vllm import VLLM, AsyncVLLM, from_vllm\nfrom .vllm_offline import VLLMOffline, from_vllm_offline\n\nSteerableModel = Union[LlamaCpp, MLXLM, Transformers]\nBlackBoxModel = Union[\n    Anthropic,\n    Dottxt,\n    Gemini,\n    LMStudio,\n    Ollama,\n    OpenAI,\n    Mistral,\n    SGLang,\n    TGI,\n    VLLM,\n    VLLMOffline,\n]\nAsyncBlackBoxModel = Union[\n    AsyncLMStudio,\n    AsyncMistral,\n    AsyncOllama,\n    AsyncOpenAI,\n    AsyncTGI,\n    AsyncSGLang,\n    AsyncVLLM,\n]\n\n__all__ = [\n\n    \"Anthropic\",\n    \"from_anthropic\",\n    \"Model\",\n    \"ModelTypeAdapter\",\n    \"Dottxt\",\n    \"from_dottxt\",\n    \"Gemini\",\n    \"from_gemini\",\n    \"LlamaCpp\",\n    \"from_llamacpp\",\n    \"AsyncLMStudio\",\n    \"LMStudio\",\n    \"from_lmstudio\",\n    \"AsyncMistral\",\n    \"Mistral\",\n    \"from_mistral\",\n    \"MLXLM\",\n    \"from_mlxlm\",\n    \"AsyncOllama\",\n    \"Ollama\",\n    \"from_ollama\",\n    \"AsyncOpenAI\",\n    \"OpenAI\",\n 
   \"from_openai\",\n    \"AsyncSGLang\",\n    \"SGLang\",\n    \"from_sglang\",\n    \"AsyncTGI\",\n    \"TGI\",\n    \"from_tgi\",\n    \"Transformers\",\n    \"TransformerTokenizer\",\n    \"TransformersMultiModal\",\n    \"from_transformers\",\n    \"VLLMOffline\",\n    \"from_vllm_offline\",\n    \"AsyncVLLM\",\n    \"VLLM\",\n    \"from_vllm\",\n    \"SteerableModel\",\n    \"BlackBoxModel\",\n    \"AsyncBlackBoxModel\",\n]\n"
  },
  {
    "path": "outlines/models/anthropic.py",
    "content": "\"\"\"Integration with Anthropic's API.\"\"\"\n\nfrom functools import singledispatchmethod\nfrom typing import TYPE_CHECKING, Any, Iterator, Optional, Union\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.base import Model, ModelTypeAdapter\n\nif TYPE_CHECKING:\n    from anthropic import Anthropic as AnthropicClient\n\n__all__ = [\"Anthropic\", \"from_anthropic\"]\n\n\nclass AnthropicTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `Anthropic` model.\n\n    `AnthropicTypeAdapter` is responsible for preparing the arguments to\n    Anthropic's `messages.create` method: the input (prompt and possibly\n    image).\n    Anthropic does not support defining the output type, so\n    `format_output_type` is not implemented.\n\n    \"\"\"\n\n    @singledispatchmethod\n    def format_input(self, model_input):\n        \"\"\"Generate the `messages` argument to pass to the client.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n\n        Returns\n        -------\n        dict\n            The `messages` argument to pass to the client.\n\n        \"\"\"\n        raise TypeError(\n            f\"The input type {type(model_input)} is not available with \"\n            \"Anthropic. 
The only available types are `str`, `list` and `Chat` \"\n            \"(containing a prompt and images).\"\n        )\n\n    @format_input.register(str)\n    def format_str_model_input(self, model_input: str) -> dict:\n        return {\n            \"messages\": [self._create_message(\"user\", model_input)]\n        }\n\n    @format_input.register(list)\n    def format_list_model_input(self, model_input: list) -> dict:\n        return {\n            \"messages\": [\n                self._create_message(\"user\", model_input)\n            ]\n        }\n\n    @format_input.register(Chat)\n    def format_chat_model_input(self, model_input: Chat) -> dict:\n        \"\"\"Generate the `messages` argument to pass to the client when the user\n        passes a Chat instance.\n\n        \"\"\"\n        return {\n            \"messages\": [\n                self._create_message(message[\"role\"], message[\"content\"])\n                for message in model_input.messages\n            ]\n        }\n\n    def _create_message(self, role: str, content: str | list) -> dict:\n        \"\"\"Create a message.\"\"\"\n\n        if isinstance(content, str):\n            return {\n                \"role\": role,\n                \"content\": content,\n            }\n\n        elif isinstance(content, list):\n            prompt = content[0]\n            images = content[1:]\n\n            if not all(isinstance(image, Image) for image in images):\n                raise ValueError(\"All assets provided must be of type Image\")\n\n            image_content_messages = [\n                {\n                    \"type\": \"image\",\n                    \"source\": {\n                        \"type\": \"base64\",\n                        \"media_type\": image.image_format,\n                        \"data\": image.image_str,\n                    },\n                }\n                for image in images\n            ]\n\n            return {\n                \"role\": role,\n                
\"content\": [\n                    *image_content_messages,\n                    {\"type\": \"text\", \"text\": prompt},\n                ],\n            }\n\n        else:\n            raise ValueError(\n                f\"Invalid content type: {type(content)}. \"\n                \"The content must be a string or a list containing a string \"\n                \"and a list of images.\"\n            )\n\n    def format_output_type(self, output_type):\n        \"\"\"Not implemented for Anthropic.\"\"\"\n        if output_type is None:\n            return {}\n        else:\n            raise NotImplementedError(\n                f\"The output type {output_type} is not available with \"\n                \"Anthropic.\"\n            )\n\n\nclass Anthropic(Model):\n    \"\"\"Thin wrapper around the `anthropic.Anthropic` client.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `anthropic.Anthropic` client.\n\n    \"\"\"\n    def __init__(\n        self, client: \"AnthropicClient\", model_name: Optional[str] = None\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            An `anthropic.Anthropic` client.\n        model_name\n            The name of the model to use.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = AnthropicTypeAdapter()\n\n    def generate(\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> str:\n        \"\"\"Generate text using Anthropic.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            As structured generation is not supported by Anthropic, the value\n            of this argument must be `None`. 
Otherwise, an error will be\n            raised at runtime.\n        **inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        str\n            The response generated by the model.\n\n        \"\"\"\n        messages = self.type_adapter.format_input(model_input)\n\n        if output_type is not None:\n            raise NotImplementedError(\n                f\"The type {output_type} is not available with Anthropic.\"\n            )\n\n        if (\n            \"model\" not in inference_kwargs\n            and self.model_name is not None\n        ):\n            inference_kwargs[\"model\"] = self.model_name\n\n        completion = self.client.messages.create(\n            **messages,\n            **inference_kwargs,\n        )\n        return completion.content[0].text\n\n    def generate_batch(\n        self,\n        model_input,\n        output_type = None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\n            \"Anthropic does not support batch generation.\"\n        )\n\n    def generate_stream(\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> Iterator[str]:\n        \"\"\"Stream text using Anthropic.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            As structured generation is not supported by Anthropic, the value\n            of this argument must be `None`. 
Otherwise, an error will be\n            raised at runtime.\n        **inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Iterator[str]\n            An iterator that yields the text generated by the model.\n\n        \"\"\"\n        messages = self.type_adapter.format_input(model_input)\n\n        if output_type is not None:\n            raise NotImplementedError(\n                f\"The type {output_type} is not available with Anthropic.\"\n            )\n\n        if (\n            \"model\" not in inference_kwargs\n            and self.model_name is not None\n        ):\n            inference_kwargs[\"model\"] = self.model_name\n\n        stream = self.client.messages.create(\n            **messages,\n            stream=True,\n            **inference_kwargs,\n        )\n\n        for chunk in stream:\n            if (\n                chunk.type == \"content_block_delta\"\n                and chunk.delta.type == \"text_delta\"\n            ):\n                yield chunk.delta.text\n\n\ndef from_anthropic(\n    client: \"AnthropicClient\", model_name: Optional[str] = None\n) -> Anthropic:\n    \"\"\"Create an Outlines `Anthropic` model instance from an\n    `anthropic.Anthropic` client instance.\n\n    Parameters\n    ----------\n    client\n        An `anthropic.Anthropic` client instance.\n    model_name\n        The name of the model to use.\n\n    Returns\n    -------\n    Anthropic\n        An Outlines `Anthropic` model instance.\n\n    \"\"\"\n    return Anthropic(client, model_name)\n"
  },
  {
    "path": "outlines/models/base.py",
    "content": "\"\"\"Base classes for all models and model type adapters.\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom typing import Any, AsyncIterator, Iterator, List, Optional\n\n\nclass ModelTypeAdapter(ABC):\n    \"\"\"Base class for all model type adapters.\n\n    A type adapter instance must be given as a value to the `type_adapter`\n    attribute when instantiating a model.\n    The type adapter is responsible for formatting the input and output types\n    passed to the model to match the specific format expected by the\n    associated model.\n\n    \"\"\"\n\n    @abstractmethod\n    def format_input(self, model_input: Any) -> Any:\n        \"\"\"Format the user input to the expected format of the model.\n\n        For API-based models, it typically means creating the `messages`\n        argument passed to the client. For local models, it can mean casting\n        the input from str to list for instance.\n        This method is also used to validate that the input type provided by\n        the user is supported by the model.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n\n        Returns\n        -------\n        Any\n            The formatted input to be passed to the model.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def format_output_type(self, output_type: Optional[Any] = None) -> Any:\n        \"\"\"Format the output type to the expected format of the model.\n\n        For black-box models, this typically means creating a `response_format`\n        argument. 
For steerable models, it means formatting the logits processor\n        to create the object type expected by the model.\n\n        Parameters\n        ----------\n        output_type\n            The output type provided by the user.\n\n        Returns\n        -------\n        Any\n            The formatted output type to be passed to the model.\n\n        \"\"\"\n        ...\n\nclass Model(ABC):\n    \"\"\"Base class for all synchronous models.\n\n    This class defines shared `__call__`, `batch` and `stream` methods that can\n    be used to call the model directly. The `generate`, `generate_batch`, and\n    `generate_stream` methods must be implemented by the subclasses.\n    All models inheriting from this class must define a `type_adapter`\n    attribute of type `ModelTypeAdapter`. The methods of the `type_adapter`\n    attribute are used in the `generate`, `generate_batch`, and\n    `generate_stream` methods to format the input and output types received by\n    the model.\n    Additionally, steerable models must define a `tensor_library_name`\n    attribute.\n\n    \"\"\"\n    type_adapter: ModelTypeAdapter\n    tensor_library_name: str\n\n    def __call__(\n        self,\n        model_input: Any,\n        output_type: Optional[Any] = None,\n        backend: Optional[str] = None,\n        **inference_kwargs: Any\n    ) -> Any:\n        \"\"\"Call the model.\n\n        Users can call the model directly, in which case we will create a\n        generator instance with the output type provided and call it.\n        Thus, those commands are equivalent:\n        ```python\n        generator = Generator(model, Foo)\n        generator(\"prompt\")\n        ```\n        and\n        ```python\n        model(\"prompt\", Foo)\n        ```\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n        output_type\n            The output type provided by the user.\n        backend\n            The name of the backend 
to use to create the logits processor that\n            will be used to generate the response. Only used for steerable\n            models if `output_type` is provided.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        Any\n            The response generated by the model.\n\n        \"\"\"\n        from outlines.generator import Generator\n\n        return Generator(self, output_type, backend)(model_input, **inference_kwargs)\n\n    def batch(\n        self,\n        model_input: List[Any],\n        output_type: Optional[Any] = None,\n        backend: Optional[str] = None,\n        **inference_kwargs: Any\n    ) -> List[Any]:\n        \"\"\"Make a batch call to the model (several inputs at once).\n\n        Users can use the `batch` method from the model directly, in which\n        case we will create a generator instance with the output type provided\n        and then invoke its `batch` method.\n        Thus, those commands are equivalent:\n        ```python\n        generator = Generator(model, Foo)\n        generator.batch([\"prompt1\", \"prompt2\"])\n        ```\n        and\n        ```python\n        model.batch([\"prompt1\", \"prompt2\"], Foo)\n        ```\n\n        Parameters\n        ----------\n        model_input\n            The list of inputs provided by the user.\n        output_type\n            The output type provided by the user.\n        backend\n            The name of the backend to use to create the logits processor that\n            will be used to generate the response. 
Only used for steerable\n            models if `output_type` is provided.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        List[Any]\n            The list of responses generated by the model.\n\n        \"\"\"\n        from outlines import Generator\n\n        generator = Generator(self, output_type, backend)\n        return generator.batch(model_input, **inference_kwargs) # type: ignore\n\n    def stream(\n        self,\n        model_input: Any,\n        output_type: Optional[Any] = None,\n        backend: Optional[str] = None,\n        **inference_kwargs: Any\n    ) -> Iterator[Any]:\n        \"\"\"Stream a response from the model.\n\n        Users can use the `stream` method from the model directly, in which\n        case we will create a generator instance with the output type provided\n        and then invoke its `stream` method.\n        Thus, those commands are equivalent:\n        ```python\n        generator = Generator(model, Foo)\n        for chunk in generator.stream(\"prompt\"):\n            print(chunk)\n        ```\n        and\n        ```python\n        for chunk in model.stream(\"prompt\", Foo):\n            print(chunk)\n        ```\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n        output_type\n            The output type provided by the user.\n        backend\n            The name of the backend to use to create the logits processor that\n            will be used to generate the response. 
Only used for steerable\n            models if `output_type` is provided.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        Iterator[Any]\n            A stream of responses from the model.\n\n        \"\"\"\n        from outlines import Generator\n\n        generator = Generator(self, output_type, backend)\n        return generator.stream(model_input, **inference_kwargs) # type: ignore\n\n    @abstractmethod\n    def generate(\n        self,\n        model_input: Any,\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any\n    ) -> Any:\n        \"\"\"Generate a response from the model.\n\n        The output_type argument contains a logits processor for steerable\n        models while it contains a type (Json, Enum...) for black-box models.\n        This method is not intended to be used directly by end users.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n        output_type\n            The output type provided by the user.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        Any\n            The response generated by the model.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def generate_batch(\n        self,\n        model_input: List[Any],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any\n    ) -> List[Any]:\n        \"\"\"Generate a batch of responses from the model.\n\n        The output_type argument contains a logits processor for steerable\n        models while it contains a type (Json, Enum...) 
for black-box models.\n        This method is not intended to be used directly by end users.\n\n        Parameters\n        ----------\n        model_input\n            The list of inputs provided by the user.\n        output_type\n            The output type provided by the user.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        List[Any]\n            The list of responses generated by the model.\n\n        \"\"\"\n        ...\n    @abstractmethod\n    def generate_stream(\n        self,\n        model_input: Any,\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any\n    ) -> Iterator[Any]:\n        \"\"\"Generate a stream of responses from the model.\n\n        The output_type argument contains a logits processor for steerable\n        models while it contains a type (Json, Enum...) for black-box models.\n        This method is not intended to be used directly by end users.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n        output_type\n            The output type provided by the user.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        Iterator[Any]\n            A stream of responses from the model.\n\n        \"\"\"\n        ...\n\nclass AsyncModel(ABC):\n    \"\"\"Base class for all asynchronous models.\n\n    This class defines shared `__call__`, `batch` and `stream` methods that can\n    be used to call the model directly. The `generate`, `generate_batch`, and\n    `generate_stream` methods must be implemented by the subclasses.\n    All models inheriting from this class must define a `type_adapter`\n    attribute of type `ModelTypeAdapter`. 
The methods of the `type_adapter`\n    attribute are used in the `generate`, `generate_batch`, and\n    `generate_stream` methods to format the input and output types received by\n    the model.\n    Additionally, steerable models must define a `tensor_library_name`\n    attribute.\n\n    \"\"\"\n    type_adapter: ModelTypeAdapter\n    tensor_library_name: str\n\n    async def __call__(\n        self,\n        model_input: Any,\n        output_type: Optional[Any] = None,\n        backend: Optional[str] = None,\n        **inference_kwargs: Any\n    ) -> Any:\n        \"\"\"Call the model.\n\n        Users can call the model directly, in which case we will create a\n        generator instance with the output type provided and call it.\n        Thus, those commands are equivalent:\n        ```python\n        generator = Generator(model, Foo)\n        await generator(\"prompt\")\n        ```\n        and\n        ```python\n        await model(\"prompt\", Foo)\n        ```\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n        output_type\n            The output type provided by the user.\n        backend\n            The name of the backend to use to create the logits processor that\n            will be used to generate the response. 
Only used for steerable\n            models if `output_type` is provided.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        Any\n            The response generated by the model.\n\n        \"\"\"\n        from outlines import Generator\n\n        generator = Generator(self, output_type, backend)\n        return await generator(model_input, **inference_kwargs)\n\n    async def batch(\n        self,\n        model_input: List[Any],\n        output_type: Optional[Any] = None,\n        backend: Optional[str] = None,\n        **inference_kwargs: Any\n    ) -> List[Any]:\n        \"\"\"Make a batch call to the model (several inputs at once).\n\n        Users can use the `batch` method from the model directly, in which\n        case we will create a generator instance with the output type provided\n        and then invoke its `batch` method.\n        Thus, those commands are equivalent:\n        ```python\n        generator = Generator(model, Foo)\n        await generator.batch([\"prompt1\", \"prompt2\"])\n        ```\n        and\n        ```python\n        await model.batch([\"prompt1\", \"prompt2\"], Foo)\n        ```\n\n        Parameters\n        ----------\n        model_input\n            The list of inputs provided by the user.\n        output_type\n            The output type provided by the user.\n        backend\n            The name of the backend to use to create the logits processor that\n            will be used to generate the response. 
Only used for steerable\n            models if `output_type` is provided.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        List[Any]\n            The list of responses generated by the model.\n\n        \"\"\"\n        from outlines import Generator\n\n        generator = Generator(self, output_type, backend)\n        return await generator.batch(model_input, **inference_kwargs) # type: ignore\n\n    async def stream(\n        self,\n        model_input: Any,\n        output_type: Optional[Any] = None,\n        backend: Optional[str] = None,\n        **inference_kwargs: Any\n    ) -> AsyncIterator[Any]:\n        \"\"\"Stream a response from the model.\n\n        Users can use the `stream` method from the model directly, in which\n        case we will create a generator instance with the output type provided\n        and then invoke its `stream` method.\n        Thus, those commands are equivalent:\n        ```python\n        generator = Generator(model, Foo)\n        async for chunk in generator.stream(\"prompt\"):\n            print(chunk)\n        ```\n        and\n        ```python\n        async for chunk in model.stream(\"prompt\", Foo):\n            print(chunk)\n        ```\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n        output_type\n            The output type provided by the user.\n        backend\n            The name of the backend to use to create the logits processor that\n            will be used to generate the response. 
Only used for steerable\n            models if `output_type` is provided.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        AsyncIterator[Any]\n            A stream of responses from the model.\n\n        \"\"\"\n        from outlines import Generator\n\n        generator = Generator(self, output_type, backend)\n\n        async for chunk in generator.stream(model_input, **inference_kwargs):  # type: ignore\n            yield chunk\n\n    @abstractmethod\n    async def generate(\n        self,\n        model_input: Any,\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any\n    ) -> Any:\n        \"\"\"Generate a response from the model.\n\n        The output_type argument contains a logits processor for steerable\n        models while it contains a type (Json, Enum...) for black-box models.\n        This method is not intended to be used directly by end users.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n        output_type\n            The output type provided by the user.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        Any\n            The response generated by the model.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    async def generate_batch(\n        self,\n        model_input: List[Any],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any\n    ) -> List[Any]:\n        \"\"\"Generate a batch of responses from the model.\n\n        The output_type argument contains a logits processor for steerable\n        models while it contains a type (Json, Enum...) 
for black-box models.\n        This method is not intended to be used directly by end users.\n\n        Parameters\n        ----------\n        model_input\n            The list of inputs provided by the user.\n        output_type\n            The output type provided by the user.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        List[Any]\n            The list of responses generated by the model.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    async def generate_stream(\n        self,\n        model_input: Any,\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any\n    ) -> AsyncIterator[Any]:\n        \"\"\"Generate a stream of responses from the model.\n\n        The output_type argument contains a logits processor for steerable\n        models while it contains a type (Json, Enum...) for black-box models.\n        This method is not intended to be used directly by end users.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n        output_type\n            The output type provided by the user.\n        **inference_kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        AsyncIterator[Any]\n            A coroutine that will produce an async iterator of responses from the model.\n\n        \"\"\"\n        ...\n"
  },
  {
    "path": "outlines/models/dottxt.py",
    "content": "\"\"\"Integration with Dottxt's API.\"\"\"\n\nfrom typing import TYPE_CHECKING, Any, Optional, cast\n\nfrom outlines.models.base import Model, ModelTypeAdapter\nfrom outlines.types import CFG, JsonSchema, Regex\n\nif TYPE_CHECKING:\n    from dottxt import Dottxt as DottxtClient\n\n__all__ = [\"Dottxt\", \"from_dottxt\"]\n\n\nclass DottxtTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `Dottxt` model.\"\"\"\n\n    def format_input(self, model_input: str) -> str:\n        \"\"\"Format the prompt to pass to the client.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n\n        Returns\n        -------\n        str\n            The input to pass to the client.\n\n        \"\"\"\n        if isinstance(model_input, str):\n            return model_input\n        raise TypeError(\n            f\"The input type {model_input} is not available with Dottxt. \"\n            \"The only available type is `str`.\"\n        )\n\n    def format_output_type(self, output_type: Optional[Any] = None) -> str:\n        \"\"\"Format the output type to pass to the client.\n\n        Parameters\n        ----------\n        output_type\n            The output type provided by the user.\n\n        Returns\n        -------\n        str\n            The output type to pass to the client.\n\n        \"\"\"\n        # Unsupported languages\n        if output_type is None:\n            raise TypeError(\n                \"You must provide an output type. Dottxt only supports \"\n                \"constrained generation.\"\n            )\n        elif isinstance(output_type, Regex):\n            raise TypeError(\n                \"Regex-based structured outputs will soon be available with \"\n                \"Dottxt. 
Use an open source model in the meantime.\"\n            )\n        elif isinstance(output_type, CFG):\n            raise TypeError(\n                \"CFG-based structured outputs will soon be available with \"\n                \"Dottxt. Use an open source model in the meantime.\"\n            )\n        elif JsonSchema.is_json_schema(output_type):\n            return cast(str, JsonSchema.convert_to(output_type, [\"str\"]))\n        else:\n            type_name = getattr(output_type, \"__name__\", output_type)\n            raise TypeError(\n                f\"The type `{type_name}` is not supported by Dottxt. \"\n                \"Consider using a local mode instead.\"\n            )\n\n\nclass Dottxt(Model):\n    \"\"\"Thin wrapper around the `dottxt.client.Dottxt` client.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `dottxt.client.Dottxt` client.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        client: \"DottxtClient\",\n        model_name: Optional[str] = None,\n        model_revision: Optional[str] = None,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            A `dottxt.Dottxt` client.\n        model_name\n            The name of the model to use.\n        model_revision\n            The revision of the model to use.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.model_revision = model_revision\n        self.type_adapter = DottxtTypeAdapter()\n\n    def generate(\n        self,\n        model_input: str,\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> str:\n        \"\"\"Generate text using Dottxt.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
The\n            output type must be of a type that can be converted to a JSON\n            schema.\n        **inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        str\n            The text generated by the model.\n\n        \"\"\"\n        prompt = self.type_adapter.format_input(model_input)\n        json_schema = self.type_adapter.format_output_type(output_type)\n\n        if (\n            \"model_name\" not in inference_kwargs\n            and self.model_name is not None\n        ):\n            inference_kwargs[\"model_name\"] = self.model_name\n\n        if (\n            \"model_revision\" not in inference_kwargs\n            and self.model_revision is not None\n        ):\n            inference_kwargs[\"model_revision\"] = self.model_revision\n\n        completion = self.client.json(\n            prompt,\n            json_schema,\n            **inference_kwargs,\n        )\n        return completion.data\n\n    def generate_batch(\n        self,\n        model_input,\n        output_type = None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\n            \"Dottxt does not support batch generation.\"\n        )\n\n    def generate_stream(\n        self,\n        model_input,\n        output_type=None,\n        **inference_kwargs,\n    ):\n        \"\"\"Not available for Dottxt.\"\"\"\n        raise NotImplementedError(\n            \"Dottxt does not support streaming. 
Call the model/generator for \"\n            + \"regular generation instead.\"\n        )\n\n\ndef from_dottxt(\n    client: \"DottxtClient\",\n    model_name: Optional[str] = None,\n    model_revision: Optional[str] = None,\n) -> Dottxt:\n    \"\"\"Create an Outlines `Dottxt` model instance from a `dottxt.Dottxt`\n    client instance.\n\n    Parameters\n    ----------\n    client\n        A `dottxt.Dottxt` client instance.\n    model_name\n        The name of the model to use.\n    model_revision\n        The revision of the model to use.\n\n    Returns\n    -------\n    Dottxt\n        An Outlines `Dottxt` model instance.\n\n    \"\"\"\n    return Dottxt(client, model_name, model_revision)\n"
  },
  {
    "path": "outlines/models/gemini.py",
    "content": "\"\"\"Integration with Gemini's API.\"\"\"\n\nfrom functools import singledispatchmethod\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    Iterator,\n    Optional,\n    Union,\n    get_args,\n)\n\nfrom outlines.inputs import Image, Chat\nfrom outlines.models.base import Model, ModelTypeAdapter\nfrom outlines.types import CFG, Choice, JsonSchema, Regex\nfrom outlines.types.utils import (\n    is_enum,\n    get_enum_from_choice,\n    get_enum_from_literal,\n    is_genson_schema_builder,\n    is_literal,\n    is_typing_list,\n)\n\nif TYPE_CHECKING:\n    from google.genai import Client\n\n__all__ = [\"Gemini\", \"from_gemini\"]\n\n\nclass GeminiTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `Gemini` model.\n\n    `GeminiTypeAdapter` is responsible for preparing the arguments to Gemini's\n    client `models.generate_content` method: the input (prompt and possibly\n    image), as well as the output type (either JSON or multiple choice).\n\n    \"\"\"\n\n    @singledispatchmethod\n    def format_input(self, model_input):\n        \"\"\"Generate the `contents` argument to pass to the client.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n\n        Returns\n        -------\n        dict\n            The `contents` argument to pass to the client.\n\n        \"\"\"\n        raise TypeError(\n            f\"The input type {type(model_input)} is not available with \"\n            \"Gemini. 
The only available types are `str`, `list` and `Chat` \"\n            \"(containing a prompt and images).\"\n        )\n\n    @format_input.register(str)\n    def format_str_model_input(self, model_input: str) -> dict:\n        return {\"contents\": [self._create_text_part(model_input)]}\n\n    @format_input.register(list)\n    def format_list_model_input(self, model_input: list) -> dict:\n        return {\n            \"contents\": [\n                self._create_message(\"user\", model_input)\n            ]\n        }\n\n    @format_input.register(Chat)\n    def format_chat_model_input(self, model_input: Chat) -> dict:\n        \"\"\"Generate the `contents` argument to pass to the client when the user\n        passes a Chat instance.\n\n        \"\"\"\n        return {\n            \"contents\": [\n                self._create_message(message[\"role\"], message[\"content\"])\n                for message in model_input.messages\n            ]\n        }\n\n    def _create_message(self, role: str, content: str | list) -> dict:\n        \"\"\"Create a message.\"\"\"\n\n        # Gemini uses \"model\" instead of \"assistant\"\n        if role == \"assistant\":\n            role = \"model\"\n\n        if isinstance(content, str):\n            return {\n                \"role\": role,\n                \"parts\": [self._create_text_part(content)],\n            }\n\n        elif isinstance(content, list):\n            prompt = content[0]\n            images = content[1:]\n\n            if not all(isinstance(image, Image) for image in images):\n                raise ValueError(\"All assets provided must be of type Image\")\n\n            image_parts = [\n                self._create_img_part(image)\n                for image in images\n            ]\n\n            return {\n                \"role\": role,\n                \"parts\": [\n                    self._create_text_part(prompt),\n                    *image_parts,\n                ],\n            }\n\n        
else:\n            raise ValueError(\n                f\"Invalid content type: {type(content)}. \"\n                \"The content must be a string or a list containing a string \"\n                \"and a list of images.\"\n            )\n\n        return {\"contents\": [prompt, *image_parts]}\n\n\n    def _create_text_part(self, text: str) -> dict:\n        \"\"\"Create a text input part for a message.\"\"\"\n        return {\n            \"text\": text,\n        }\n\n    def _create_img_part(self, image: Image) -> dict:\n        \"\"\"Create an image input part for a message.\"\"\"\n        return {\n            \"inline_data\": {\n                \"mime_type\": image.image_format,\n                \"data\": image.image_str,\n            }\n        }\n\n    def format_output_type(self, output_type: Optional[Any] = None) -> dict:\n        \"\"\"Generate the `generation_config` argument to pass to the client.\n\n        Parameters\n        ----------\n        output_type\n            The output type provided by the user.\n\n        Returns\n        -------\n        dict\n            The `generation_config` argument to pass to the client.\n\n        \"\"\"\n\n        # Unsupported output types\n        if isinstance(output_type, Regex):\n            raise TypeError(\n                \"Neither regex-based structured outputs nor the `pattern` \"\n                \"keyword in Json Schema are available with Gemini. Use an \"\n                \"open source model or dottxt instead.\"\n            )\n        elif isinstance(output_type, CFG):\n            raise TypeError(\n                \"CFG-based structured outputs are not available with Gemini. 
\"\n                \"Use an open source model or dottxt instead.\"\n            )\n\n        if output_type is None:\n            return {}\n\n        # JSON schema types\n        elif JsonSchema.is_json_schema(output_type):\n            return self.format_json_output_type(\n                JsonSchema.convert_to(\n                    output_type,\n                    [\"dataclass\", \"typeddict\", \"pydantic\"]\n                )\n            )\n\n        # List of structured types\n        elif is_typing_list(output_type):\n            return self.format_list_output_type(output_type)\n\n        # Multiple choice types\n        elif is_enum(output_type):\n            return self.format_enum_output_type(output_type)\n        elif is_literal(output_type):\n            enum = get_enum_from_literal(output_type)\n            return self.format_enum_output_type(enum)\n        elif isinstance(output_type, Choice):\n            enum = get_enum_from_choice(output_type)\n            return self.format_enum_output_type(enum)\n\n        else:\n            type_name = getattr(output_type, \"__name__\", output_type)\n            raise TypeError(\n                f\"The type `{type_name}` is not supported by Gemini. 
\"\n                \"Consider using a local model or dottxt instead.\"\n            )\n\n    def format_enum_output_type(self, output_type: Optional[Any]) -> dict:\n        return {\n            \"response_mime_type\": \"text/x.enum\",\n            \"response_schema\": output_type,\n        }\n\n    def format_json_output_type(self, output_type: Optional[Any]) -> dict:\n        return {\n            \"response_mime_type\": \"application/json\",\n            \"response_schema\": output_type,\n        }\n\n    def format_list_output_type(self, output_type: Optional[Any]) -> dict:\n        args = get_args(output_type)\n\n        if len(args) == 1:\n            item_type = args[0]\n\n            if JsonSchema.is_json_schema(item_type):\n                return {\n                    \"response_mime_type\": \"application/json\",\n                    \"response_schema\": list[  # type: ignore\n                        JsonSchema.convert_to(\n                            item_type,\n                            [\"dataclass\", \"typeddict\", \"pydantic\"]\n                        )\n                    ],\n                }\n            else:\n                raise TypeError(\n                    \"The list items output type must contain a JSON schema \"\n                    \"type.\"\n                )\n\n        raise TypeError(\n            f\"Gemini only supports homogeneous lists: \"\n            \"list[BaseModel], list[TypedDict] or list[dataclass]. 
\"\n            f\"Got {output_type} instead.\"\n        )\n\n\nclass Gemini(Model):\n    \"\"\"Thin wrapper around the `google.genai.Client` client.\n\n    This wrapper is used to convert the input and output types specified by\n    the users at a higher level to arguments to the `google.genai.Client`\n    client.\n\n    \"\"\"\n\n    def __init__(self, client: \"Client\", model_name: Optional[str] = None):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            A `google.genai.Client` instance.\n        model_name\n            The name of the model to use.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = GeminiTypeAdapter()\n\n    def generate(\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Any] = None,\n        **inference_kwargs,\n    ) -> str:\n        \"\"\"Generate a response from the model.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
The\n            output type must be of a type that can be converted to a JSON\n            schema, a list of such types, or a multiple choice type.\n        **inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        str\n            The response generated by the model.\n\n        \"\"\"\n        contents = self.type_adapter.format_input(model_input)\n        generation_config = self.type_adapter.format_output_type(output_type)\n\n        completion = self.client.models.generate_content(\n            **contents,\n            model=inference_kwargs.pop(\"model\", self.model_name),\n            config={**generation_config, **inference_kwargs}\n        )\n\n        return completion.text\n\n    def generate_batch(\n        self,\n        model_input,\n        output_type = None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\n            \"Gemini does not support batch generation.\"\n        )\n\n    def generate_stream(\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Any] = None,\n        **inference_kwargs,\n    ) -> Iterator[str]:\n        \"\"\"Generate a stream of responses from the model.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
The\n            output type must be of a type that can be converted to a JSON\n            schema, a list of such types, or a multiple choice type.\n        **inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Iterator[str]\n            An iterator that yields the text generated by the model.\n\n        \"\"\"\n        contents = self.type_adapter.format_input(model_input)\n        generation_config = self.type_adapter.format_output_type(output_type)\n\n        stream = self.client.models.generate_content_stream(\n            **contents,\n            model=inference_kwargs.pop(\"model\", self.model_name),\n            config={**generation_config, **inference_kwargs},\n        )\n\n        for chunk in stream:\n            if hasattr(chunk, \"text\") and chunk.text:\n                yield chunk.text\n\n\ndef from_gemini(client: \"Client\", model_name: Optional[str] = None) -> Gemini:\n    \"\"\"Create an Outlines `Gemini` model instance from a\n    `google.genai.Client` instance.\n\n    Parameters\n    ----------\n    client\n        A `google.genai.Client` instance.\n    model_name\n        The name of the model to use.\n\n    Returns\n    -------\n    Gemini\n        An Outlines `Gemini` model instance.\n\n    \"\"\"\n    return Gemini(client, model_name)\n"
  },
  {
    "path": "outlines/models/llamacpp.py",
    "content": "\"\"\"Integration with the `llama-cpp-python` library.\"\"\"\n\nimport ctypes\nfrom functools import singledispatchmethod\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    Dict,\n    Iterator,\n    List,\n    Optional,\n    Set,\n    Tuple,\n    Union,\n)\n\nfrom outlines.inputs import Chat\nfrom outlines.models.base import Model, ModelTypeAdapter\nfrom outlines.models.tokenizer import Tokenizer\nfrom outlines.processors import OutlinesLogitsProcessor\n\nif TYPE_CHECKING:\n    from llama_cpp import Llama, LogitsProcessorList\n\n__all__ = [\"LlamaCpp\", \"from_llamacpp\"]\n\n\nclass LlamaCppTokenizer(Tokenizer):\n    def __init__(self, model: \"Llama\"):\n        self.tokenizer = model.tokenizer()\n        self.special_tokens: Set[str] = set()\n        self.vocabulary: Dict[str, int] = dict()\n\n        # TODO: Remove when https://github.com/ggerganov/llama.cpp/pull/5613\n        # is resolved\n        self._hf_tokenizer = None\n        if (\n            hasattr(model, \"tokenizer_\")\n            and hasattr(model.tokenizer_, \"hf_tokenizer\")\n        ):\n            self._hf_tokenizer = model.tokenizer_.hf_tokenizer\n            self.eos_token_id = self._hf_tokenizer.eos_token_id\n            self.eos_token = self._hf_tokenizer.eos_token\n            self.vocabulary = self._hf_tokenizer.get_vocab()\n        else:\n            from llama_cpp import (\n                llama_model_get_vocab,\n                llama_token_to_piece,\n            )\n\n            self.eos_token_id = model.token_eos()\n            size = 32\n            buffer = (ctypes.c_char * size)()\n            vocab = llama_model_get_vocab(model.model)\n            for i in range(model.n_vocab()):\n                n = llama_token_to_piece(\n                    vocab,\n                    i,\n                    buffer,\n                    size,\n                    0,\n                    True\n                )\n                # n < 0 is an error return from 
llama_token_to_piece;\n                # skip invalid tokens so they don't pollute the vocabulary.\n                if n < 0:\n                    continue\n                # n > size means the piece was truncated; retry with a\n                # larger buffer so distinct tokens are not collapsed.\n                if n > size:\n                    big = (ctypes.c_char * n)()\n                    llama_token_to_piece(vocab, i, big, n, 0, True)\n                    token_piece = big[:n].decode(\"utf-8\", errors=\"replace\")  # type: ignore\n                else:\n                    token_piece = buffer[:n].decode(\"utf-8\", errors=\"replace\")  # type: ignore\n                self.vocabulary[token_piece] = i\n                if i == self.eos_token_id:\n                    self.eos_token = token_piece\n\n        self.pad_token_id = self.eos_token_id\n        # ensure stable ordering of vocabulary\n        self.vocabulary = {\n            tok: tok_id\n            for tok, tok_id\n            in sorted(self.vocabulary.items(), key=lambda x: x[1])\n        }\n        self._hash = None\n\n    def decode(self, token_ids: List[int]) -> List[str]:\n        decoded_bytes = self.tokenizer.detokenize(token_ids)\n        return [decoded_bytes.decode(\"utf-8\", errors=\"ignore\")]\n\n    def encode(\n        self,\n        prompt: Union[str, List[str]],\n        add_bos: bool = True,\n        special: bool = True,\n    ) -> Tuple[List[int], List[int]]:\n        if isinstance(prompt, list):\n            raise NotImplementedError(\n                \"llama-cpp-python tokenizer doesn't support batch tokenization\"\n            )\n        token_ids = self.tokenizer.tokenize(\n            prompt.encode(\"utf-8\", errors=\"ignore\"),\n            add_bos=add_bos,\n            special=special,\n        )\n        # generate attention mask, missing from llama-cpp-python.\n        # For a single (non-batched) prompt there is no real padding, so\n        # every token — including EOS when 
it appears inside the prompt —\n        # should be attended.  We therefore always set the mask to 1.\n        attention_mask = [1] * len(token_ids)\n        return token_ids, attention_mask\n\n    def convert_token_to_string(self, token: str) -> str:\n        if self._hf_tokenizer is not None:\n            from transformers.file_utils import SPIECE_UNDERLINE\n\n            token_str = self._hf_tokenizer.convert_tokens_to_string([token])\n            if (\n                token.startswith(SPIECE_UNDERLINE)\n                or token == \"<0x20>\"\n            ):  # pragma: no cover\n                token_str = \" \" + token_str\n            return token_str\n        else:\n            return token\n\n    def __eq__(self, other):\n        if not isinstance(other, LlamaCppTokenizer):\n            return False\n        return self.__getstate__() == other.__getstate__()\n\n    def __hash__(self):\n        # We create a custom hash as pickle.dumps(self) is not stable\n        if self._hash is None:\n            self._hash = hash((\n                tuple(sorted(self.vocabulary.items())),\n                self.eos_token_id,\n                self.eos_token,\n                self.pad_token_id,\n                tuple(sorted(self.special_tokens)),\n            ))\n        return self._hash\n\n    def __getstate__(self):\n        \"\"\"Create a stable representation for outlines.caching\"\"\"\n        return (\n            self.vocabulary,\n            self.eos_token_id,\n            self.eos_token,\n            self.pad_token_id,\n            sorted(self.special_tokens),\n        )\n\n    def __setstate__(self, state):\n        raise NotImplementedError(\"Cannot load a pickled llamacpp tokenizer\")\n\n\nclass LlamaCppTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `LlamaCpp` model.\n\n    `LlamaCppTypeAdapter` is responsible for preparing the arguments to\n    the `Llama` object text generation methods.\n\n    \"\"\"\n\n    def __init__(self, has_chat_template: 
bool = False):\n        \"\"\"\n        Parameters\n        ----------\n        has_chat_template\n            Whether the model has a chat template defined.\n        \"\"\"\n        self.has_chat_template = has_chat_template\n\n    @singledispatchmethod\n    def format_input(self, model_input):\n        \"\"\"Generate the prompt argument to pass to the model.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n\n        Returns\n        -------\n        str\n            The formatted input to be passed to the model.\n\n        \"\"\"\n        raise NotImplementedError(\n            f\"The input type {type(model_input)} is not available with \"\n            \"LlamaCpp. The only available types are `str` and `Chat`.\"\n        )\n\n    @format_input.register(str)\n    def format_str_input(self, model_input: str) -> str | list:\n        if self.has_chat_template:\n            return [{\"role\": \"user\", \"content\": model_input}]\n        return model_input\n\n    @format_input.register(Chat)\n    def format_chat_input(self, model_input: Chat) -> list:\n        if not all(\n            isinstance(message[\"content\"], str)\n            for message in model_input.messages\n        ):\n            raise ValueError(\n                \"LlamaCpp does not support multi-modal messages.\"\n                + \"The content of each message must be a string.\"\n            )\n\n        return  [\n            {\n                \"role\": message[\"role\"],\n                \"content\": message[\"content\"],\n            }\n            for message in model_input.messages\n        ]\n\n    def format_output_type(\n        self, output_type: Optional[OutlinesLogitsProcessor] = None,\n    ) -> Optional[\"LogitsProcessorList\"]:\n        \"\"\"Generate the logits processor argument to pass to the model.\n\n        Parameters\n        ----------\n        output_type\n            The logits processor provided.\n\n        
Returns\n        -------\n        LogitsProcessorList\n            The logits processor to pass to the model.\n\n        \"\"\"\n        from llama_cpp import LogitsProcessorList\n\n        if output_type is not None:\n            return LogitsProcessorList([output_type])\n        return None\n\n\nclass LlamaCpp(Model):\n    \"\"\"Thin wrapper around the `llama_cpp.Llama` model.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `llama_cpp.Llama` model.\n    \"\"\"\n\n    tensor_library_name = \"numpy\"\n\n    def __init__(self, model: \"Llama\", chat_mode: bool = True):\n        \"\"\"\n        Parameters\n        ----------\n        model\n            A `llama_cpp.Llama` model instance.\n        chat_mode\n            Whether to enable chat mode. If `False`, the model will regard\n            all `str` inputs as plain text prompts. If `True`, the model will\n            regard all `str` inputs as user messages in a chat conversation.\n\n        \"\"\"\n        self.model = model\n        self.tokenizer = LlamaCppTokenizer(self.model)\n\n        # Note: llama-cpp-python provides a default chat-template fallback even when\n        # the user hasn't explicitly configured one:\n        # https://github.com/abetlen/llama-cpp-python/blob/c37132b/llama_cpp/llama.py#L540-L545\n        # We keep the default as True because the upstream library generally favors chat-style usage.\n        self.type_adapter = LlamaCppTypeAdapter(has_chat_template=chat_mode)\n\n    def generate(\n        self,\n        model_input: Union[Chat, str],\n        output_type: Optional[OutlinesLogitsProcessor] = None,\n        **inference_kwargs: Any,\n    ) -> str:\n        \"\"\"Generate text using `llama-cpp-python`.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The logits processor the model 
will use to constrain the format of\n            the generated text.\n        **inference_kwargs\n            Additional keyword arguments to pass to the `Llama.__call__`\n            method of the `llama-cpp-python` library.\n\n        Returns\n        -------\n        str\n            The text generated by the model.\n\n        \"\"\"\n        prompt = self.type_adapter.format_input(model_input)\n\n        if isinstance(prompt, str):\n            completion = self.model(\n                prompt,\n                logits_processor=self.type_adapter.format_output_type(output_type),\n                **inference_kwargs,\n            )\n            result = completion[\"choices\"][0][\"text\"]\n        elif isinstance(prompt, list):\n            completion = self.model.create_chat_completion(\n                prompt,\n                logits_processor=self.type_adapter.format_output_type(output_type),\n                **inference_kwargs,\n            )\n            result = completion[\"choices\"][0][\"message\"][\"content\"]\n        else:  # Never reached  # pragma: no cover\n            raise ValueError(\"Unexpected prompt type.\")\n\n        self.model.reset()\n\n        return result\n\n    def generate_batch(\n        self,\n        model_input,\n        output_type = None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\"LlamaCpp does not support batch generation.\")\n\n    def generate_stream(\n        self,\n        model_input: Union[Chat, str],\n        output_type: Optional[OutlinesLogitsProcessor] = None,\n        **inference_kwargs: Any,\n    ) -> Iterator[str]:\n        \"\"\"Stream text using `llama-cpp-python`.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The logits processor the model will use to constrain the format of\n            the generated text.\n        **inference_kwargs\n            Additional 
keyword arguments to pass to the `Llama.__call__`\n            method of the `llama-cpp-python` library.\n\n        Returns\n        -------\n        Iterator[str]\n            An iterator that yields the text generated by the model.\n\n        \"\"\"\n        prompt = self.type_adapter.format_input(model_input)\n\n        if isinstance(prompt, str):\n            generator = self.model(\n                prompt,\n                logits_processor=self.type_adapter.format_output_type(output_type),\n                stream=True,\n                **inference_kwargs,\n            )\n            for chunk in generator:\n                yield chunk[\"choices\"][0][\"text\"]\n\n        elif isinstance(prompt, list):\n            generator = self.model.create_chat_completion(\n                prompt,\n                logits_processor=self.type_adapter.format_output_type(output_type),\n                stream=True,\n                **inference_kwargs,\n            )\n            for chunk in generator:\n                yield chunk[\"choices\"][0][\"delta\"].get(\"content\", \"\")\n        else:  # Never reached  # pragma: no cover\n            raise ValueError(\"Unexpected prompt type.\")\n\ndef from_llamacpp(model: \"Llama\", chat_mode: bool = True) -> LlamaCpp:\n    \"\"\"Create an Outlines `LlamaCpp` model instance from a\n    `llama_cpp.Llama` instance.\n\n    Parameters\n    ----------\n    model\n        A `llama_cpp.Llama` instance.\n    chat_mode\n        Whether to enable chat mode. If `False`, the model will regard\n        all `str` inputs as plain text prompts. If `True`, the model will\n        regard all `str` inputs as user messages in a chat conversation.\n\n    Returns\n    -------\n    LlamaCpp\n        An Outlines `LlamaCpp` model instance.\n\n    \"\"\"\n    return LlamaCpp(model, chat_mode=chat_mode)\n"
  },
  {
    "path": "outlines/models/lmstudio.py",
    "content": "\"\"\"Integration with the `lmstudio` library.\"\"\"\n\nfrom functools import singledispatchmethod\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    AsyncIterator,\n    Iterator,\n    Optional,\n    Union,\n    cast,\n)\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.base import AsyncModel, Model, ModelTypeAdapter\nfrom outlines.types import CFG, JsonSchema, Regex\n\nif TYPE_CHECKING:\n    from lmstudio import AsyncClient, Chat as LMStudioChat, Client\n\n__all__ = [\"LMStudio\", \"AsyncLMStudio\", \"from_lmstudio\"]\n\n\nclass LMStudioTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `LMStudio` model.\"\"\"\n\n    def _prepare_lmstudio_image(self, image: Image):\n        \"\"\"Convert Outlines Image to LMStudio image handle.\n\n        LMStudio's SDK only accepts file paths, raw bytes, or binary IO objects.\n        Unlike Ollama which accepts base64 directly, we must decode from base64.\n        \"\"\"\n        import base64\n\n        import lmstudio as lms\n\n        image_bytes = base64.b64decode(image.image_str)\n        return lms.prepare_image(image_bytes)\n\n    @singledispatchmethod\n    def format_input(self, model_input):\n        \"\"\"Format input for LMStudio model.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n\n        Returns\n        -------\n        str | LMStudioChat\n            The formatted input to be passed to the model.\n\n        \"\"\"\n        raise TypeError(\n            f\"The input type {type(model_input)} is not available with \"\n            \"LMStudio. 
The only available types are `str`, `list` and `Chat`.\"\n        )\n\n    @format_input.register(str)\n    def format_str_model_input(self, model_input: str) -> str:\n        \"\"\"Pass through string input directly to LMStudio.\"\"\"\n        return model_input\n\n    @format_input.register(list)\n    def format_list_model_input(self, model_input: list) -> \"LMStudioChat\":\n        \"\"\"Handle list input containing prompt and images.\"\"\"\n        from lmstudio import Chat as LMSChat\n\n        prompt = model_input[0]\n        images = model_input[1:]\n\n        if not all(isinstance(img, Image) for img in images):\n            raise ValueError(\"All assets provided must be of type Image\")\n\n        chat = LMSChat()\n        image_handles = [self._prepare_lmstudio_image(img) for img in images]\n        chat.add_user_message(prompt, images=image_handles)\n        return chat\n\n    @format_input.register(Chat)\n    def format_chat_model_input(self, model_input: Chat) -> \"LMStudioChat\":\n        \"\"\"Convert Outlines Chat to LMStudio Chat with image support.\"\"\"\n        from lmstudio import Chat as LMSChat\n\n        system_prompt = None\n        messages = model_input.messages\n\n        if messages and messages[0][\"role\"] == \"system\":\n            system_prompt = messages[0][\"content\"]\n            messages = messages[1:]\n\n        chat = LMSChat(system_prompt) if system_prompt else LMSChat()\n\n        for message in messages:\n            role = message[\"role\"]\n            content = message[\"content\"]\n\n            if role == \"user\":\n                if isinstance(content, str):\n                    chat.add_user_message(content)\n                elif isinstance(content, list):\n                    prompt = content[0]\n                    images = content[1:]\n                    if not all(isinstance(img, Image) for img in images):\n                        raise ValueError(\"All assets provided must be of type Image\")\n               
     image_handles = [self._prepare_lmstudio_image(img) for img in images]\n                    chat.add_user_message(prompt, images=image_handles)\n                else:\n                    raise ValueError(\n                        f\"Invalid content type: {type(content)}. \"\n                        \"The content must be a string or a list containing a string \"\n                        \"and a list of images.\"\n                    )\n            elif role == \"assistant\":\n                chat.add_assistant_response(content)\n            else:\n                raise ValueError(f\"Unsupported role: {role}\")\n\n        return chat\n\n    def format_output_type(\n        self, output_type: Optional[Any] = None\n    ) -> Optional[dict]:\n        \"\"\"Format the output type to pass to the model.\n\n        Parameters\n        ----------\n        output_type\n            The output type provided by the user.\n\n        Returns\n        -------\n        Optional[dict]\n            The formatted output type (JSON schema) to be passed to the model.\n\n        \"\"\"\n        if output_type is None:\n            return None\n        elif isinstance(output_type, Regex):\n            raise TypeError(\n                \"Regex-based structured outputs are not supported by LMStudio. \"\n                \"Use an open source model in the meantime.\"\n            )\n        elif isinstance(output_type, CFG):\n            raise TypeError(\n                \"CFG-based structured outputs are not supported by LMStudio. \"\n                \"Use an open source model in the meantime.\"\n            )\n        elif JsonSchema.is_json_schema(output_type):\n            return cast(dict, JsonSchema.convert_to(output_type, [\"dict\"]))\n        else:\n            type_name = getattr(output_type, \"__name__\", output_type)\n            raise TypeError(\n                f\"The type `{type_name}` is not supported by LMStudio. 
\"\n                \"Consider using a local model instead.\"\n            )\n\n\nclass LMStudio(Model):\n    \"\"\"Thin wrapper around a `lmstudio.Client` client.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the LMStudio client.\n\n    \"\"\"\n\n    def __init__(self, client: \"Client\", model_name: Optional[str] = None):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            A LMStudio Client instance obtained via `lmstudio.Client()` or\n            `lmstudio.get_default_client()`.\n        model_name\n            The name of the model to use. If not provided, uses the default\n            loaded model in LMStudio.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = LMStudioTypeAdapter()\n\n    def generate(\n        self,\n        model_input: Chat | str | list,\n        output_type: Optional[Any] = None,\n        **kwargs: Any,\n    ) -> str:\n        \"\"\"Generate text using LMStudio.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
The\n            output type must be of a type that can be converted to a JSON\n            schema.\n        **kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        str\n            The text generated by the model.\n\n        \"\"\"\n        if \"model\" not in kwargs and self.model_name is not None:\n            kwargs[\"model\"] = self.model_name\n\n        model_key = kwargs.pop(\"model\", None)\n        model = self.client.llm.model(model_key) if model_key else self.client.llm.model()\n\n        formatted_input = self.type_adapter.format_input(model_input)\n        response_format = self.type_adapter.format_output_type(output_type)\n\n        if response_format is not None:\n            kwargs[\"response_format\"] = response_format\n\n        result = model.respond(formatted_input, **kwargs)\n        return result.content\n\n    def generate_batch(\n        self,\n        model_input,\n        output_type=None,\n        **kwargs,\n    ):\n        raise NotImplementedError(\n            \"The `lmstudio` library does not support batch inference.\"\n        )\n\n    def generate_stream(\n        self,\n        model_input: Chat | str | list,\n        output_type: Optional[Any] = None,\n        **kwargs: Any,\n    ) -> Iterator[str]:\n        \"\"\"Stream text using LMStudio.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
The\n            output type must be of a type that can be converted to a JSON\n            schema.\n        **kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        Iterator[str]\n            An iterator that yields the text generated by the model.\n\n        \"\"\"\n        if \"model\" not in kwargs and self.model_name is not None:\n            kwargs[\"model\"] = self.model_name\n\n        model_key = kwargs.pop(\"model\", None)\n        model = self.client.llm.model(model_key) if model_key else self.client.llm.model()\n\n        formatted_input = self.type_adapter.format_input(model_input)\n        response_format = self.type_adapter.format_output_type(output_type)\n\n        if response_format is not None:\n            kwargs[\"response_format\"] = response_format\n\n        stream = model.respond_stream(formatted_input, **kwargs)\n        for fragment in stream:\n            yield fragment.content\n\n\nclass AsyncLMStudio(AsyncModel):\n    \"\"\"Thin wrapper around a `lmstudio.AsyncClient` client.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the LMStudio async client.\n\n    \"\"\"\n\n    def __init__(\n        self, client: \"AsyncClient\", model_name: Optional[str] = None\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            A LMStudio AsyncClient instance.\n        model_name\n            The name of the model to use. 
If not provided, uses the default\n            loaded model in LMStudio.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = LMStudioTypeAdapter()\n        self._context_entered = False\n\n    async def close(self) -> None:\n        \"\"\"Close the async client and release resources.\"\"\"\n        if self._context_entered:\n            await self.client.__aexit__(None, None, None)\n            self._context_entered = False\n\n    async def generate(\n        self,\n        model_input: Chat | str | list,\n        output_type: Optional[Any] = None,\n        **kwargs: Any,\n    ) -> str:\n        \"\"\"Generate text using LMStudio asynchronously.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. The\n            output type must be of a type that can be converted to a JSON\n            schema.\n        **kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        str\n            The text generated by the model.\n\n        \"\"\"\n        if not self._context_entered:\n            await self.client.__aenter__()\n            self._context_entered = True\n\n        if \"model\" not in kwargs and self.model_name is not None:\n            kwargs[\"model\"] = self.model_name\n\n        model_key = kwargs.pop(\"model\", None)\n        model = await self.client.llm.model(model_key) if model_key else await self.client.llm.model()\n\n        formatted_input = self.type_adapter.format_input(model_input)\n        response_format = self.type_adapter.format_output_type(output_type)\n\n        if response_format is not None:\n            kwargs[\"response_format\"] = response_format\n\n        result = await model.respond(formatted_input, **kwargs)\n        return result.content\n\n    
async def generate_batch(\n        self,\n        model_input,\n        output_type=None,\n        **kwargs,\n    ):\n        raise NotImplementedError(\n            \"The `lmstudio` library does not support batch inference.\"\n        )\n\n    async def generate_stream(  # type: ignore\n        self,\n        model_input: Chat | str | list,\n        output_type: Optional[Any] = None,\n        **kwargs: Any,\n    ) -> AsyncIterator[str]:\n        \"\"\"Stream text using LMStudio asynchronously.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. The\n            output type must be of a type that can be converted to a JSON\n            schema.\n        **kwargs\n            Additional keyword arguments to pass to the model.\n\n        Returns\n        -------\n        AsyncIterator[str]\n            An async iterator that yields the text generated by the model.\n\n        \"\"\"\n        if not self._context_entered:\n            await self.client.__aenter__()\n            self._context_entered = True\n\n        if \"model\" not in kwargs and self.model_name is not None:\n            kwargs[\"model\"] = self.model_name\n\n        model_key = kwargs.pop(\"model\", None)\n        model = await self.client.llm.model(model_key) if model_key else await self.client.llm.model()\n\n        formatted_input = self.type_adapter.format_input(model_input)\n        response_format = self.type_adapter.format_output_type(output_type)\n\n        if response_format is not None:\n            kwargs[\"response_format\"] = response_format\n\n        stream = await model.respond_stream(formatted_input, **kwargs)\n        async for fragment in stream:\n            yield fragment.content\n\n\ndef from_lmstudio(\n    client: Union[\"Client\", \"AsyncClient\"],\n    model_name: Optional[str] = None,\n) -> 
Union[LMStudio, AsyncLMStudio]:\n    \"\"\"Create an Outlines `LMStudio` model instance from a\n    `lmstudio.Client` or `lmstudio.AsyncClient` instance.\n\n    Parameters\n    ----------\n    client\n        A `lmstudio.Client` or `lmstudio.AsyncClient` instance.\n    model_name\n        The name of the model to use.\n\n    Returns\n    -------\n    Union[LMStudio, AsyncLMStudio]\n        An Outlines `LMStudio` or `AsyncLMStudio` model instance.\n\n    \"\"\"\n    from lmstudio import AsyncClient, Client\n\n    if isinstance(client, Client):\n        return LMStudio(client, model_name)\n    elif isinstance(client, AsyncClient):\n        return AsyncLMStudio(client, model_name)\n    else:\n        raise ValueError(\n            \"Invalid client type, the client must be an instance of \"\n            \"`lmstudio.Client` or `lmstudio.AsyncClient`.\"\n        )\n"
  },
  {
    "path": "outlines/models/mistral.py",
    "content": "\"\"\"Integration with Mistral AI API.\"\"\"\n\nimport json\nfrom functools import singledispatchmethod\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    Iterator,\n    List,\n    Dict,\n    Optional,\n    Union,\n)\n\nfrom pydantic import TypeAdapter\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.base import AsyncModel, Model, ModelTypeAdapter\nfrom outlines.models.utils import set_additional_properties_false_json_schema\nfrom outlines.types import JsonSchema, Regex, CFG\nfrom outlines.types.utils import (\n    is_dataclass,\n    is_genson_schema_builder,\n    is_native_dict,\n    is_pydantic_model,\n    is_typed_dict,\n)\n\nif TYPE_CHECKING:\n    from mistralai import Mistral as MistralClient\n\n__all__ = [\"AsyncMistral\", \"Mistral\", \"from_mistral\"]\n\n\nclass MistralTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `Mistral` model.\n\n    Prepares arguments for Mistral's client `chat.complete`,\n    `chat.complete_async`, or `chat.stream` methods. Handles input (prompt or\n    chat messages) and output type (JSON schema types).\n    \"\"\"\n\n    @singledispatchmethod\n    def format_input(self, model_input):\n        \"\"\"Generate the `messages` argument to pass to the client.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n\n        Returns\n        -------\n        list\n            The `messages` argument to pass to the client.\n\n        \"\"\"\n        raise TypeError(\n            f\"The input type {type(model_input)} is not available with \"\n            \"Mistral. 
The only available types are `str`, `list` and `Chat`.\"\n        )\n\n    @format_input.register(str)\n    def format_str_model_input(self, model_input: str) -> list:\n        \"\"\"Format a string input into a list of messages.\n\n        Parameters\n        ----------\n        model_input : str\n            The input string prompt.\n\n        Returns\n        -------\n        list\n            A list of Mistral message objects.\n\n        \"\"\"\n        from mistralai import UserMessage\n\n        return [UserMessage(content=model_input)]\n\n    @format_input.register(list)\n    def format_list_model_input(self, model_input: list) -> list:\n        \"\"\"Format a list input into a list of messages.\n\n        Parameters\n        ----------\n        model_input : list\n            The input list, containing a string prompt and optionally Image\n            objects (vision models only).\n\n        Returns\n        -------\n        list\n            A list of Mistral message objects.\n\n        \"\"\"\n        from mistralai import UserMessage\n\n        return [UserMessage(content=self._create_message_content(model_input))]\n\n    @format_input.register(Chat)\n    def format_chat_model_input(self, model_input: Chat) -> list:\n        \"\"\"Format a Chat input into a list of messages.\n\n        Parameters\n        ----------\n        model_input : Chat\n            The Chat object containing a list of message dictionaries.\n\n        Returns\n        -------\n        list\n            A list of Mistral message objects.\n\n        \"\"\"\n        from mistralai import UserMessage, AssistantMessage, SystemMessage\n\n        messages = []\n\n        for message in model_input.messages:\n            role = message[\"role\"]\n            content = message[\"content\"]\n            if role == \"user\":\n                messages.append(\n                    UserMessage(content=self._create_message_content(content))\n                )\n            elif role == 
\"assistant\":\n                messages.append(AssistantMessage(content=content))\n            elif role == \"system\":\n                messages.append(SystemMessage(content=content))\n            else:\n                raise ValueError(f\"Unsupported role: {role}\")\n\n        return messages\n\n    def _create_message_content(\n        self, content: Union[str, list]\n    ) -> Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]:\n        \"\"\"Create message content from an input.\n\n        Parameters\n        ----------\n        content : Union[str, list]\n            The content to format, either a string or a list containing a\n            string and optionally Image objects.\n\n        Returns\n        -------\n        Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]\n            The formatted content, either a string or a list of content parts\n            (text and image URLs).\n\n        \"\"\"\n        if isinstance(content, str):\n            return content\n        elif isinstance(content, list):\n            if not content:\n                raise ValueError(\"Content list cannot be empty.\")\n            if not isinstance(content[0], str):\n                raise ValueError(\n                    \"The first item in the list should be a string.\"\n                )\n            if len(content) == 1:\n                return content[0]\n            content_parts: List[Dict[str, Union[str, Dict[str, str]]]] = [\n                {\"type\": \"text\", \"text\": content[0]}\n            ]\n            for item in content[1:]:\n                if isinstance(item, Image):\n                    data_url = f\"data:{item.image_format};base64,{item.image_str}\"\n                    content_parts.append({\n                        \"type\": \"image_url\",\n                        \"image_url\": {\"url\": data_url}\n                    })\n                else:\n                    raise ValueError(\n                        f\"Invalid item type in content 
list: {type(item)}. \"\n                        + \"Expected Image objects after the first string.\"\n                    )\n            return content_parts\n        else:\n            raise TypeError(\n                f\"Invalid content type: {type(content)}. \"\n                + \"Content must be a string or a list starting with a string \"\n                + \"followed by optional Image objects.\"\n            )\n\n    def format_output_type(self, output_type: Optional[Any] = None) -> dict:\n        \"\"\"Generate the `response_format` argument to pass to the client.\n\n        Parameters\n        ----------\n        output_type : Optional[Any]\n            The desired output type provided by the user.\n\n        Returns\n        -------\n        dict\n            The `response_format` dict to pass to the client.\n\n        \"\"\"\n        if output_type is None:\n            return {}\n\n        # JSON schema types\n        elif is_pydantic_model(output_type):\n            schema = output_type.model_json_schema()\n            return self.format_json_schema_type(schema, output_type.__name__)\n        elif is_dataclass(output_type):\n            schema = TypeAdapter(output_type).json_schema()\n            return self.format_json_schema_type(schema, output_type.__name__)\n        elif is_typed_dict(output_type):\n            schema = TypeAdapter(output_type).json_schema()\n            return self.format_json_schema_type(schema, output_type.__name__)\n        elif is_genson_schema_builder(output_type):\n            schema = json.loads(output_type.to_json())\n            return self.format_json_schema_type(schema)\n        elif isinstance(output_type, JsonSchema):\n            return self.format_json_schema_type(json.loads(output_type.schema))\n\n        # Json mode\n        elif is_native_dict(output_type):\n            return {\"type\": \"json_object\"}\n\n        # Unsupported types\n        elif isinstance(output_type, Regex):\n            raise TypeError(\n   
             \"Regex-based structured outputs are not available with \"\n                \"Mistral.\"\n            )\n        elif isinstance(output_type, CFG):\n            raise TypeError(\n                \"CFG-based structured outputs are not available with Mistral.\"\n            )\n        else:\n            type_name = getattr(output_type, \"__name__\", str(output_type))\n            raise TypeError(\n                f\"The type {type_name} is not available with Mistral.\"\n            )\n\n    def format_json_schema_type(\n        self, schema: dict, schema_name: str = \"default\"\n    ) -> dict:\n        \"\"\"Create the `response_format` argument to pass to the client from a\n        JSON schema dictionary.\n\n        Parameters\n        ----------\n        schema : dict\n            The JSON schema to format.\n        schema_name : str\n            The name of the schema.\n\n        Returns\n        -------\n        dict\n            The value of the `response_format` argument to pass to the client.\n\n        \"\"\"\n        schema = set_additional_properties_false_json_schema(schema)\n\n        return {\n            \"type\": \"json_schema\",\n            \"json_schema\": {\n                \"schema\": schema,\n                \"name\": schema_name.lower(),\n                \"strict\": True\n            }\n        }\n\n\nclass Mistral(Model):\n    \"\"\"Thin wrapper around the `mistralai.Mistral` client.\n\n    Converts input and output types to arguments for the `mistralai.Mistral`\n    client's `chat.complete` or `chat.stream` methods.\n\n    \"\"\"\n\n    def __init__(\n        self, client: \"MistralClient\", model_name: Optional[str] = None\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        client : MistralClient\n            A mistralai.Mistral client instance.\n        model_name : Optional[str]\n            The name of the model to use.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n 
       self.type_adapter = MistralTypeAdapter()\n\n    def generate(\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> Union[str, list[str]]:\n        \"\"\"Generate a response from the model.\n\n        Parameters\n        ----------\n        model_input : Union[Chat, list, str]\n            The prompt or chat messages to generate a response from.\n        output_type : Optional[Any]\n            The desired format of the response (e.g., JSON schema).\n        **inference_kwargs : Any\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Union[str, list[str]]\n            The response generated by the model as text.\n\n        \"\"\"\n        messages = self.type_adapter.format_input(model_input)\n        response_format = self.type_adapter.format_output_type(output_type)\n\n        if \"model\" not in inference_kwargs and self.model_name is not None:\n            inference_kwargs[\"model\"] = self.model_name\n\n        try:\n            result = self.client.chat.complete(\n                messages=messages,\n                response_format=response_format,\n                **inference_kwargs,\n            )\n        except Exception as e:\n            if \"schema\" in str(e).lower() or \"json_schema\" in str(e).lower():\n                raise TypeError(\n                    f\"Mistral does not support your schema: {e}. 
\"\n                    \"Try a local model or dottxt instead.\"\n                )\n            else:\n                raise RuntimeError(f\"Mistral API error: {e}\") from e\n\n        outputs = [choice.message for choice in result.choices]\n\n        if len(outputs) == 1:\n            return outputs[0].content\n        else:\n            return [m.content for m in outputs]\n\n    def generate_batch(\n        self,\n        model_input,\n        output_type=None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\n            \"The `mistralai` library does not support batch inference.\"\n        )\n\n    def generate_stream(\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Any] = None,\n        **inference_kwargs,\n    ) -> Iterator[str]:\n        \"\"\"Generate a stream of responses from the model.\n\n        Parameters\n        ----------\n        model_input : Union[Chat, list, str]\n            The prompt or chat messages to generate a response from.\n        output_type : Optional[Any]\n            The desired format of the response (e.g., JSON schema).\n        **inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Iterator[str]\n            An iterator that yields the text chunks generated by the model.\n\n        \"\"\"\n        messages = self.type_adapter.format_input(model_input)\n        response_format = self.type_adapter.format_output_type(output_type)\n\n        if \"model\" not in inference_kwargs and self.model_name is not None:\n            inference_kwargs[\"model\"] = self.model_name\n\n        try:\n            stream = self.client.chat.stream(\n                messages=messages,\n                response_format=response_format,\n                **inference_kwargs\n            )\n        except Exception as e:\n            if \"schema\" in str(e).lower() or \"json_schema\" in str(e).lower():\n               
 raise TypeError(\n                    f\"Mistral does not support your schema: {e}. \"\n                    \"Try a local model or dottxt instead.\"\n                )\n            else:\n                raise RuntimeError(f\"Mistral API error: {e}\") from e\n\n        for chunk in stream:\n            if (\n                hasattr(chunk, \"data\")\n                and chunk.data.choices\n                and chunk.data.choices[0].delta.content is not None\n            ):\n                yield chunk.data.choices[0].delta.content\n\n\nclass AsyncMistral(AsyncModel):\n    \"\"\"Async thin wrapper around the `mistralai.Mistral` client.\n\n    Converts input and output types to arguments for the `mistralai.Mistral`\n    client's async methods (`chat.complete_async` or `chat.stream_async`).\n\n    \"\"\"\n\n    def __init__(\n        self, client: \"MistralClient\", model_name: Optional[str] = None\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        client : MistralClient\n            A mistralai.Mistral client instance.\n        model_name : Optional[str]\n            The name of the model to use.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = MistralTypeAdapter()\n\n    async def generate(\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> Union[str, list[str]]:\n        \"\"\"Generate a response from the model asynchronously.\n\n        Parameters\n        ----------\n        model_input : Union[Chat, list, str]\n            The prompt or chat messages to generate a response from.\n        output_type : Optional[Any]\n            The desired format of the response (e.g., JSON schema).\n        **inference_kwargs : Any\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Union[str, list[str]]\n            The response generated 
by the model as text.\n\n        \"\"\"\n        messages = self.type_adapter.format_input(model_input)\n        response_format = self.type_adapter.format_output_type(output_type)\n\n        if \"model\" not in inference_kwargs and self.model_name is not None:\n            inference_kwargs[\"model\"] = self.model_name\n\n        try:\n            result = await self.client.chat.complete_async(\n                messages=messages,\n                response_format=response_format,\n                stream=False,\n                **inference_kwargs,\n            )\n        except Exception as e:\n            if \"schema\" in str(e).lower() or \"json_schema\" in str(e).lower():\n                raise TypeError(\n                    f\"Mistral does not support your schema: {e}. \"\n                    \"Try a local model or dottxt instead.\"\n                )\n            else:\n                raise RuntimeError(f\"Mistral API error: {e}\") from e\n\n        outputs = [choice.message for choice in result.choices]\n\n        if len(outputs) == 1:\n            return outputs[0].content\n        else:\n            return [m.content for m in outputs]\n\n    async def generate_batch(\n        self,\n        model_input,\n        output_type=None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\n            \"The mistralai library does not support batch inference.\"\n        )\n\n    async def generate_stream(\n        self,\n        model_input,\n        output_type=None,\n        **inference_kwargs,\n    ):\n        \"\"\"Generate text from the model as an async stream of chunks.\n\n        Parameters\n        ----------\n        model_input\n            str, list, or chat input to generate from.\n        output_type\n            Optional type for structured output.\n        **inference_kwargs\n            Extra kwargs like \"model\" name.\n\n        Yields\n        ------\n        str\n            Chunks of text as they are streamed.\n\n        
\"\"\"\n        messages = self.type_adapter.format_input(model_input)\n        response_format = self.type_adapter.format_output_type(output_type)\n\n        if \"model\" not in inference_kwargs and self.model_name is not None:\n            inference_kwargs[\"model\"] = self.model_name\n\n        try:\n            response = await self.client.chat.stream_async(\n                messages=messages,\n                response_format=response_format,\n                **inference_kwargs\n            )\n        except Exception as e:\n            if \"schema\" in str(e).lower() or \"json_schema\" in str(e).lower():\n                raise TypeError(\n                    f\"Mistral does not support your schema: {e}. \"\n                    \"Try a local model or dottxt instead.\"\n                )\n            else:\n                raise RuntimeError(f\"Mistral API error: {e}\") from e\n\n        async for chunk in response:\n            if (\n                hasattr(chunk, \"data\")\n                and chunk.data.choices\n                and len(chunk.data.choices) > 0\n                and hasattr(chunk.data.choices[0], \"delta\")\n                and chunk.data.choices[0].delta.content is not None\n            ):\n                yield chunk.data.choices[0].delta.content\n\n\ndef from_mistral(\n    client: \"MistralClient\",\n    model_name: Optional[str] = None,\n    async_client: bool = False,\n) -> Union[Mistral, AsyncMistral]:\n    \"\"\"Create an Outlines Mistral model instance from a mistralai.Mistral\n    client.\n\n    Parameters\n    ----------\n    client : MistralClient\n        A mistralai.Mistral client instance.\n    model_name : Optional[str]\n        The name of the model to use.\n    async_client : bool\n        If True, return an AsyncMistral instance;\n        otherwise, return a Mistral instance.\n\n    Returns\n    -------\n    Union[Mistral, AsyncMistral]\n        An Outlines Mistral or AsyncMistral model instance.\n\n    \"\"\"\n    from 
mistralai import Mistral as MistralClient\n\n    if not isinstance(client, MistralClient):\n        raise ValueError(\n            \"Invalid client type. The client must be an instance of \"\n            \"`mistralai.Mistral`.\"\n        )\n\n    if async_client:\n        return AsyncMistral(client, model_name)\n    else:\n        return Mistral(client, model_name)\n"
  },
  {
    "path": "outlines/models/mlxlm.py",
    "content": "\"\"\"Integration with the `mlx_lm` library.\"\"\"\n\nfrom functools import singledispatchmethod\nfrom typing import TYPE_CHECKING, Iterator, List, Optional\n\nfrom outlines.inputs import Chat\nfrom outlines.models.base import Model, ModelTypeAdapter\nfrom outlines.models.tokenizer import _check_hf_chat_template\nfrom outlines.models.transformers import TransformerTokenizer\nfrom outlines.processors import OutlinesLogitsProcessor\n\nif TYPE_CHECKING:\n    import mlx.nn as nn\n    from transformers import PreTrainedTokenizer\n\n__all__ = [\"MLXLM\", \"from_mlxlm\"]\n\n\nclass MLXLMTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `MLXLM` model.\"\"\"\n\n    def __init__(self, tokenizer: \"PreTrainedTokenizer\", has_chat_template: bool = False):\n        self.tokenizer = tokenizer\n        self.has_chat_template = has_chat_template\n\n    @singledispatchmethod\n    def format_input(self, model_input):\n        \"\"\"Generate the prompt argument to pass to the model.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n\n        Returns\n        -------\n        str\n            The formatted input to be passed to the model.\n\n        \"\"\"\n        raise NotImplementedError(\n            f\"The input type {type(model_input)} is not available with \"\n            \"mlx-lm. 
The available types are `str` and `Chat`.\"\n        )\n\n    @format_input.register(str)\n    def format_str_input(self, model_input: str) -> str:\n        if self.has_chat_template:\n            return self.format_chat_input(Chat([{\"role\": \"user\", \"content\": model_input}]))\n        return model_input\n\n    @format_input.register(Chat)\n    def format_chat_input(self, model_input: Chat) -> str:\n        if not all(\n            isinstance(message[\"content\"], str)\n            for message in model_input.messages\n        ):\n            raise ValueError(\n                \"mlx-lm does not support multi-modal messages.\"\n                + \"The content of each message must be a string.\"\n            )\n\n        return self.tokenizer.apply_chat_template(\n            model_input.messages,\n            tokenize=False,\n            add_generation_prompt=True,\n        )\n\n    def format_output_type(\n        self, output_type: Optional[OutlinesLogitsProcessor] = None,\n    ) -> Optional[List[OutlinesLogitsProcessor]]:\n        \"\"\"Generate the logits processor argument to pass to the model.\n\n        Parameters\n        ----------\n        output_type\n            The logits processor provided.\n\n        Returns\n        -------\n        Optional[list[OutlinesLogitsProcessor]]\n            The logits processor argument to be passed to the model.\n\n        \"\"\"\n        if not output_type:\n            return None\n        return [output_type]\n\n\nclass MLXLM(Model):\n    \"\"\"Thin wrapper around an `mlx_lm` model.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `mlx_lm` library.\n\n    \"\"\"\n\n    tensor_library_name = \"mlx\"\n\n    def __init__(\n        self,\n        model: \"nn.Module\",\n        tokenizer: \"PreTrainedTokenizer\",\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        model\n            An instance of an `mlx_lm` model.\n 
       tokenizer\n            An instance of an `mlx_lm` tokenizer or of a compatible\n            `transformers` tokenizer.\n\n        \"\"\"\n        self.model = model\n        # self.mlx_tokenizer is used by the mlx-lm in its generate function\n        self.mlx_tokenizer = tokenizer\n        # self.tokenizer is used by the logits processor\n        self.tokenizer = TransformerTokenizer(tokenizer._tokenizer)\n        self.type_adapter = MLXLMTypeAdapter(\n            tokenizer=tokenizer,\n            has_chat_template=_check_hf_chat_template(tokenizer)\n        )\n\n    def generate(\n        self,\n        model_input: str,\n        output_type: Optional[OutlinesLogitsProcessor] = None,\n        **kwargs,\n    ) -> str:\n        \"\"\"Generate text using `mlx-lm`.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The logits processor the model will use to constrain the format of\n            the generated text.\n        kwargs\n            Additional keyword arguments to pass to the `mlx-lm` library.\n\n        Returns\n        -------\n        str\n            The text generated by the model.\n\n        \"\"\"\n        from mlx_lm import generate\n\n        return generate(\n            self.model,\n            self.mlx_tokenizer,\n            self.type_adapter.format_input(model_input),\n            logits_processors=self.type_adapter.format_output_type(output_type),\n            **kwargs,\n        )\n\n    def generate_batch(\n        self,\n        model_input: list[str],\n        output_type: Optional[OutlinesLogitsProcessor] = None,\n        **kwargs,\n    ) -> list[str]:\n        \"\"\"Generate a batch of text using `mlx-lm`.\n\n        Parameters\n        ----------\n        model_input\n            The list of prompts based on which the model will generate a response.\n        output_type\n            The logits processor the model 
will use to constrain the format of\n            the generated text.\n        kwargs\n            Additional keyword arguments to pass to the `mlx-lm` library.\n\n        Returns\n        -------\n        list[str]\n            The list of text generated by the model.\n\n        \"\"\"\n        from mlx_lm import batch_generate\n\n        if output_type:\n            raise NotImplementedError(\n                \"mlx-lm does not support constrained generation with batching.\"\n                + \"You cannot provide an `output_type` with this method.\"\n            )\n\n        model_input = [self.type_adapter.format_input(item) for item in model_input]\n\n        # Contrarily to the other generate methods, batch_generate requires\n        # tokenized prompts\n        add_special_tokens = [\n            (\n                self.mlx_tokenizer.bos_token is None\n                or not prompt.startswith(self.mlx_tokenizer.bos_token)\n            )\n            for prompt in model_input\n        ]\n        tokenized_model_input = [\n            self.mlx_tokenizer.encode(\n                model_input[i], add_special_tokens=add_special_tokens[i]\n            )\n            for i in range(len(model_input))\n        ]\n\n        response = batch_generate(\n            self.model,\n            self.mlx_tokenizer,\n            tokenized_model_input,\n            **kwargs,\n        )\n\n        return response.texts\n\n    def generate_stream(\n        self,\n        model_input: str,\n        output_type: Optional[OutlinesLogitsProcessor] = None,\n        **kwargs,\n    ) -> Iterator[str]:\n        \"\"\"Stream text using `mlx-lm`.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The logits processor the model will use to constrain the format of\n            the generated text.\n        kwargs\n            Additional keyword arguments to pass to the 
`mlx-lm` library.\n\n        Returns\n        -------\n        Iterator[str]\n            An iterator that yields the text generated by the model.\n\n        \"\"\"\n        from mlx_lm import stream_generate\n\n        for gen_response in stream_generate(\n            self.model,\n            self.mlx_tokenizer,\n            self.type_adapter.format_input(model_input),\n            logits_processors=self.type_adapter.format_output_type(output_type),\n            **kwargs,\n        ):\n            yield gen_response.text\n\n\ndef from_mlxlm(model: \"nn.Module\", tokenizer: \"PreTrainedTokenizer\") -> MLXLM:\n    \"\"\"Create an Outlines `MLXLM` model instance from an `mlx_lm` model and a\n    tokenizer.\n\n    Parameters\n    ----------\n    model\n        An instance of an `mlx_lm` model.\n    tokenizer\n        An instance of an `mlx_lm` tokenizer or of a compatible\n        transformers tokenizer.\n\n    Returns\n    -------\n    MLXLM\n        An Outlines `MLXLM` model instance.\n\n    \"\"\"\n    return MLXLM(model, tokenizer)\n"
  },
  {
    "path": "outlines/models/ollama.py",
    "content": "\"\"\"Integration with the `ollama` library.\"\"\"\n\nfrom functools import singledispatchmethod\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    AsyncIterator,\n    Iterator,\n    Optional,\n    Union,\n    cast,\n)\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.base import AsyncModel, Model, ModelTypeAdapter\nfrom outlines.types import CFG, JsonSchema, Regex\n\nif TYPE_CHECKING:\n    from ollama import Client\n    from ollama import AsyncClient\n\n__all__ = [\"AsyncOllama\", \"Ollama\", \"from_ollama\"]\n\n\nclass OllamaTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `Ollama` model.\"\"\"\n\n    @singledispatchmethod\n    def format_input(self, model_input):\n        \"\"\"Generate the value of the `messages` argument to pass to the client.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n\n        Returns\n        -------\n        list\n            The formatted value of the `messages` argument to be passed to\n            the client.\n\n        \"\"\"\n        raise TypeError(\n            f\"The input type {type(model_input)} is not available with \"\n            \"Ollama. 
The only available types are `str`, `list` and `Chat`.\"\n        )\n\n    @format_input.register(str)\n    def format_str_model_input(self, model_input: str) -> list:\n        \"\"\"Generate the value of the `messages` argument to pass to the\n        client when the user only passes a prompt.\n\n        \"\"\"\n        return [\n            self._create_message(\"user\", model_input)\n        ]\n\n    @format_input.register(list)\n    def format_list_model_input(self, model_input: list) -> list:\n        \"\"\"Generate the value of the `messages` argument to pass to the\n        client when the user passes a prompt and images.\n\n        \"\"\"\n        return [\n            self._create_message(\"user\", model_input)\n        ]\n\n    @format_input.register(Chat)\n    def format_chat_model_input(self, model_input: Chat) -> list:\n        \"\"\"Generate the value of the `messages` argument to pass to the\n        client when the user passes a Chat instance.\n\n        \"\"\"\n        return [\n            self._create_message(message[\"role\"], message[\"content\"])\n            for message in model_input.messages\n        ]\n\n    def _create_message(self, role: str, content: str | list) -> dict:\n        \"\"\"Create a message.\"\"\"\n\n        if isinstance(content, str):\n            return {\n                \"role\": role,\n                \"content\": content,\n            }\n\n        elif isinstance(content, list):\n            prompt = content[0]\n            images = content[1:]\n\n            if not all(isinstance(image, Image) for image in images):\n                raise ValueError(\"All assets provided must be of type Image\")\n\n            return {\n                \"role\": role,\n                \"content\": prompt,\n                \"images\": [image.image_str for image in images],\n            }\n\n        else:\n            raise ValueError(\n                f\"Invalid content type: {type(content)}. 
\"\n                \"The content must be a string or a list containing a string \"\n                \"and a list of images.\"\n            )\n\n    def format_output_type(\n        self, output_type: Optional[Any] = None\n    ) -> Optional[dict]:\n        \"\"\"Format the output type to pass to the client.\n\n        Parameters\n        ----------\n        output_type\n            The output type provided by the user.\n\n        Returns\n        -------\n        Optional[str]\n            The formatted output type to be passed to the model.\n\n        \"\"\"\n        if output_type is None:\n            return None\n        elif isinstance(output_type, Regex):\n            raise TypeError(\n                \"Regex-based structured outputs are not supported by Ollama. \"\n                \"Use an open source model in the meantime.\"\n            )\n        elif isinstance(output_type, CFG):\n            raise TypeError(\n                \"CFG-based structured outputs are not supported by Ollama. \"\n                \"Use an open source model in the meantime.\"\n            )\n        elif JsonSchema.is_json_schema(output_type):\n            return cast(dict, JsonSchema.convert_to(output_type, [\"dict\"]))\n        else:\n            type_name = getattr(output_type, \"__name__\", output_type)\n            raise TypeError(\n                f\"The type `{type_name}` is not supported by Ollama. 
\"\n                \"Consider using a local model instead.\"\n            )\n\n\nclass Ollama(Model):\n    \"\"\"Thin wrapper around the `ollama.Client` client.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `ollama.Client` client.\n\n    \"\"\"\n\n    def __init__(self, client: \"Client\", model_name: Optional[str] = None):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            The `ollama.Client` client.\n        model_name\n            The name of the model to use.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = OllamaTypeAdapter()\n\n    def generate(self,\n        model_input: Chat | str | list,\n        output_type: Optional[Any] = None,\n        **kwargs: Any,\n    ) -> str:\n        \"\"\"Generate text using Ollama.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
The\n            output type must be of a type that can be converted to a JSON\n            schema.\n        **kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        str\n            The text generated by the model.\n\n        \"\"\"\n        if \"model\" not in kwargs and self.model_name is not None:\n            kwargs[\"model\"] = self.model_name\n\n        print(self.type_adapter.format_input(model_input))\n\n        response = self.client.chat(\n            messages=self.type_adapter.format_input(model_input),\n            format=self.type_adapter.format_output_type(output_type),\n            **kwargs,\n        )\n        return response.message.content\n\n    def generate_batch(\n        self,\n        model_input,\n        output_type = None,\n        **kwargs,\n    ):\n        raise NotImplementedError(\n            \"The `ollama` library does not support batch inference.\"\n        )\n\n    def generate_stream(\n        self,\n        model_input: Chat | str | list,\n        output_type: Optional[Any] = None,\n        **kwargs: Any,\n    ) -> Iterator[str]:\n        \"\"\"Stream text using Ollama.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
The\n            output type must be of a type that can be converted to a JSON\n            schema.\n        **kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Iterator[str]\n            An iterator that yields the text generated by the model.\n\n        \"\"\"\n        if \"model\" not in kwargs and self.model_name is not None:\n            kwargs[\"model\"] = self.model_name\n\n        response = self.client.chat(\n            messages=self.type_adapter.format_input(model_input),\n            format=self.type_adapter.format_output_type(output_type),\n            stream=True,\n            **kwargs,\n        )\n        for chunk in response:\n            yield chunk.message.content\n\n\nclass AsyncOllama(AsyncModel):\n    \"\"\"Thin wrapper around the `ollama.AsyncClient` client.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `ollama.AsyncClient` client.\n\n    \"\"\"\n\n    def __init__(\n        self,client: \"AsyncClient\", model_name: Optional[str] = None,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            The `ollama.AsyncClient` client.\n        model_name\n            The name of the model to use.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = OllamaTypeAdapter()\n\n    async def generate(self,\n        model_input: Chat | str | list,\n        output_type: Optional[Any] = None,\n        **kwargs: Any,\n    ) -> str:\n        \"\"\"Generate text using Ollama.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. The\n            output type must be of a type that can be converted to a JSON\n            schema.\n        **kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        str\n            The text generated by the model.\n\n        \"\"\"\n        if \"model\" not in kwargs and self.model_name is not None:\n            kwargs[\"model\"] = self.model_name\n\n        response = await self.client.chat(\n            messages=self.type_adapter.format_input(model_input),\n            format=self.type_adapter.format_output_type(output_type),\n            **kwargs,\n        )\n        return response.message.content\n\n    async def generate_batch(\n        self,\n        model_input,\n        output_type = None,\n        **kwargs,\n    ):\n        raise NotImplementedError(\n            \"The `ollama` library does not support batch inference.\"\n        )\n\n    async def generate_stream( # type: ignore\n        self,\n        model_input: Chat | str | list,\n        output_type: Optional[Any] = None,\n        **kwargs: Any,\n    ) -> AsyncIterator[str]:\n        \"\"\"Stream text using Ollama.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. The\n            output type must be of a type that can be converted to a JSON\n            schema.\n        **kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        AsyncIterator[str]\n            An async iterator that yields the text generated by the model.\n\n        \"\"\"\n        if \"model\" not in kwargs and self.model_name is not None:\n            kwargs[\"model\"] = self.model_name\n\n        stream = await self.client.chat(\n            messages=self.type_adapter.format_input(model_input),\n            format=self.type_adapter.format_output_type(output_type),\n            stream=True,\n            **kwargs,\n        )\n        async for chunk in stream:\n            yield chunk.message.content\n\n\ndef from_ollama(\n    client: Union[\"Client\", \"AsyncClient\"], model_name: Optional[str] = None\n) -> Union[Ollama, AsyncOllama]:\n    \"\"\"Create an Outlines `Ollama` model instance from an `ollama.Client`\n    or `ollama.AsyncClient` instance.\n\n    Parameters\n    ----------\n    client\n        A `ollama.Client` or `ollama.AsyncClient` instance.\n    model_name\n        The name of the model to use.\n\n    Returns\n    -------\n    Union[Ollama, AsyncOllama]\n        An Outlines `Ollama` or `AsyncOllama` model instance.\n\n    \"\"\"\n    from ollama import AsyncClient, Client\n\n    if isinstance(client, Client):\n        return Ollama(client, model_name)\n    elif isinstance(client, AsyncClient):\n        return AsyncOllama(client, model_name)\n    else:\n        raise ValueError(\n            \"Invalid client type, the client must be an instance of \"\n            \"`ollama.Client` or `ollama.AsyncClient`.\"\n        )\n"
  },
  {
    "path": "outlines/models/openai.py",
    "content": "\"\"\"Integration with OpenAI's API.\"\"\"\n\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    AsyncIterator,\n    Iterator,\n    Optional,\n    Union,\n    cast,\n)\nfrom functools import singledispatchmethod\n\nfrom pydantic import BaseModel\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.base import AsyncModel, Model, ModelTypeAdapter\nfrom outlines.models.utils import set_additional_properties_false_json_schema\nfrom outlines.types import JsonSchema, Regex, CFG\nfrom outlines.types.utils import is_native_dict\n\nif TYPE_CHECKING:\n    from openai import (\n        OpenAI as OpenAIClient,\n        AsyncOpenAI as AsyncOpenAIClient,\n        AzureOpenAI as AzureOpenAIClient,\n        AsyncAzureOpenAI as AsyncAzureOpenAIClient,\n    )\n\n__all__ = [\"AsyncOpenAI\", \"OpenAI\", \"from_openai\"]\n\n\nclass OpenAITypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `OpenAI` model.\n\n    `OpenAITypeAdapter` is responsible for preparing the arguments to OpenAI's\n    `completions.create` methods: the input (prompt and possibly image), as\n    well as the output type (only JSON).\n\n    \"\"\"\n\n    @singledispatchmethod\n    def format_input(self, model_input):\n        \"\"\"Generate the `messages` argument to pass to the client.\n\n        Parameters\n        ----------\n        model_input\n            The input provided by the user.\n\n        Returns\n        -------\n        dict\n            The formatted input to be passed to the client.\n\n        \"\"\"\n        raise TypeError(\n            f\"The input type {type(model_input)} is not available with \"\n            \"OpenAI. 
The only available types are `str`, `list` and `Chat`.\"\n        )\n\n    @format_input.register(str)\n    def format_str_model_input(self, model_input: str) -> list:\n        \"\"\"Generate the value of the `messages` argument to pass to the\n        client when the user only passes a prompt.\n\n        \"\"\"\n        return [\n            self._create_message(\"user\", model_input)\n        ]\n\n    @format_input.register(list)\n    def format_list_model_input(self, model_input: list) -> list:\n        \"\"\"Generate the value of the `messages` argument to pass to the\n        client when the user passes a prompt and images.\n\n        \"\"\"\n        return [\n            self._create_message(\"user\", model_input)\n        ]\n\n    @format_input.register(Chat)\n    def format_chat_model_input(self, model_input: Chat) -> list:\n        \"\"\"Generate the value of the `messages` argument to pass to the\n        client when the user passes a Chat instance.\n\n        \"\"\"\n        return [\n            self._create_message(message[\"role\"], message[\"content\"])\n            for message in model_input.messages\n        ]\n\n    def _create_message(self, role: str, content: str | list) -> dict:\n        \"\"\"Create a message.\"\"\"\n\n        if isinstance(content, str):\n            return {\n                \"role\": role,\n                \"content\": content,\n            }\n\n        elif isinstance(content, list):\n            prompt = content[0]\n            images = content[1:]\n\n            if not all(isinstance(image, Image) for image in images):\n                raise ValueError(\"All assets provided must be of type Image\")\n\n            image_parts = [\n                self._create_img_content(image)\n                for image in images\n            ]\n\n            return {\n                \"role\": role,\n                \"content\": [\n                    {\"type\": \"text\", \"text\": prompt},\n                    *image_parts,\n           
     ],\n            }\n\n        else:\n            raise ValueError(\n                f\"Invalid content type: {type(content)}. \"\n                \"The content must be a string or a list containing a string \"\n                \"and a list of images.\"\n            )\n\n    def _create_img_content(self, image: Image) -> dict:\n        \"\"\"Create the content for an image input.\"\"\"\n        return {\n            \"type\": \"image_url\",\n            \"image_url\": {\n                \"url\": f\"data:{image.image_format};base64,{image.image_str}\"  # noqa: E702\n            },\n        }\n\n    def format_output_type(self, output_type: Optional[Any] = None) -> dict:\n        \"\"\"Generate the `response_format` argument to the client based on the\n        output type specified by the user.\n\n        Parameters\n        ----------\n        output_type\n            The output type provided by the user.\n\n        Returns\n        -------\n        dict\n            The formatted output type to be passed to the client.\n\n        \"\"\"\n        # Unsupported languages\n        if isinstance(output_type, Regex):\n            raise TypeError(\n                \"Neither regex-based structured outputs nor the `pattern` keyword \"\n                \"in Json Schema are available with OpenAI. Use an open source \"\n                \"model or dottxt instead.\"\n            )\n        elif isinstance(output_type, CFG):\n            raise TypeError(\n                \"CFG-based structured outputs are not available with OpenAI. 
\"\n                \"Use an open source model or dottxt instead.\"\n            )\n\n        if output_type is None:\n            return {}\n        elif is_native_dict(output_type):\n            return self.format_json_mode_type()\n        elif JsonSchema.is_json_schema(output_type):\n            return self.format_json_output_type(\n                cast(dict, JsonSchema.convert_to(output_type, [\"dict\"]))\n            )\n        else:\n            type_name = getattr(output_type, \"__name__\", output_type)\n            raise TypeError(\n                f\"The type `{type_name}` is not available with OpenAI. \"\n                \"Use an open source model or dottxt instead.\"\n            )\n\n    def format_json_output_type(self, schema: dict) -> dict:\n        \"\"\"Generate the `response_format` argument to the client when the user\n        specified a `Json` output type.\n\n        \"\"\"\n        # OpenAI requires `additionalProperties` to be set to False\n        schema = set_additional_properties_false_json_schema(schema)\n\n        return {\n            \"response_format\": {\n                \"type\": \"json_schema\",\n                \"json_schema\": {\n                    \"name\": \"default\",\n                    \"strict\": True,\n                    \"schema\": schema,\n                },\n            }\n        }\n\n    def format_json_mode_type(self) -> dict:\n        \"\"\"Generate the `response_format` argument to the client when the user\n        specified the output type should be a JSON but without specifying the\n        schema (also called \"JSON mode\").\n\n        \"\"\"\n        return {\"response_format\": {\"type\": \"json_object\"}}\n\n\nclass OpenAI(Model):\n    \"\"\"Thin wrapper around the `openai.OpenAI` client.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `openai.OpenAI` client.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        
client: Union[\"OpenAIClient\", \"AzureOpenAIClient\"],\n        model_name: Optional[str] = None,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            The `openai.OpenAI` client.\n        model_name\n            The name of the model to use.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = OpenAITypeAdapter()\n\n    def generate(\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Union[type[BaseModel], str]] = None,\n        **inference_kwargs: Any,\n    ) -> Union[str, list[str]]:\n        \"\"\"Generate text using OpenAI.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. The\n            output type must be of a type that can be converted to a JSON\n            schema or an empty dictionary.\n        **inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Union[str, list[str]]\n            The text generated by the model.\n\n        \"\"\"\n        import openai\n\n        messages = self.type_adapter.format_input(model_input)\n        response_format = self.type_adapter.format_output_type(output_type)\n\n        if \"model\" not in inference_kwargs and self.model_name is not None:\n            inference_kwargs[\"model\"] = self.model_name\n\n        try:\n            result = self.client.chat.completions.create(\n                messages=messages,\n                **response_format,\n                **inference_kwargs,\n            )\n        except openai.BadRequestError as e:\n            if e.body[\"message\"].startswith(\"Invalid schema\"):\n                raise TypeError(\n                    f\"OpenAI does not support your schema: {e.body['message']}. 
\"\n                    \"Try a local model or dottxt instead.\"\n                )\n            else:\n                raise e\n\n        messages = [choice.message for choice in result.choices]\n        for message in messages:\n            if message.refusal is not None:\n                raise ValueError(\n                    f\"OpenAI refused to answer the request: {message.refusal}\"\n                )\n\n        if len(messages) == 1:\n            return messages[0].content\n        else:\n            return [message.content for message in messages]\n\n    def generate_batch(\n        self,\n        model_input,\n        output_type = None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\n            \"The `openai` library does not support batch inference.\"\n        )\n\n    def generate_stream(\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Union[type[BaseModel], str]] = None,\n        **inference_kwargs,\n    ) -> Iterator[str]:\n        \"\"\"Stream text using OpenAI.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
The\n            output type must be of a type that can be converted to a JSON\n            schema or an empty dictionary.\n        **inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Iterator[str]\n            An iterator that yields the text generated by the model.\n\n        \"\"\"\n        import openai\n\n        messages = self.type_adapter.format_input(model_input)\n        response_format = self.type_adapter.format_output_type(output_type)\n\n        if \"model\" not in inference_kwargs and self.model_name is not None:\n            inference_kwargs[\"model\"] = self.model_name\n\n        try:\n            stream = self.client.chat.completions.create(\n                stream=True,\n                messages=messages,\n                **response_format,\n                **inference_kwargs\n            )\n        except openai.BadRequestError as e:\n            if e.body[\"message\"].startswith(\"Invalid schema\"):\n                raise TypeError(\n                    f\"OpenAI does not support your schema: {e.body['message']}. 
\"\n                    \"Try a local model or dottxt instead.\"\n                )\n            else:\n                raise e\n\n        for chunk in stream:\n            if chunk.choices and chunk.choices[0].delta.content is not None:\n                yield chunk.choices[0].delta.content\n\n\nclass AsyncOpenAI(AsyncModel):\n    \"\"\"Thin wrapper around the `openai.AsyncOpenAI` client.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `openai.AsyncOpenAI` client.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        client: Union[\"AsyncOpenAIClient\", \"AsyncAzureOpenAIClient\"],\n        model_name: Optional[str] = None,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client.\n        model_name\n            The name of the model to use.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = OpenAITypeAdapter()\n\n    async def generate(\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Union[type[BaseModel], str]] = None,\n        **inference_kwargs: Any,\n    ) -> Union[str, list[str]]:\n        \"\"\"Generate text using OpenAI.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
The\n            output type must be of a type that can be converted to a JSON\n            schema or an empty dictionary.\n        **inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Union[str, list[str]]\n            The text generated by the model.\n\n        \"\"\"\n        import openai\n\n        messages = self.type_adapter.format_input(model_input)\n        response_format = self.type_adapter.format_output_type(output_type)\n\n        if \"model\" not in inference_kwargs and self.model_name is not None:\n            inference_kwargs[\"model\"] = self.model_name\n\n        try:\n            result = await self.client.chat.completions.create(\n                messages=messages,\n                **response_format,\n                **inference_kwargs,\n            )\n        except openai.BadRequestError as e:\n            if e.body[\"message\"].startswith(\"Invalid schema\"):\n                raise TypeError(\n                    f\"OpenAI does not support your schema: {e.body['message']}. 
\"\n                    \"Try a local model or dottxt instead.\"\n                )\n            else:\n                raise e\n\n        messages = [choice.message for choice in result.choices]\n        for message in messages:\n            if message.refusal is not None:\n                raise ValueError(\n                    f\"OpenAI refused to answer the request: {message.refusal}\"\n                )\n\n        if len(messages) == 1:\n            return messages[0].content\n        else:\n            return [message.content for message in messages]\n\n    async def generate_batch(\n        self,\n        model_input,\n        output_type = None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\n            \"The `openai` library does not support batch inference.\"\n        )\n\n    async def generate_stream( # type: ignore\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Union[type[BaseModel], str]] = None,\n        **inference_kwargs,\n    ) -> AsyncIterator[str]:\n        \"\"\"Stream text using OpenAI.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
The\n            output type must be of a type that can be converted to a JSON\n            schema or an empty dictionary.\n        **inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        AsyncIterator[str]\n            An async iterator that yields the text generated by the model.\n\n        \"\"\"\n        import openai\n\n        messages = self.type_adapter.format_input(model_input)\n        response_format = self.type_adapter.format_output_type(output_type)\n\n        if \"model\" not in inference_kwargs and self.model_name is not None:\n            inference_kwargs[\"model\"] = self.model_name\n\n        try:\n            stream = await self.client.chat.completions.create(\n                stream=True,\n                messages=messages,\n                **response_format,\n                **inference_kwargs\n            )\n        except openai.BadRequestError as e:\n            if e.body[\"message\"].startswith(\"Invalid schema\"):\n                raise TypeError(\n                    f\"OpenAI does not support your schema: {e.body['message']}. 
\"\n                    \"Try a local model or dottxt instead.\"\n                )\n            else:\n                raise e\n\n        async for chunk in stream:\n            if chunk.choices and chunk.choices[0].delta.content is not None:\n                yield chunk.choices[0].delta.content\n\n\ndef from_openai(\n    client: Union[\n        \"OpenAIClient\",\n        \"AsyncOpenAIClient\",\n        \"AzureOpenAIClient\",\n        \"AsyncAzureOpenAIClient\",\n    ],\n    model_name: Optional[str] = None,\n) -> Union[OpenAI, AsyncOpenAI]:\n    \"\"\"Create an Outlines `OpenAI` or `AsyncOpenAI` model instance from an\n    `openai.OpenAI` or `openai.AsyncOpenAI` client.\n\n    Parameters\n    ----------\n    client\n        An `openai.OpenAI`, `openai.AsyncOpenAI`, `openai.AzureOpenAI` or\n        `openai.AsyncAzureOpenAI` client instance.\n    model_name\n        The name of the model to use.\n\n    Returns\n    -------\n    Union[OpenAI, AsyncOpenAI]\n        An Outlines `OpenAI` or `AsyncOpenAI` model instance.\n\n    \"\"\"\n    import openai\n\n    if isinstance(client, openai.OpenAI):\n        return OpenAI(client, model_name)\n    elif isinstance(client, openai.AsyncOpenAI):\n        return AsyncOpenAI(client, model_name)\n    else:\n        raise ValueError(\n            \"Invalid client type. The client must be an instance of \"\n            \"`openai.OpenAI` or `openai.AsyncOpenAI`.\"\n        )\n"
  },
  {
    "path": "outlines/models/sglang.py",
    "content": "\"\"\"Integration with an SGLang server.\"\"\"\n\nimport json\nimport warnings\nfrom typing import (\n    TYPE_CHECKING, Any, AsyncIterator, Iterator, Optional, Union\n)\n\nfrom outlines.inputs import Chat\nfrom outlines.models.base import AsyncModel, Model, ModelTypeAdapter\nfrom outlines.models.openai import OpenAITypeAdapter\nfrom outlines.types.dsl import (\n    CFG,\n    JsonSchema,\n    python_types_to_terms,\n    to_regex,\n)\n\nif TYPE_CHECKING:\n    from openai import AsyncOpenAI, OpenAI\n\n__all__ = [\"AsyncSGLang\", \"SGLang\", \"from_sglang\"]\n\n\nclass SGLangTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `SGLang` and `AsyncSGLang` models.\"\"\"\n\n    def format_input(self, model_input: Union[Chat, list, str]) -> list:\n        \"\"\"Generate the value of the messages argument to pass to the client.\n\n        We rely on the OpenAITypeAdapter to format the input as the sglang\n        server expects input in the same format as OpenAI.\n\n        Parameters\n        ----------\n        model_input\n            The input passed by the user.\n\n        Returns\n        -------\n        list\n            The formatted input to be passed to the client.\n\n        \"\"\"\n        return OpenAITypeAdapter().format_input(model_input)\n\n    def format_output_type(self, output_type: Optional[Any] = None) -> dict:\n        \"\"\"Generate the structured output argument to pass to the client.\n\n        Parameters\n        ----------\n        output_type\n            The structured output type provided.\n\n        Returns\n        -------\n        dict\n            The formatted output type to be passed to the client.\n\n        \"\"\"\n        if output_type is None:\n            return {}\n\n        term = python_types_to_terms(output_type)\n        if isinstance(term, CFG):\n            warnings.warn(\n                \"SGLang grammar-based structured outputs expects an EBNF \"\n                \"grammar instead of a Lark 
grammar as is generally used in \"\n                \"Outlines. The grammar cannot be used as a structured output \"\n                \"type with an outlines backend, it is only compatible with \"\n                \"the sglang and llguidance backends.\"\n            )\n            return {\"extra_body\": {\"ebnf\": term.definition}}\n        elif isinstance(term, JsonSchema):\n            return OpenAITypeAdapter().format_json_output_type(\n                json.loads(term.schema)\n            )\n        else:\n            return {\"extra_body\": {\"regex\": to_regex(term)}}\n\n\nclass SGLang(Model):\n    \"\"\"Thin wrapper around the `openai.OpenAI` client used to communicate with\n    an SGLang server.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `openai.OpenAI` client for the\n    SGLang server.\n\n    \"\"\"\n\n    def __init__(self, client, model_name: Optional[str] = None):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            An `openai.OpenAI` client instance.\n        model_name\n            The name of the model to use.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = SGLangTypeAdapter()\n\n    def generate(\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> Union[str, list[str]]:\n        \"\"\"Generate text using SGLang.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
All\n            output types available in Outlines are supported provided your\n            server uses a structured generation backend that supports them.\n        inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Union[str, list[str]]\n            The text generated by the model.\n\n        \"\"\"\n        client_args = self._build_client_args(\n            model_input,\n            output_type,\n            **inference_kwargs,\n        )\n\n        response = self.client.chat.completions.create(**client_args)\n\n        messages = [choice.message for choice in response.choices]\n        for message in messages:\n            if message.refusal is not None:  # pragma: no cover\n                raise ValueError(\n                    f\"The SGLang server refused to answer the request: \"\n                    f\"{message.refusal}\"\n                )\n\n        if len(messages) == 1:\n            return messages[0].content\n        else:\n            return [message.content for message in messages]\n\n    def generate_batch(\n        self,\n        model_input,\n        output_type = None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\n            \"SGLang does not support batch inference.\"\n        )\n\n    def generate_stream(\n        self,\n        model_input: Union[Chat, list, str],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> Iterator[str]:\n        \"\"\"Stream text using SGLang.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
All\n            output types available in Outlines are supported provided your\n            server uses a structured generation backend that supports them.\n        inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Iterator[str]\n            An iterator that yields the text generated by the model.\n\n        \"\"\"\n        client_args = self._build_client_args(\n            model_input, output_type, **inference_kwargs,\n        )\n\n        stream = self.client.chat.completions.create(\n            **client_args, stream=True,\n        )\n\n        for chunk in stream:  # pragma: no cover\n            if chunk.choices and chunk.choices[0].delta.content is not None:\n                yield chunk.choices[0].delta.content\n\n    def _build_client_args(\n        self,\n        model_input: Union[Chat, str, list],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> dict:\n        \"\"\"Build the arguments to pass to the SGLang client.\"\"\"\n        messages = self.type_adapter.format_input(model_input)\n        output_type_args = self.type_adapter.format_output_type(output_type)\n        inference_kwargs.update(output_type_args)\n\n        if \"model\" not in inference_kwargs and self.model_name is not None:\n            inference_kwargs[\"model\"] = self.model_name\n\n        client_args = {\n            \"messages\": messages,\n            **inference_kwargs,\n        }\n\n        return client_args\n\n\nclass AsyncSGLang(AsyncModel):\n    \"\"\"Thin async wrapper around the `openai.OpenAI` client used to communicate\n    with an SGLang server.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `openai.OpenAI` client for the\n    SGLang server.\n\n    \"\"\"\n\n    def __init__(self, client, model_name: Optional[str] = None):\n        \"\"\"\n        Parameters\n        
----------\n        client\n            An `openai.AsyncOpenAI` client instance.\n        model_name\n            The name of the model to use.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = SGLangTypeAdapter()\n\n    async def generate(\n        self,\n        model_input: Union[Chat, str, list],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> Union[str, list[str]]:\n        \"\"\"Generate text using `sglang`.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. All\n            output types available in Outlines are supported provided your\n            server uses a structured generation backend that supports them.\n        inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Union[str, list[str]]\n            The text generated by the model.\n\n        \"\"\"\n        client_args = self._build_client_args(\n            model_input, output_type, **inference_kwargs,\n        )\n\n        response = await self.client.chat.completions.create(**client_args)\n\n        messages = [choice.message for choice in response.choices]\n        for message in messages:\n            if message.refusal is not None:  # pragma: no cover\n                raise ValueError(\n                    f\"The sglang server refused to answer the request: \"\n                    f\"{message.refusal}\"\n                )\n\n        if len(messages) == 1:\n            return messages[0].content\n        else:\n            return [message.content for message in messages]\n\n    async def generate_batch(\n        self,\n        
model_input,\n        output_type = None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\n            \"SGLang does not support batch inference.\"\n        )\n\n    async def generate_stream( # type: ignore\n        self,\n        model_input: Union[Chat, str, list],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> AsyncIterator[str]:\n        \"\"\"Return a text generator.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. All\n            output types available in Outlines are supported provided your\n            server uses a structured generation backend that supports them.\n        inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        AsyncIterator[str]\n            An async iterator that yields the text generated by the model.\n\n        \"\"\"\n        client_args = self._build_client_args(\n            model_input, output_type, **inference_kwargs,\n        )\n\n        stream = await self.client.chat.completions.create(\n            **client_args,\n            stream=True,\n        )\n\n        async for chunk in stream:  # pragma: no cover\n            if chunk.choices and chunk.choices[0].delta.content is not None:\n                yield chunk.choices[0].delta.content\n\n    def _build_client_args(\n        self,\n        model_input: Union[Chat, str, list],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> dict:\n        \"\"\"Build the arguments to pass to the SGLang client.\"\"\"\n        messages = self.type_adapter.format_input(model_input)\n        output_type_args = self.type_adapter.format_output_type(output_type)\n        inference_kwargs.update(output_type_args)\n\n        if \"model\" not in 
inference_kwargs and self.model_name is not None:\n            inference_kwargs[\"model\"] = self.model_name\n\n        client_args = {\n            \"messages\": messages,\n            **inference_kwargs,\n        }\n\n        return client_args\n\n\ndef from_sglang(\n    client: Union[\"OpenAI\", \"AsyncOpenAI\"],\n    model_name: Optional[str] = None,\n) -> Union[SGLang, AsyncSGLang]:\n    \"\"\"Create a `SGLang` or `AsyncSGLang` instance from an `openai.OpenAI` or\n    `openai.AsyncOpenAI` instance.\n\n    Parameters\n    ----------\n    client\n        An `openai.OpenAI` or `openai.AsyncOpenAI` instance.\n    model_name\n        The name of the model to use.\n\n    Returns\n    -------\n    Union[SGLang, AsyncSGLang]\n        An Outlines `SGLang` or `AsyncSGLang` model instance.\n\n    \"\"\"\n    from openai import AsyncOpenAI, OpenAI\n\n    if isinstance(client, OpenAI):\n        return SGLang(client, model_name)\n    elif isinstance(client, AsyncOpenAI):\n        return AsyncSGLang(client, model_name)\n    else:\n        raise ValueError(\n            f\"Unsupported client type: {type(client)}.\\n\"\n            \"Please provide an OpenAI or AsyncOpenAI instance.\"\n        )\n"
  },
  {
    "path": "outlines/models/tgi.py",
    "content": "\"\"\"Integration with a TGI server.\"\"\"\n\nimport json\nfrom functools import singledispatchmethod\nfrom typing import (\n    TYPE_CHECKING,\n    Any,\n    AsyncIterator,\n    Iterator,\n    Optional,\n    Union,\n)\n\nfrom outlines.models.base import AsyncModel,Model, ModelTypeAdapter\nfrom outlines.types.dsl import python_types_to_terms, to_regex, JsonSchema, CFG\n\nif TYPE_CHECKING:\n    from huggingface_hub import AsyncInferenceClient, InferenceClient\n\n__all__ = [\"AsyncTGI\", \"TGI\", \"from_tgi\"]\n\n\nclass TGITypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `TGI` and `AsyncTGI` models.\"\"\"\n\n    @singledispatchmethod\n    def format_input(self, model_input):\n        \"\"\"Generate the prompt argument to pass to the client.\n\n        Argument\n        --------\n        model_input\n            The input passed by the user.\n\n        Returns\n        -------\n        str\n            The formatted input to be passed to the model.\n\n        \"\"\"\n        raise NotImplementedError(\n            f\"The input type {input} is not available with TGI. 
\"\n            + \"The only available type is `str`.\"\n        )\n\n    @format_input.register(str)\n    def format_str_input(self, model_input: str) -> str:\n        return model_input\n\n    def format_output_type(self, output_type: Optional[Any] = None) -> dict:\n        \"\"\"Generate the structured output argument to pass to the client.\n\n        Argument\n        --------\n        output_type\n            The structured output type provided.\n\n        Returns\n        -------\n        dict\n            The structured output argument to pass to the client.\n\n        \"\"\"\n        if output_type is None:\n            return {}\n\n        term = python_types_to_terms(output_type)\n        if isinstance(term, CFG):\n            raise NotImplementedError(\n                \"TGI does not support CFG-based structured outputs.\"\n            )\n        elif isinstance(term, JsonSchema):\n            return {\n                \"grammar\": {\n                    \"type\": \"json\",\n                    \"value\": json.loads(term.schema),\n                }\n            }\n        else:\n            return {\n                \"grammar\": {\n                    \"type\": \"regex\",\n                    \"value\": to_regex(term),\n                }\n            }\n\n\nclass TGI(Model):\n    \"\"\"Thin wrapper around a `huggingface_hub.InferenceClient` client used to\n    communicate with a `TGI` server.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the\n    `huggingface_hub.InferenceClient` client.\n\n    \"\"\"\n\n    def __init__(self, client):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            A huggingface `InferenceClient` client instance.\n\n        \"\"\"\n        self.client = client\n        self.type_adapter = TGITypeAdapter()\n\n    def generate(\n        self,\n        model_input: str,\n        output_type: Optional[Any] = None,\n        
**inference_kwargs: Any,\n    ) -> str:\n        \"\"\"Generate text using TGI.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. All\n            output types except `CFG` are supported provided your server uses\n            a backend that supports them.\n        inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        str\n            The text generated by the model.\n\n        \"\"\"\n        client_args = self._build_client_args(\n            model_input,\n            output_type,\n            **inference_kwargs,\n        )\n\n        return self.client.text_generation(**client_args)\n\n    def generate_batch(\n        self,\n        model_input,\n        output_type = None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\"TGI does not support batch inference.\")\n\n    def generate_stream(\n        self,\n        model_input: str,\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> Iterator[str]:\n        \"\"\"Stream text using TGI.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
All\n            output types except `CFG` are supported provided your server uses\n            a backend that supports them.\n        inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Iterator[str]\n            An iterator that yields the text generated by the model.\n\n        \"\"\"\n        client_args = self._build_client_args(\n            model_input, output_type, **inference_kwargs,\n        )\n\n        stream = self.client.text_generation(\n            **client_args, stream=True,\n        )\n\n        for chunk in stream:  # pragma: no cover\n            yield chunk\n\n    def _build_client_args(\n        self,\n        model_input: str,\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> dict:\n        \"\"\"Build the arguments to pass to the TGI client.\"\"\"\n        prompt = self.type_adapter.format_input(model_input)\n        output_type_args = self.type_adapter.format_output_type(output_type)\n        inference_kwargs.update(output_type_args)\n\n        client_args = {\n            \"prompt\": prompt,\n            **inference_kwargs,\n        }\n\n        return client_args\n\n\nclass AsyncTGI(AsyncModel):\n    \"\"\"Thin async wrapper around a `huggingface_hub.AsyncInferenceClient`\n    client used to communicate with a `TGI` server.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the\n    `huggingface_hub.AsyncInferenceClient` client.\n\n    \"\"\"\n\n    def __init__(self, client):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            A huggingface `AsyncInferenceClient` client instance.\n\n        \"\"\"\n        self.client = client\n        self.type_adapter = TGITypeAdapter()\n\n    async def generate(\n        self,\n        model_input: str,\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n  
  ) -> str:\n        \"\"\"Generate text using TGI.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. All\n            output types except `CFG` are supported provided your server uses\n            a backend that supports them.\n        inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        str\n            The text generated by the model.\n\n        \"\"\"\n        client_args = self._build_client_args(\n            model_input, output_type, **inference_kwargs,\n        )\n\n        response = await self.client.text_generation(**client_args)\n\n        return response\n\n    async def generate_batch(\n        self,\n        model_input,\n        output_type = None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\"TGI does not support batch inference.\")\n\n    async def generate_stream( # type: ignore\n        self,\n        model_input: str,\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> AsyncIterator[str]:\n        \"\"\"Stream text using TGI.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
All\n            output types except `CFG` are supported provided your server uses\n            a backend that supports them.\n        inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        AsyncIterator[str]\n            An async iterator that yields the text generated by the model.\n\n        \"\"\"\n        client_args = self._build_client_args(\n            model_input, output_type, **inference_kwargs,\n        )\n\n        stream = await self.client.text_generation(\n            **client_args, stream=True\n        )\n\n        async for chunk in stream:  # pragma: no cover\n            yield chunk\n\n    def _build_client_args(\n        self,\n        model_input: str,\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> dict:\n        \"\"\"Build the arguments to pass to the TGI client.\"\"\"\n        prompt = self.type_adapter.format_input(model_input)\n        output_type_args = self.type_adapter.format_output_type(output_type)\n        inference_kwargs.update(output_type_args)\n\n        client_args = {\n            \"prompt\": prompt,\n            **inference_kwargs,\n        }\n\n        return client_args\n\n\ndef from_tgi(\n    client: Union[\"InferenceClient\", \"AsyncInferenceClient\"],\n) -> Union[TGI, AsyncTGI]:\n    \"\"\"Create an Outlines `TGI` or `AsyncTGI` model instance from an\n    `huggingface_hub.InferenceClient` or `huggingface_hub.AsyncInferenceClient`\n    instance.\n\n    Parameters\n    ----------\n    client\n        An `huggingface_hub.InferenceClient` or\n        `huggingface_hub.AsyncInferenceClient` instance.\n\n    Returns\n    -------\n    Union[TGI, AsyncTGI]\n        An Outlines `TGI` or `AsyncTGI` model instance.\n\n    \"\"\"\n    from huggingface_hub import AsyncInferenceClient, InferenceClient\n\n    if isinstance(client, InferenceClient):\n        return TGI(client)\n    elif isinstance(client, 
AsyncInferenceClient):\n        return AsyncTGI(client)\n    else:\n        raise ValueError(\n            f\"Unsupported client type: {type(client)}.\\n\"\n            + \"Please provide an HuggingFace InferenceClient \"\n            + \"or AsyncInferenceClient instance.\"\n        )\n"
  },
  {
    "path": "outlines/models/tokenizer.py",
    "content": "from typing import Dict, Hashable, List, Protocol, Set, Tuple, Union, TYPE_CHECKING\n\n\nif TYPE_CHECKING:\n    import numpy as np\n    from numpy.typing import NDArray\n    from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast\n\n\nclass Tokenizer(Hashable, Protocol):\n    eos_token: str\n    eos_token_id: int\n    pad_token_id: int\n    vocabulary: Dict[str, int]\n    special_tokens: Set[str]\n\n    def encode(\n        self, prompt: Union[str, List[str]]\n    ) -> \"Tuple['NDArray[np.int64]', 'NDArray[np.int64]']\":\n        \"\"\"Translate the input prompts into arrays of token ids and attention mask.\"\"\"\n        ...\n\n    def decode(self, token_ids: \"NDArray[np.int64]\") -> List[str]:\n        \"\"\"Translate an array of token ids to a string or list of strings.\"\"\"\n        ...\n\n    def convert_token_to_string(self, token: str) -> str:\n        \"\"\"Convert a token to its equivalent string.\n\n        This is for instance useful for BPE tokenizers where whitespaces are\n        represented by the special characted `Ġ`. This prevents matching a raw\n        token that includes `Ġ` with a string.\n        \"\"\"\n        ...\n\n\ndef _check_hf_chat_template(tokenizer: \"PreTrainedTokenizer | PreTrainedTokenizerFast\") -> bool:\n    \"\"\"Check if the HuggingFace tokenizer has a chat template.\"\"\"\n    try:\n        tokenizer.get_chat_template()\n        return True\n    except ValueError:\n        return False\n"
  },
  {
    "path": "outlines/models/transformers.py",
    "content": "\"\"\"Integration with the `transformers` library. \"\"\"\n\nimport warnings\n\nfrom collections import defaultdict\nfrom functools import singledispatchmethod\nfrom typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union\n\nfrom outlines.inputs import Audio, Chat, Image, Video\nfrom outlines.models.base import Model, ModelTypeAdapter\nfrom outlines.models.tokenizer import Tokenizer, _check_hf_chat_template\nfrom outlines.processors import OutlinesLogitsProcessor\n\nif TYPE_CHECKING:\n    import torch\n    from transformers import (\n        PreTrainedTokenizer,\n        PreTrainedModel,\n        ProcessorMixin,\n        LogitsProcessorList,\n    )\n\n__all__ = [\"Transformers\", \"TransformersMultiModal\", \"from_transformers\"]\n\n\ndef get_llama_tokenizer_types():\n    \"\"\"Get all the Llama tokenizer types/classes that need work-arounds.\n\n    When they can't be imported, a dummy class is created.\n\n    \"\"\"\n    try:\n        from transformers.models.llama import LlamaTokenizer\n    except ImportError:  # pragma: no cover\n\n        class LlamaTokenizer:  # type: ignore\n            pass\n\n    try:\n        from transformers.models.llama import LlamaTokenizerFast\n    except ImportError:  # pragma: no cover\n\n        class LlamaTokenizerFast:  # type: ignore\n            pass\n\n    try:\n        from transformers.models.code_llama import CodeLlamaTokenizer\n    except ImportError:  # pragma: no cover\n\n        class CodeLlamaTokenizer:  # type: ignore\n            pass\n\n    try:\n        from transformers.models.code_llama import CodeLlamaTokenizerFast\n    except ImportError:  # pragma: no cover\n\n        class CodeLlamaTokenizerFast:  # type: ignore\n            pass\n\n    return (\n        LlamaTokenizer,\n        LlamaTokenizerFast,\n        CodeLlamaTokenizer,\n        CodeLlamaTokenizerFast,\n    )\n\n\nclass TransformerTokenizer(Tokenizer):\n    \"\"\"Represents a tokenizer for models in the `transformers` 
library.\"\"\"\n\n    def __init__(self, tokenizer: \"PreTrainedTokenizer\", **kwargs):\n        self.tokenizer = tokenizer\n        self.eos_token_id = self.tokenizer.eos_token_id\n        self.eos_token = self.tokenizer.eos_token\n        self.get_vocab = self.tokenizer.get_vocab\n\n        if self.tokenizer.pad_token_id is None:\n            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id\n            self.pad_token_id = self.eos_token_id\n        else:\n            self.pad_token_id = self.tokenizer.pad_token_id\n            self.pad_token = self.tokenizer.pad_token\n\n        self.special_tokens = set(self.tokenizer.all_special_tokens)\n\n        self.vocabulary = self.tokenizer.get_vocab()\n        self.is_llama = isinstance(self.tokenizer, get_llama_tokenizer_types())\n\n    def encode(\n        self, prompt: Union[str, List[str]], **kwargs\n    ) -> Tuple[\"torch.LongTensor\", \"torch.LongTensor\"]:\n        kwargs[\"padding\"] = True\n        kwargs[\"return_tensors\"] = \"pt\"\n        output = self.tokenizer(prompt, **kwargs)\n        return output[\"input_ids\"], output[\"attention_mask\"]\n\n    def decode(self, token_ids: \"torch.LongTensor\") -> List[str]:\n        text = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True)\n        return text\n\n    def convert_token_to_string(self, token: str) -> str:\n        from transformers.file_utils import SPIECE_UNDERLINE\n\n        string = self.tokenizer.convert_tokens_to_string([token])\n\n        if token.startswith(SPIECE_UNDERLINE) or token == \"<0x20>\":\n            return \" \" + string\n\n        return string\n\n    def __eq__(self, other):\n        if isinstance(other, type(self)):\n            if hasattr(self, \"model_name\") and hasattr(self, \"kwargs\"):\n                return (\n                    other.model_name == self.model_name and other.kwargs == self.kwargs\n                )\n            else:\n                return other.tokenizer == self.tokenizer\n        
return NotImplemented\n\n    def __hash__(self):\n        from datasets.fingerprint import Hasher\n\n        return hash(Hasher.hash(self.tokenizer))\n\n    def __getstate__(self):\n        state = {\"tokenizer\": self.tokenizer}\n        return state\n\n    def __setstate__(self, state):\n        self.__init__(state[\"tokenizer\"])\n\n\nclass TransformersTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `Transformers` model.\"\"\"\n\n    def __init__(self, tokenizer: \"PreTrainedTokenizer\", has_chat_template: bool = False):\n        self.tokenizer = tokenizer\n        self.has_chat_template = has_chat_template\n\n    @singledispatchmethod\n    def format_input(self, model_input):\n        \"\"\"Generate the prompt argument to pass to the model.\n\n        Parameters\n        ----------\n        model_input\n            The input passed by the user.\n\n        Returns\n        -------\n        str\n            The formatted input to be passed to the model.\n\n        \"\"\"\n        raise TypeError(\n            f\"The input type {type(model_input)} is not available.\"\n            \"The only available types are `str` and `Chat`.\"\n        )\n\n    @format_input.register(str)\n    def format_str_input(self, model_input: str) -> str:\n        if self.has_chat_template:\n            return self.format_chat_input(Chat([{\"role\": \"user\", \"content\": model_input}]))\n        return model_input\n\n    @format_input.register(Chat)\n    def format_chat_input(self, model_input: Chat) -> str:\n        return self.tokenizer.apply_chat_template(\n            model_input.messages,\n            tokenize=False,\n            add_generation_prompt=True,\n        )\n\n    def format_output_type(\n        self,\n        output_type: Optional[OutlinesLogitsProcessor] = None,\n    ) -> Optional[\"LogitsProcessorList\"]:\n        \"\"\"Generate the logits processor argument to pass to the model.\n\n        Parameters\n        ----------\n        output_type\n          
  The logits processor provided.\n\n        Returns\n        -------\n        Optional[LogitsProcessorList]\n            The logits processor to pass to the model.\n\n        \"\"\"\n        from transformers import LogitsProcessorList\n\n        if output_type is not None:\n            return LogitsProcessorList([output_type])\n        return None\n\n\nclass Transformers(Model):\n    \"\"\"Thin wrapper around a `transformers` model and a `transformers`\n    tokenizer.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `transformers` model and\n    tokenizer.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        model: \"PreTrainedModel\",\n        tokenizer: \"PreTrainedTokenizer\",\n        *,\n        device_dtype: Optional[\"torch.dtype\"] = None,\n    ):\n        \"\"\"\n        Parameters:\n        ----------\n        model\n            A `PreTrainedModel`, or any model that is compatible with the\n            `transformers` API for models.\n        tokenizer\n            A `PreTrainedTokenizer`, or any tokenizer that is compatible with\n            the `transformers` API for tokenizers.\n        device_dtype\n            The dtype to use for the model. 
If not provided, the model will use\n            the default dtype.\n\n        \"\"\"\n        # We need to handle the cases in which jax/flax or tensorflow\n        # is not available in the environment.\n        try:\n            from transformers import FlaxPreTrainedModel\n        except ImportError:  # pragma: no cover\n            FlaxPreTrainedModel = None\n\n        try:\n            from transformers import TFPreTrainedModel\n        except ImportError:  # pragma: no cover\n            TFPreTrainedModel = None\n\n        tokenizer.padding_side = \"left\"\n        self.model = model\n        self.hf_tokenizer = tokenizer\n        self.tokenizer = TransformerTokenizer(tokenizer)\n        self.device_dtype = device_dtype\n        self.type_adapter = TransformersTypeAdapter(\n            tokenizer=tokenizer,\n            has_chat_template=_check_hf_chat_template(tokenizer)\n        )\n\n        if (\n            FlaxPreTrainedModel is not None\n            and isinstance(model, FlaxPreTrainedModel)\n        ):  # pragma: no cover\n            self.tensor_library_name = \"jax\"\n            warnings.warn(\"\"\"\n                Support for `jax` has been deprecated and will be removed in\n                version 1.4.0 of Outlines. Please use `torch` instead.\n                Transformers models using `jax` do not support structured\n                generation.\n                \"\"\",\n                DeprecationWarning,\n                stacklevel=2,\n            )\n        elif (\n            TFPreTrainedModel is not None\n            and isinstance(model, TFPreTrainedModel)\n        ):  # pragma: no cover\n            self.tensor_library_name = \"tensorflow\"\n            warnings.warn(\"\"\"\n                Support for `tensorflow` has been deprecated and will be removed in\n                version 1.4.0 of Outlines. 
Please use `torch` instead.\n                Transformers models using `tensorflow` do not support structured\n                generation.\n                \"\"\",\n                DeprecationWarning,\n                stacklevel=2,\n            )\n        else:\n            self.tensor_library_name = \"torch\"\n\n    def _prepare_model_inputs(\n        self,\n        model_input,\n        is_batch: bool = False,\n    ) -> Tuple[Union[str, List[str]], dict]:\n        \"\"\"Turn the user input into arguments to pass to the model\"\"\"\n        # Format validation\n        if is_batch:\n            prompts = [\n                self.type_adapter.format_input(item)\n                for item in model_input\n            ]\n        else:\n            prompts = self.type_adapter.format_input(model_input)\n        input_ids, attention_mask = self.tokenizer.encode(prompts)\n        inputs = {\n            \"input_ids\": input_ids.to(self.model.device),\n            \"attention_mask\": (\n                attention_mask.to(self.model.device, dtype=self.device_dtype)\n                if self.device_dtype is not None\n                else attention_mask.to(self.model.device)\n            ),\n        }\n\n        return prompts, inputs\n\n    def generate(\n        self,\n        model_input: Union[str, dict, Chat],\n        output_type: Optional[OutlinesLogitsProcessor] = None,\n        **inference_kwargs: Any,\n    ) -> Union[str, List[str]]:\n        \"\"\"Generate text using `transformers`.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response. 
For\n            multi-modal models, the input should be a dictionary containing the\n            `text` key with a value of type `Union[str, List[str]]` and the\n            other keys required by the model.\n        output_type\n            The logits processor the model will use to constrain the format of\n            the generated text.\n        inference_kwargs\n            Additional keyword arguments to pass to the `generate` method\n            of the `transformers` model.\n\n        Returns\n        -------\n        Union[str, List[str]]\n            The text generated by the model.\n\n        \"\"\"\n        prompts, inputs = self._prepare_model_inputs(model_input, False)\n        logits_processor = self.type_adapter.format_output_type(output_type)\n\n        generated_ids = self._generate_output_seq(\n            prompts,\n            inputs,\n            logits_processor=logits_processor,\n            **inference_kwargs,\n        )\n\n        # required for multi-modal models that return a 2D tensor even when\n        # num_return_sequences is 1\n        num_samples = inference_kwargs.get(\"num_return_sequences\", 1)\n        if num_samples == 1 and len(generated_ids.shape) == 2:\n            generated_ids = generated_ids.squeeze(0)\n\n        return self._decode_generation(generated_ids)\n\n    def generate_batch(\n        self,\n        model_input: List[Union[str, dict, Chat]],\n        output_type: Optional[OutlinesLogitsProcessor] = None,\n        **inference_kwargs: Any,\n    ) -> List[Union[str, List[str]]]:\n        \"\"\"\"\"\"\n        prompts, inputs = self._prepare_model_inputs(model_input, True) # type: ignore\n        logits_processor = self.type_adapter.format_output_type(output_type)\n\n        generated_ids = self._generate_output_seq(\n            prompts, inputs, logits_processor=logits_processor, **inference_kwargs\n        )\n\n        # if there are multiple samples per input, convert generated_id to 3D\n        num_samples = 
inference_kwargs.get(\"num_return_sequences\", 1)\n        if num_samples > 1:\n            generated_ids = generated_ids.view(len(model_input), num_samples, -1)\n\n        return self._decode_generation(generated_ids)\n\n    def generate_stream(self, model_input, output_type, **inference_kwargs):\n        \"\"\"Not available for `transformers` models.\n\n        TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810\n\n        \"\"\"\n        raise NotImplementedError(\n            \"Streaming is not implemented for Transformers models.\"\n        )\n\n    def _generate_output_seq(self, prompts, inputs, **inference_kwargs):\n        input_ids = inputs[\"input_ids\"]\n\n        output_ids = self.model.generate(\n            **inputs,\n            **inference_kwargs,\n        )\n\n        # encoder-decoder returns output_ids only, decoder-only returns full seq ids\n        if self.model.config.is_encoder_decoder:\n            generated_ids = output_ids\n        else:\n            generated_ids = output_ids[:, input_ids.shape[1] :]\n\n        return generated_ids\n\n    def _decode_generation(self, generated_ids: \"torch.Tensor\"):\n        if len(generated_ids.shape) == 1:\n            return self.tokenizer.decode([generated_ids])[0]\n        elif len(generated_ids.shape) == 2:\n            return self.tokenizer.decode(generated_ids)\n        elif len(generated_ids.shape) == 3:\n            return [\n                self.tokenizer.decode(generated_ids[i])\n                for i in range(len(generated_ids))\n            ]\n        else:  # pragma: no cover\n            raise TypeError(\n                \"Generated outputs aren't 1D, 2D or 3D, but instead are \"\n                f\"{generated_ids.shape}\"\n            )\n\n\nclass TransformersMultiModalTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for `TransformersMultiModal` model.\"\"\"\n\n    def __init__(self, **kwargs):\n        self.tokenizer = 
kwargs.get(\"tokenizer\")\n\n    @singledispatchmethod\n    def format_input(self, model_input):\n        \"\"\"Format the prompt arguments to pass to the model.\n\n        Argument\n        --------\n        model_input\n            The input passed by the user.\n\n        Returns\n        -------\n        dict\n            The formatted input.\n\n        \"\"\"\n        raise TypeError(\n            f\"The input type {type(model_input)} is not available. Please \"\n            + \"provide a list containing a text prompt and assets \"\n            + \"(`Image`, `Audio` or `Video` instances) supported by your \"\n            + \"model or a `Chat` instance.\"\n        )\n\n    @format_input.register(Chat)\n    def format_chat_input(self, model_input: Chat) -> dict:\n        conversation = []\n        assets = []\n\n        # process each message, convert if needed to standardized multimodal chat template format\n        # and collect assets for HF processor\n        for message in model_input.messages:\n            processed_message, message_assets = self._prepare_message(\n                message[\"role\"], message[\"content\"]\n            )\n            conversation.append(processed_message)\n            assets.extend(message_assets)\n\n        formatted_prompt = self.tokenizer.apply_chat_template(\n            conversation,\n            tokenize=False,\n            add_generation_prompt=True\n        )\n        # use the formatted prompt and the assets to format the input\n        return self.format_list_input([formatted_prompt, *assets])\n\n    def _prepare_message(self, role: str, content: str | list) -> tuple[dict, list]:\n        \"\"\"Create a message.\"\"\"\n        if isinstance(content, str):\n            return {\"role\": role, \"content\": content}, []\n\n        elif isinstance(content, list):\n            if all(isinstance(item, dict) for item in content): # HF multimodal chat template\n                return {\"role\": role, \"content\": content}, 
self._extract_assets_from_content(content)\n            else: # list of string + assets\n                prompt = content[0]\n                assets = content[1:]\n                assets_dict = [self._format_asset_for_template(asset) for asset in assets]\n\n                return {\"role\": role, \"content\": [\n                    {\"type\": \"text\", \"text\": prompt},\n                    *assets_dict\n                ]}, assets\n        else:\n            raise ValueError(\n                f\"Invalid content type: {type(content)}. \"\n                + \"The content must be a string or a list containing text and assets \"\n                + \"or a list of dict items with explicit types.\"\n            )\n\n    def _extract_assets_from_content(self, content: list) -> list:\n        \"\"\"Process a list of dict items.\"\"\"\n        assets = []\n\n        for item in content:\n            if len(item) > 2:\n                raise ValueError(\n                    f\"Found item with multiple keys: {item}. \"\n                    + \"Each item in the content list must be a dictionary with a 'type' key and a single asset key. \"\n                    + \"To include multiple assets, use separate dictionary items. \"\n                    + \"For example: [{{'type': 'image', 'image': image1}}, {{'type': 'image', 'image': image2}}]. \"\n                )\n\n            if \"type\" not in item:\n                raise ValueError(\n                    \"Each item in the content list must be a dictionary with a 'type' key. \"\n                    + \"Valid types are 'text', 'image', 'video', or 'audio'. \"\n                    + \"For instance {{'type': 'text', 'text': 'your message'}}. 
\"\n                    + f\"Found item without 'type' key: {item}\"\n                )\n            if item[\"type\"] == \"text\":\n                continue\n            elif item[\"type\"] in [\"image\", \"video\", \"audio\"]:\n                asset_key = item[\"type\"]\n                if asset_key not in item:\n                    raise ValueError(\n                        f\"Item with type '{asset_key}' must contain a '{asset_key}' key. \"\n                        + f\"Found item: {item}\"\n                    )\n                if isinstance(item[asset_key], (Image, Video, Audio)):\n                    assets.append(item[asset_key])\n                else:\n                    raise ValueError(\n                        \"Assets must be of type `Image`, `Video` or `Audio`. \"\n                        + f\"Unsupported asset type: {type(item[asset_key])}\"\n                    )\n            else:\n                raise ValueError(\n                    \"Content must be 'text', 'image', 'video' or 'audio'. \"\n                    + f\"Unsupported content type: {item['type']}\")\n        return assets\n\n    def _format_asset_for_template(self, asset: Image | Video | Audio) -> dict:\n        \"\"\"Process an asset.\"\"\"\n        if isinstance(asset, Image):\n            return {\"type\": \"image\", \"image\": asset}\n        elif isinstance(asset, Video):\n            return {\"type\": \"video\", \"video\": asset}\n        elif isinstance(asset, Audio):\n            return {\"type\": \"audio\", \"audio\": asset}\n        else:\n            raise ValueError(\n                \"Assets must be of type `Image`, `Video` or `Audio`. 
\"\n                + f\"Unsupported asset type: {type(asset)}\"\n            )\n\n    @format_input.register(list)\n    def format_list_input(self, model_input: list) -> dict:\n        prompt = model_input[0]\n        assets = model_input[1:]\n\n        if not assets:  # handle empty assets case\n            return {\"text\": prompt}\n\n        asset_types = set(type(asset) for asset in assets)\n        if len(asset_types) > 1:\n            raise ValueError(\n                \"All assets must be of the same type. \"\n                + f\"Found types: {asset_types}\"\n            )\n        asset_type = asset_types.pop()\n\n        if asset_type == Image:\n            return {\n                \"text\": prompt,\n                \"images\": [asset.image for asset in assets]\n            }\n        elif asset_type == Audio: # pragma: no cover\n            return {\n                \"text\": prompt,\n                \"audio\": [asset.audio for asset in assets]\n            }\n        elif asset_type == Video: # pragma: no cover\n            return {\n                \"text\": prompt,\n                \"videos\": [asset.video for asset in assets]\n            }\n        else:\n            raise ValueError(f\"Unsupported asset type: {asset_type}\")\n\n    def format_output_type(\n        self,\n        output_type: Optional[OutlinesLogitsProcessor] = None,\n    ) -> Optional[\"LogitsProcessorList\"]:\n        \"\"\"Generate the logits processor argument to pass to the model.\n\n        Argument\n        --------\n        output_type\n            The logits processor provided.\n\n        Returns\n        -------\n        Optional[LogitsProcessorList]\n            The logits processor to pass to the model.\n\n        \"\"\"\n        from transformers import LogitsProcessorList\n\n        if output_type is not None:\n            return LogitsProcessorList([output_type])\n        return None\n\n\nclass TransformersMultiModal(Transformers):\n    \"\"\"Thin wrapper around a 
`transformers` model and a `transformers`\n    processor.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `transformers` model and\n    processor.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        model: \"PreTrainedModel\",\n        processor,\n        *,\n        device_dtype: Optional[\"torch.dtype\"] = None,\n    ):\n        \"\"\"Create a TransformersMultiModal model instance\n\n        We rely on the `__init__` method of the `Transformers` class to handle\n        most of the initialization and then add elements specific to multimodal\n        models.\n\n        Parameters\n        ----------\n        model\n            A `PreTrainedModel`, or any model that is compatible with the\n            `transformers` API for models.\n        processor\n            A `ProcessorMixin` instance.\n        device_dtype\n            The dtype to use for the model. If not provided, the model will use\n            the default dtype.\n\n        \"\"\"\n        self.processor = processor\n        self.processor.padding_side = \"left\"\n        self.processor.pad_token = \"[PAD]\"\n\n        tokenizer: \"PreTrainedTokenizer\" = self.processor.tokenizer\n\n        super().__init__(model, tokenizer, device_dtype=device_dtype)\n\n        self.type_adapter = TransformersMultiModalTypeAdapter(\n            tokenizer=tokenizer\n        )\n\n    def _prepare_model_inputs(\n        self,\n        model_input,\n        is_batch: bool = False,\n    ) -> Tuple[Union[str, List[str]], dict]:\n        \"\"\"Turn the user input into arguments to pass to the model\"\"\"\n        if is_batch:\n            prompts = [\n                self.type_adapter.format_input(item) for item in model_input\n            ]\n        else:\n            prompts = self.type_adapter.format_input(model_input)\n\n        # The expected format is a single dict\n        if is_batch:\n            merged_prompts = 
defaultdict(list)\n            for d in prompts:\n                for key, value in d.items():\n                    if key == \"text\":\n                        merged_prompts[key].append(value)\n                    else:\n                        merged_prompts[key].extend(value)\n        else:\n            merged_prompts = prompts # type: ignore\n\n        inputs = self.processor(\n            **merged_prompts, padding=True, return_tensors=\"pt\"\n        )\n        if self.device_dtype is not None:\n            inputs = inputs.to(self.model.device, dtype=self.device_dtype)\n        else:\n            inputs = inputs.to(self.model.device)\n\n        return merged_prompts[\"text\"], inputs\n\n\ndef from_transformers(\n    model: \"PreTrainedModel\",\n    tokenizer_or_processor: Union[\"PreTrainedTokenizer\", \"ProcessorMixin\"],\n    *,\n    device_dtype: Optional[\"torch.dtype\"] = None,\n) -> Union[Transformers, TransformersMultiModal]:\n    \"\"\"Create an Outlines `Transformers` or `TransformersMultiModal` model\n    instance from a `PreTrainedModel` instance and a `PreTrainedTokenizer` or\n    `ProcessorMixin` instance.\n\n    `outlines` supports `PreTrainedModelForCausalLM`,\n    `PreTrainedMambaForCausalLM`, `PreTrainedModelForSeq2Seq` and any model\n    that implements the `transformers` model API.\n\n    Parameters\n    ----------\n    model\n        A `transformers.PreTrainedModel` instance.\n    tokenizer_or_processor\n        A `transformers.PreTrainedTokenizer` or\n        `transformers.ProcessorMixin` instance.\n    device_dtype\n        The dtype to use for the model. 
If not provided, the model will use\n        the default dtype.\n\n    Returns\n    -------\n    Union[Transformers, TransformersMultiModal]\n        An Outlines `Transformers` or `TransformersMultiModal` model instance.\n\n    \"\"\"\n    from transformers import (\n        PreTrainedTokenizer, PreTrainedTokenizerFast, ProcessorMixin)\n\n    if isinstance(\n        tokenizer_or_processor, (PreTrainedTokenizer, PreTrainedTokenizerFast)\n    ):\n        tokenizer = tokenizer_or_processor\n        return Transformers(model, tokenizer, device_dtype=device_dtype)\n    elif isinstance(tokenizer_or_processor, ProcessorMixin):\n        processor = tokenizer_or_processor\n        return TransformersMultiModal(model, processor, device_dtype=device_dtype)\n    else:\n        raise ValueError(\n            \"We could not determine whether the model passed to `from_transformers`\"\n            + \" is a text-2-text or a multi-modal model. Please provide a \"\n            + \"transformers tokenizer or processor.\"\n        )\n"
  },
  {
    "path": "outlines/models/utils.py",
    "content": "import jsonpath_ng\n\n\ndef set_additional_properties_false_json_schema(schema: dict) -> dict:\n    \"\"\"Set additionalProperties to False to all objects in the schema using jsonpath.\n\n    Parameters\n    ----------\n    schema\n        The JSON schema to modify\n\n    Returns\n    -------\n    dict\n        The modified schema with additionalProperties set to False\n    \"\"\"\n    # Get all nodes\n    jsonpath_expr = jsonpath_ng.parse('$..*')\n    matches = jsonpath_expr.find(schema)\n\n    # Go over all nodes and set additionalProperties to False if it's an object\n    for match in matches:\n        if match.value == 'object':\n            if 'additionalProperties' not in match.context.value:\n                match.context.value['additionalProperties'] = False\n\n    return schema\n"
  },
  {
    "path": "outlines/models/vllm.py",
    "content": "\"\"\"Integration with a vLLM server.\"\"\"\n\nimport json\nfrom typing import TYPE_CHECKING, Any, AsyncIterator, Iterator, Optional, Union\n\nfrom outlines.inputs import Chat\nfrom outlines.models.base import AsyncModel,Model, ModelTypeAdapter\nfrom outlines.models.openai import OpenAITypeAdapter\nfrom outlines.types.dsl import CFG, JsonSchema, python_types_to_terms, to_regex\n\nif TYPE_CHECKING:\n    from openai import AsyncOpenAI, OpenAI\n\n__all__ = [\"VLLM\", \"AsyncVLLM\", \"from_vllm\"]\n\n\nclass VLLMTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `VLLM` and `AsyncVLLM` models.\"\"\"\n\n    def format_input(self, model_input: Union[Chat, str, list]) -> list:\n        \"\"\"Generate the value of the messages argument to pass to the client.\n\n        We rely on the OpenAITypeAdapter to format the input as the vLLM server\n        expects input in the same format as OpenAI.\n\n        Parameters\n        ----------\n        model_input\n            The input passed by the user.\n\n        Returns\n        -------\n        list\n            The formatted input to be passed to the model.\n\n        \"\"\"\n        return OpenAITypeAdapter().format_input(model_input)\n\n    def format_output_type(self, output_type: Optional[Any] = None) -> dict:\n        \"\"\"Generate the structured output argument to pass to the client.\n\n        Parameters\n        ----------\n        output_type\n            The structured output type provided.\n\n        Returns\n        -------\n        dict\n            The structured output argument to pass to the model.\n\n        \"\"\"\n        if output_type is None:\n            return {}\n\n        term = python_types_to_terms(output_type)\n        if isinstance(term, CFG):\n            return {\"guided_grammar\": term.definition}\n        elif isinstance(term, JsonSchema):\n            extra_body = {\"guided_json\": json.loads(term.schema)}\n            if term.whitespace_pattern:\n                
extra_body[\"whitespace_pattern\"] = term.whitespace_pattern\n            return extra_body\n        else:\n            return {\"guided_regex\": to_regex(term)}\n\n\nclass VLLM(Model):\n    \"\"\"Thin wrapper around the `openai.OpenAI` client used to communicate with\n    a `vllm` server.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `openai.OpenAI` client for the\n    `vllm` server.\n    \"\"\"\n\n    def __init__(\n        self,\n        client: \"OpenAI\",\n        model_name: Optional[str] = None,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            An `openai.OpenAI` client instance.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = VLLMTypeAdapter()\n\n    def generate(\n        self,\n        model_input: Union[Chat, str, list],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> Union[str, list[str]]:\n        \"\"\"Generate text using vLLM.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
All\n            output types available in Outlines are supported provided your\n            server uses a structured generation backend that supports them.\n        inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Union[str, list[str]]\n            The text generated by the model.\n\n        \"\"\"\n        client_args = self._build_client_args(\n            model_input,\n            output_type,\n            **inference_kwargs,\n        )\n\n        response = self.client.chat.completions.create(**client_args)\n\n        messages = [choice.message for choice in response.choices]\n        for message in messages:\n            if message.refusal is not None:  # pragma: no cover\n                raise ValueError(\n                    f\"The vLLM server refused to answer the request: \"\n                    f\"{message.refusal}\"\n                )\n\n        if len(messages) == 1:\n            return messages[0].content\n        else:\n            return [message.content for message in messages]\n\n    def generate_batch(\n        self,\n        model_input,\n        output_type = None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\"VLLM does not support batch inference.\")\n\n    def generate_stream(\n        self,\n        model_input: Union[Chat, str, list],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> Iterator[str]:\n        \"\"\"Stream text using vLLM.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. 
All\n            output types available in Outlines are supported provided your\n            server uses a structured generation backend that supports them.\n        inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Iterator[str]\n            An iterator that yields the text generated by the model.\n\n        \"\"\"\n        client_args = self._build_client_args(\n            model_input, output_type, **inference_kwargs,\n        )\n\n        stream = self.client.chat.completions.create(\n            **client_args, stream=True,\n        )\n\n        for chunk in stream:  # pragma: no cover\n            if chunk.choices and chunk.choices[0].delta.content is not None:\n                yield chunk.choices[0].delta.content\n\n    def _build_client_args(\n        self,\n        model_input: Union[Chat, str, list],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> dict:\n        \"\"\"Build the arguments to pass to the OpenAI client.\"\"\"\n        messages = self.type_adapter.format_input(model_input)\n        output_type_args = self.type_adapter.format_output_type(output_type)\n        extra_body = inference_kwargs.pop(\"extra_body\", {})\n        extra_body.update(output_type_args)\n\n        if \"model\" not in inference_kwargs and self.model_name is not None:\n            inference_kwargs[\"model\"] = self.model_name\n\n        client_args = {\n            \"messages\": messages,\n            **inference_kwargs,\n        }\n        if extra_body:\n            client_args[\"extra_body\"] = extra_body\n\n        return client_args\n\n\nclass AsyncVLLM(AsyncModel):\n    \"\"\"Thin async wrapper around the `openai.OpenAI` client used to communicate\n    with a `vllm` server.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `openai.OpenAI` client for the\n    `vllm` server.\n  
  \"\"\"\n\n    def __init__(\n        self,\n        client: \"AsyncOpenAI\",\n        model_name: Optional[str] = None,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        client\n            An `openai.AsyncOpenAI` client instance.\n\n        \"\"\"\n        self.client = client\n        self.model_name = model_name\n        self.type_adapter = VLLMTypeAdapter()\n\n    async def generate(\n        self,\n        model_input: Union[Chat, str, list],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> Union[str, list[str]]:\n        \"\"\"Generate text using vLLM.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. All\n            output types available in Outlines are supported provided your\n            server uses a structured generation backend that supports them.\n        inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        Union[str, list[str]]\n            The text generated by the model.\n\n        \"\"\"\n        client_args = self._build_client_args(\n            model_input, output_type, **inference_kwargs,\n        )\n\n        response = await self.client.chat.completions.create(**client_args)\n\n        messages = [choice.message for choice in response.choices]\n        for message in messages:\n            if message.refusal is not None:  # pragma: no cover\n                raise ValueError(\n                    f\"The vLLM server refused to answer the request: \"\n                    f\"{message.refusal}\"\n                )\n\n        if len(messages) == 1:\n            return messages[0].content\n        else:\n            return [message.content for message in messages]\n\n    async def generate_batch(\n        self,\n        model_input,\n 
       output_type = None,\n        **inference_kwargs,\n    ):\n        raise NotImplementedError(\"VLLM does not support batch inference.\")\n\n    async def generate_stream( # type: ignore\n        self,\n        model_input: Union[Chat, str, list],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> AsyncIterator[str]:\n        \"\"\"Stream text using vLLM.\n\n        Parameters\n        ----------\n        model_input\n            The prompt based on which the model will generate a response.\n        output_type\n            The desired format of the response generated by the model. All\n            output types available in Outlines are supported provided your\n            server uses a structured generation backend that supports them.\n        inference_kwargs\n            Additional keyword arguments to pass to the client.\n\n        Returns\n        -------\n        AsyncIterator[str]\n            An async iterator that yields the text generated by the model.\n        \"\"\"\n        client_args = self._build_client_args(\n            model_input, output_type, **inference_kwargs,\n        )\n\n        stream = await self.client.chat.completions.create(\n            **client_args,\n            stream=True,\n        )\n\n        async for chunk in stream:  # pragma: no cover\n            if chunk.choices and chunk.choices[0].delta.content is not None:\n                yield chunk.choices[0].delta.content\n\n    def _build_client_args(\n        self,\n        model_input: Union[Chat, str, list],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> dict:\n        \"\"\"Build the arguments to pass to the OpenAI client.\"\"\"\n        messages = self.type_adapter.format_input(model_input)\n        output_type_args = self.type_adapter.format_output_type(output_type)\n        extra_body = inference_kwargs.pop(\"extra_body\", {})\n        extra_body.update(output_type_args)\n\n        if 
\"model\" not in inference_kwargs and self.model_name is not None:\n            inference_kwargs[\"model\"] = self.model_name\n\n        client_args = {\n            \"messages\": messages,\n            **inference_kwargs,\n        }\n        if extra_body:\n            client_args[\"extra_body\"] = extra_body\n\n        return client_args\n\n\ndef from_vllm(\n    client: Union[\"OpenAI\", \"AsyncOpenAI\"],\n    model_name: Optional[str] = None,\n) -> Union[VLLM, AsyncVLLM]:\n    \"\"\"Create an Outlines `VLLM` or `AsyncVLLM` model instance from an\n    `openai.OpenAI` or `openai.AsyncOpenAI` instance.\n\n    Parameters\n    ----------\n    client\n        An `openai.OpenAI` or `openai.AsyncOpenAI` instance.\n    model_name\n        The name of the model to use.\n\n    Returns\n    -------\n    Union[VLLM, AsyncVLLM]\n        An Outlines `VLLM` or `AsyncVLLM` model instance.\n\n    \"\"\"\n    from openai import AsyncOpenAI, OpenAI\n\n    if isinstance(client, OpenAI):\n        return VLLM(client, model_name)\n    elif isinstance(client, AsyncOpenAI):\n        return AsyncVLLM(client, model_name)\n    else:\n        raise ValueError(\n            f\"Unsupported client type: {type(client)}.\\n\"\n            \"Please provide an OpenAI or AsyncOpenAI instance.\"\n        )\n"
  },
  {
    "path": "outlines/models/vllm_offline.py",
    "content": "\"\"\"Integration with the `vllm` library (offline mode).\"\"\"\n\nimport json\nfrom functools import singledispatchmethod\nfrom typing import TYPE_CHECKING, Any, List, Optional, Union\n\nfrom outlines.inputs import Chat\nfrom outlines.models.base import Model, ModelTypeAdapter\nfrom outlines.models.openai import OpenAITypeAdapter\nfrom outlines.types.dsl import CFG, JsonSchema, python_types_to_terms, to_regex\n\nif TYPE_CHECKING:\n    from vllm import LLM\n    from vllm.sampling_params import SamplingParams\n\n__all__ = [\"VLLMOffline\", \"from_vllm_offline\"]\n\n\nclass VLLMOfflineTypeAdapter(ModelTypeAdapter):\n    \"\"\"Type adapter for the `VLLMOffline` model.\"\"\"\n\n    def __init__(self, has_chat_template: bool = False):\n        self.has_chat_template = has_chat_template\n\n    @singledispatchmethod\n    def format_input(self, model_input):\n        \"\"\"Generate the prompt argument to pass to the model.\n\n        Argument\n        --------\n        model_input\n            The input passed by the user.\n\n        \"\"\"\n        raise TypeError(\n            f\"The input type {type(model_input)} is not available with \"\n            \"VLLM offline. 
The only available types are `str` and \"\n            \"`Chat` (containing a prompt and images).\"\n        )\n\n    @format_input.register(str)\n    def format_input_str(self, model_input: str) -> str | list:\n        \"\"\"Format a `str` input.\n\n        \"\"\"\n        if self.has_chat_template:\n            return self.format_input_chat(Chat([{\"role\": \"user\", \"content\": model_input}]))\n        return model_input\n\n    @format_input.register(Chat)\n    def format_input_chat(self, model_input: Chat) -> list:\n        \"\"\"Format a `Chat` input.\n\n        \"\"\"\n        for message in model_input.messages:\n            content = message[\"content\"]\n            if isinstance(content, list):\n                raise ValueError(\n                    \"Assets are not supported for vLLM offline.\"\n                    \"Please only use text content in the `Chat` input.\"\n                )\n        return OpenAITypeAdapter().format_input(model_input)\n\n    def format_output_type(self, output_type: Optional[Any] = None) -> dict:\n        \"\"\"Generate the structured output argument to pass to the model.\n\n        For vLLM, the structured output definition is set in the\n        `GuidedDecodingParams` constructor that is provided as a value to the\n        `guided_decoding` parameter of the `SamplingParams` constructor, itself\n        provided as a value to the `sampling_params` parameter of the `generate`\n        method.\n\n        Parameters\n        ----------\n        output_type\n            The structured output type provided.\n\n        Returns\n        -------\n        dict\n            The arguments to provide to the `GuidedDecodingParams` constructor.\n\n        \"\"\"\n        if output_type is None:\n            return {}\n\n        term = python_types_to_terms(output_type)\n        if isinstance(term, CFG):\n            return {\"grammar\": term.definition}\n        elif isinstance(term, JsonSchema):\n            guided_decoding_params = 
{\"json\": json.loads(term.schema)}\n            if term.whitespace_pattern:\n                guided_decoding_params[\"whitespace_pattern\"] = term.whitespace_pattern\n            return guided_decoding_params\n        else:\n            return {\"regex\": to_regex(term)}\n\n\nclass VLLMOffline(Model):\n    \"\"\"Thin wrapper around a `vllm.LLM` model.\n\n    This wrapper is used to convert the input and output types specified by the\n    users at a higher level to arguments to the `vllm.LLM` model.\n\n    \"\"\"\n\n    def __init__(self, model: \"LLM\"):\n        \"\"\"Create a VLLM model instance.\n\n        Parameters\n        ----------\n        model\n            A `vllm.LLM` model instance.\n\n        \"\"\"\n        self.model = model\n        self.tokenizer = self.model.get_tokenizer()\n        self.type_adapter = VLLMOfflineTypeAdapter(has_chat_template=self._check_chat_template())\n\n    def _build_generation_args(\n        self,\n        inference_kwargs: dict,\n        output_type: Optional[Any] = None,\n    ) -> \"SamplingParams\":\n        \"\"\"Create the `SamplingParams` object to pass to the `generate` method\n        of the `vllm.LLM` model.\"\"\"\n        from vllm.sampling_params import StructuredOutputsParams, SamplingParams\n\n        sampling_params = inference_kwargs.pop(\"sampling_params\", None)\n\n        if sampling_params is None:\n            sampling_params = SamplingParams()\n\n        output_type_args = self.type_adapter.format_output_type(output_type)\n        if output_type_args:\n            original_sampling_params_dict = {f: getattr(sampling_params, f) for f in sampling_params.__struct_fields__}\n            sampling_params_dict = {**original_sampling_params_dict, \"structured_outputs\": StructuredOutputsParams(**output_type_args)}\n            sampling_params = SamplingParams(**sampling_params_dict)\n\n        return sampling_params\n\n    def generate(\n        self,\n        model_input: Chat | str,\n        output_type: 
Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> Union[str, List[str]]:\n        \"\"\"Generate text using vLLM offline.\n\n        Parameters\n        ----------\n        prompt\n            The prompt based on which the model will generate a response.\n        output_type\n            The logits processor the model will use to constrain the format of\n            the generated text.\n        inference_kwargs\n            Additional keyword arguments to pass to the `generate` method\n            in the `vllm.LLM` model.\n\n        Returns\n        -------\n        Union[str, List[str]]\n            The text generated by the model.\n\n        \"\"\"\n        sampling_params = self._build_generation_args(\n            inference_kwargs,\n            output_type,\n        )\n\n        model_input = self.type_adapter.format_input(model_input)\n\n        if isinstance(model_input, list):\n            results = self.model.chat(\n                messages=model_input,\n                sampling_params=sampling_params,\n                **inference_kwargs,\n            )\n        else:\n            results = self.model.generate(\n                prompts=model_input,\n                sampling_params=sampling_params,\n                **inference_kwargs,\n            )\n        results = [completion.text for completion in results[0].outputs]\n\n        if len(results) == 1:\n            return results[0]\n        else:\n            return results\n\n    def generate_batch(\n        self,\n        model_input: List[Chat | str],\n        output_type: Optional[Any] = None,\n        **inference_kwargs: Any,\n    ) -> Union[List[str], List[List[str]]]:\n        \"\"\"Generate a batch of completions using vLLM offline.\n\n        Parameters\n        ----------\n        prompt\n            The list of prompts based on which the model will generate a\n            response.\n        output_type\n            The logits processor the model will use to constrain the format 
of\n            the generated text.\n        inference_kwargs\n            Additional keyword arguments to pass to the `generate` method\n            in the `vllm.LLM` model.\n\n        Returns\n        -------\n        Union[List[str], List[List[str]]]\n            The text generated by the model.\n\n        \"\"\"\n        sampling_params = self._build_generation_args(\n            inference_kwargs,\n            output_type,\n        )\n\n        model_inputs = [self.type_adapter.format_input(item) for item in model_input]\n\n        if model_inputs and isinstance(model_inputs[0], list):\n            results = self.model.chat(\n                messages=model_inputs,\n                sampling_params=sampling_params,\n                **inference_kwargs,\n            )\n        else:\n            results = self.model.generate(\n                prompts=model_inputs,\n                sampling_params=sampling_params,\n                **inference_kwargs,\n            )\n        return [[sample.text for sample in batch.outputs] for batch in results]\n\n    def generate_stream(self, model_input, output_type, **inference_kwargs):\n        \"\"\"Not available for `vllm.LLM`.\n\n        TODO: Implement the streaming functionality ourselves.\n\n        \"\"\"\n        raise NotImplementedError(\n            \"Streaming is not available for the vLLM offline integration.\"\n        )\n\n    def _check_chat_template(self) -> bool:\n        \"\"\"Check if the tokenizer has a chat template.\"\"\"\n        from vllm.transformers_utils.tokenizer import (\n            PreTrainedTokenizer,\n            PreTrainedTokenizerFast,\n            TokenizerBase\n        )\n        from outlines.models.tokenizer import _check_hf_chat_template\n\n        if isinstance(self.tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):\n            return _check_hf_chat_template(self.tokenizer)\n        elif isinstance(self.tokenizer, TokenizerBase):\n            # vLLM defines its own 
TokenizerBase class, and only provides\n            # limited compatibility with HuggingFace tokenizers. So we\n            # need to check for chat template support differently.\n            try:\n                self.tokenizer.apply_chat_template([{\"role\": \"user\", \"content\": \"test\"}])\n                return True\n            except Exception:\n                return False\n        else:  # Never reached  # pragma: no cover\n            return False\n\ndef from_vllm_offline(model: \"LLM\") -> VLLMOffline:\n    \"\"\"Create an Outlines `VLLMOffline` model instance from a `vllm.LLM`\n    instance.\n\n    Parameters\n    ----------\n    model\n        A `vllm.LLM` instance.\n\n    Returns\n    -------\n    VLLMOffline\n        An Outlines `VLLMOffline` model instance.\n\n    \"\"\"\n    return VLLMOffline(model)\n"
  },
  {
    "path": "outlines/processors/__init__.py",
    "content": "\"\"\"Processors to control generation in steerable models.\"\"\"\n\nfrom .base_logits_processor import OutlinesLogitsProcessor\n\n__all__ = [\n    \"OutlinesLogitsProcessor\",\n]\n"
  },
  {
    "path": "outlines/processors/base_logits_processor.py",
    "content": "\"\"\"Base class for logits processors.\"\"\"\n\nfrom abc import abstractmethod\nfrom typing import TypeVar\n\nfrom outlines.processors.tensor_adapters import (\n    TensorAdapterImplementation,\n    tensor_adapters,\n)\n\nTensorType = TypeVar('TensorType')\n\n\nclass OutlinesLogitsProcessor:\n    \"\"\"Base class for logits processors.\n    This class implements a shared `__call__` method is called by the models\n    and returns the processed logits. It relies on the `process_logits` method\n    that must be implemented by the subclasses to do the actual processing. The\n    `tensor_adapter` attribute, created at initialization based on the\n    tensor library name specified in the constructor, is used to manipulate the\n    tensors using the appropriate library for the model (numpy, torch...).\n    \"\"\"\n    tensor_adapter: TensorAdapterImplementation\n\n    def __init__(self, tensor_library_name: str):\n        \"\"\"\n        Parameters\n        ----------\n        tensor_library_name\n            The name of the library to use to manipulate tensors. Possible\n            values are \"mlx\", \"numpy\" and \"torch\". 
You must choose the library\n            that your model is using.\n        \"\"\"\n        # Temporary fix as torch raises a warning that can cause can an error\n        # with python 3.12.\n        if tensor_library_name == \"torch\":\n            import torch._dynamo\n\n            torch._dynamo.config.suppress_errors = True\n\n        tensor_adapter_class = tensor_adapters.get(tensor_library_name)\n        if tensor_adapter_class is None:\n            raise NotImplementedError(\n                f\"Library {tensor_library_name} is not available\"\n            )\n        self.tensor_adapter = tensor_adapter_class()  # type: ignore\n\n    def reset(self):\n        \"\"\"Reset the logits processor for a new generation\n\n        Only implement this method in subclasses if the logits processor\n        needs to be reset for a new generation.\n\n        \"\"\"\n        pass # pragma: no cover\n\n    @abstractmethod\n    def process_logits(\n        self, input_ids: TensorType, logits: TensorType\n    ) -> TensorType:\n        \"\"\"Main method to implement for logits processors subclasses.\n        This method applies a mask on the logits to bias the generation.\n        It is called by the `__call__` method that standardizes the shape of\n        `input_ids` and `logits` to ensure they are 2D tensors.\n        Elements to keep in mind when designing universal logits processors:\n        - logits processors are only used once and never re-applied for a new\n        sequence generator\n        - Some models only pass output_ids, some models such as llamacpp and\n        transformers prefix with input_ids\n        - Some sampling methods, such as beam search, result in unstable\n        sequence ordering in models like vLLM\n        Parameters\n        ----------\n        input_ids\n            The ids of the tokens of the existing sequences in a 2D tensor.\n        logits\n            The logits for the current generation step in a 2D tensor.\n        Returns\n        
-------\n        TensorType\n            The processed logits as a 2D tensor.\n        \"\"\"\n        ...\n\n    def __call__(\n        self, input_ids: TensorType, logits: TensorType\n    ) -> TensorType:\n        \"\"\"Entrypoint for logits processors, this is the method that is\n        called by the model.\n        Because different models use different structures to store the\n        input_ids and logits, we standardize their format to 2D tensors\n        before calling the `process_logits` method. After processing, the\n        logits are cast back to the original array library type before being\n        returned.\n        Parameters\n        ----------\n        input_ids\n            The ids of the tokens of the existing sequences in a tensor.\n        logits\n            The logits for the current generation step in a tensor.\n        Returns\n        -------\n        TensorType\n            The processed logits as a tensor.\n        \"\"\"\n        # if input_ids is 1D and logits is 2D with a single sequence,\n        # reshape input_ids to 2D (needed for mlx-lm)\n        if (\n            len(self.tensor_adapter.shape(input_ids)) == 1\n            and len(self.tensor_adapter.shape(logits)) == 2\n            and self.tensor_adapter.shape(logits)[0] == 1\n        ):\n            input_ids = self.tensor_adapter.unsqueeze(input_ids)\n\n        assert (\n            self.tensor_adapter.shape(logits)[:-1]\n            == self.tensor_adapter.shape(input_ids)[:-1]\n        )\n\n        # Guarantee passed as 2D Tensors, then covert back to original\n        # (1D or 2D) shape\n        if len(self.tensor_adapter.shape(logits)) == 2:\n            processed_logits = self.process_logits(input_ids, logits)\n        elif len(self.tensor_adapter.shape(logits)) == 1:\n            processed_logits = self.tensor_adapter.squeeze(\n                self.process_logits(\n                    self.tensor_adapter.unsqueeze(input_ids),\n                    
self.tensor_adapter.unsqueeze(logits),\n                ),\n            )\n        else:\n            raise ValueError(\n                f\"Logits shape {self.tensor_adapter.shape(logits)} is not \"\n                + \"supported\"\n            )\n\n        return processed_logits\n"
  },
  {
    "path": "outlines/processors/tensor_adapters/__init__.py",
    "content": "\"\"\"Library specific objects to manipulate tensors.\"\"\"\n\nfrom typing import Union\n\nfrom .mlx import MLXTensorAdapter\nfrom .numpy import NumpyTensorAdapter\nfrom .torch import TorchTensorAdapter\n\ntensor_adapters = {\n    \"mlx\": MLXTensorAdapter,\n    \"numpy\": NumpyTensorAdapter,\n    \"torch\": TorchTensorAdapter,\n}\n\nTensorAdapterImplementation = Union[\n    MLXTensorAdapter,\n    NumpyTensorAdapter,\n    TorchTensorAdapter,\n]\n\n__all__ = [\n    \"MLXTensorAdapter\",\n    \"NumpyTensorAdapter\",\n    \"TorchTensorAdapter\",\n    \"tensor_adapters\",\n    \"TensorAdapterImplementation\",\n]\n"
  },
  {
    "path": "outlines/processors/tensor_adapters/base.py",
    "content": "\"\"\"Base class for tensor adapters.\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom typing import TYPE_CHECKING, TypeVar, Any, Union\n\nif TYPE_CHECKING:\n    import torch\n\nTensorType = TypeVar('TensorType')\n\n\nclass TensorAdapter(ABC):\n    \"\"\"Abstract base class for tensor adapters.\n\n    This class defines the interface for tensor adapters that are used to\n    manipulate tensors in different libraries. Concrete implementations of\n    this class should provide specific implementations for each method as\n    well as providing a `library_name` attribute.\n\n    TODO: Update the version of outlines-core used to receive plain arrays\n    instead of torch tensors. In the meantime, implementations of this class\n    must make sure that their `full_like` and `concatenate` methods can\n    handle torch tensors.\n\n    \"\"\"\n    library_name: str\n\n    @abstractmethod\n    def shape(self, tensor: TensorType) -> list[int]:\n        \"\"\"Get the shape of the tensor.\n\n        Parameters\n        ----------\n        tensor\n            The tensor to get the shape of.\n\n        Returns\n        -------\n        list[int]\n            The shape of the tensor. 
The list contains as many elements as\n            there are dimensions in the tensor.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def unsqueeze(self, tensor: TensorType) -> TensorType:\n        \"\"\"Add a dimension to the tensor at axis 0.\n\n        Parameters\n        ----------\n        tensor\n            The tensor to add a dimension to.\n\n        Returns\n        -------\n        TensorType\n            The tensor with an additional dimension.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def squeeze(self, tensor: TensorType) -> TensorType:\n        \"\"\"Remove a dimension from the tensor at axis 0.\n\n        Parameters\n        ----------\n        tensor\n            The tensor to remove a dimension from.\n\n        Returns\n        -------\n        TensorType\n            The tensor with one less dimension.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def to_list(self, tensor: TensorType) -> list:\n        \"\"\"Convert the tensor to a list.\n\n        Parameters\n        ----------\n        tensor\n            The tensor to convert to a list.\n\n        Returns\n        -------\n        list\n            The tensor as a list.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def to_scalar(self, tensor: TensorType) -> Any:\n        \"\"\"Return the only element of the tensor.\n\n        Parameters\n        ----------\n        tensor\n            The tensor to return the only element of.\n\n        Returns\n        -------\n        Any\n            The only element of the tensor.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def full_like(self, tensor: \"torch.Tensor\", fill_value: Any) -> TensorType: # type: ignore\n        \"\"\"Create a tensor with the same shape as the input tensor filled\n        with a scalar value.\n\n        ATTENTION: This method receives a torch tensor regardless of the\n        library used.\n\n        Parameters\n        ----------\n        tensor\n            
The tensor to create a new tensor with the same shape.\n        fill_value\n            The value to fill the new tensor with.\n\n        Returns\n        -------\n        TensorType\n            A tensor with the same shape as the input tensor filled with the\n            specified value.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def concatenate(\n        self, tensors: list[Union[\"torch.Tensor\", TensorType]]\n    ) -> TensorType:\n        \"\"\"Concatenate a list of tensors along axis 0.\n\n        ATTENTION: This method can either receive a list of torch tensors or\n        a list of tensors from the library used.\n\n        Parameters\n        ----------\n        tensors\n            The list of tensors to concatenate.\n\n        Returns\n        -------\n        TensorType\n            The concatenated tensor.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def get_device(self, tensor: TensorType) -> str:\n        \"\"\"Get the name of the tensor's device.\n\n        Parameters\n        ----------\n        tensor\n            The tensor to get the device of.\n\n        Returns\n        -------\n        str\n            The name of the tensor's device.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def to_device(self, tensor: TensorType, device: str) -> TensorType:\n        \"\"\"Move the tensor to a specified device.\n\n        Parameters\n        ----------\n        tensor\n            The tensor to move to a specified device.\n        device\n            The name of the device to move the tensor to.\n\n        Returns\n        -------\n        TensorType\n            The tensor moved to the specified device.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def boolean_ones_like(self, tensor: TensorType) -> TensorType:\n        \"\"\"Create a boolean ones tensor with the same shape as the input\n        tensor.\n\n        Parameters\n        ----------\n        tensor\n            The tensor to create a 
boolean ones tensor with the same shape.\n\n        Returns\n        -------\n        TensorType\n            A boolean ones tensor with the same shape as the input tensor.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def apply_mask(\n        self, tensor: TensorType, mask: TensorType, value: Any\n    ) -> TensorType:\n        \"\"\"Fill the elements of the tensor where the mask is True with the\n        specified value.\n\n        Parameters\n        ----------\n        tensor\n            The tensor to fill.\n        mask\n            The mask to apply to the tensor.\n        value\n            The value to fill the tensor with.\n\n        Returns\n        -------\n        TensorType\n            The tensor with the mask applied.\n\n        \"\"\"\n        ...\n\n    @abstractmethod\n    def argsort_descending(\n        self, tensor: TensorType\n    ) -> TensorType:\n        \"\"\"Return the indices that would sort the tensor in descending order\n        along axis -1.\n\n        Parameters\n        ----------\n        tensor\n            The tensor to sort.\n\n        Returns\n        -------\n        TensorType\n            The indices that would sort the tensor in descending order along\n            axis -1.\n\n        \"\"\"\n        ...\n"
  },
  {
    "path": "outlines/processors/tensor_adapters/mlx.py",
    "content": "\"\"\"Tensor adapter for the `mlx` library.\"\"\"\n\nfrom outlines.processors.tensor_adapters.base import TensorAdapter\n\n\nclass MLXTensorAdapter(TensorAdapter):\n    library_name = \"mlx\"\n\n    def __init__(self):\n        import mlx.core\n\n        self.mlx = mlx.core\n\n    def shape(self, tensor):\n        return tensor.shape\n\n    def unsqueeze(self, tensor):\n        return self.mlx.expand_dims(tensor, 0)\n\n    def squeeze(self, tensor):\n        if tensor.shape[0] == 1:\n            return tensor[0]\n        return tensor\n\n    def to_list(self, tensor):\n        return tensor.tolist()\n\n    def to_scalar(self, tensor):\n        return tensor.item()\n\n    def full_like(self, tensor, fill_value):\n        # Compatible with receiving a torch tensor\n        return self.mlx.full(tensor.shape, fill_value)\n\n    def concatenate(self, tensors):\n        # Can handle both torch and mlx tensors\n        return self.mlx.concatenate(\n            [\n                self.mlx.array(t) if not isinstance(t, self.mlx.array) else t\n                for t in tensors\n            ],\n            axis=0\n        )\n\n    def get_device(self, tensor):\n        return None\n\n    def to_device(self, tensor, device):\n        return tensor\n\n    def boolean_ones_like(self, tensor):\n        return self.mlx.ones(tensor.shape, dtype=self.mlx.bool_)\n\n    def apply_mask(self, tensor, mask, value):\n        result = tensor.astype(tensor.dtype)\n        result = self.mlx.where(mask, self.mlx.array(value), result)\n        return result\n\n    def argsort_descending(self, tensor):\n        return self.mlx.argsort(-tensor)\n"
  },
  {
    "path": "outlines/processors/tensor_adapters/numpy.py",
    "content": "\"\"\"Tensor adapter for the `numpy` library.\"\"\"\n\nfrom outlines.processors.tensor_adapters.base import TensorAdapter\n\n\nclass NumpyTensorAdapter(TensorAdapter):\n    library_name = \"numpy\"\n\n    def __init__(self):\n        import numpy\n\n        self.numpy = numpy\n\n    def shape(self, tensor):\n        return tensor.shape\n\n    def unsqueeze(self, tensor):\n        return self.numpy.expand_dims(tensor, axis=0)\n\n    def squeeze(self, tensor):\n        return self.numpy.squeeze(tensor, axis=0)\n\n    def to_list(self, tensor):\n        return tensor.tolist()\n\n    def to_scalar(self, tensor):\n        return tensor.item()\n\n    def full_like(self, tensor, fill_value):\n        return self.numpy.full_like(tensor, fill_value)\n\n    def concatenate(self, tensors):\n        return self.numpy.concatenate(tensors, axis=0)\n\n    def get_device(self, tensor):\n        return None\n\n    def to_device(self, tensor, device):\n        return tensor\n\n    def boolean_ones_like(self, tensor):\n        return self.numpy.ones_like(tensor, dtype=bool)\n\n    def apply_mask(self, tensor, mask, value):\n        result = tensor.copy()\n        result[mask] = value\n        return result\n\n    def argsort_descending(self, tensor):\n        return self.numpy.argsort(-tensor)\n"
  },
  {
    "path": "outlines/processors/tensor_adapters/torch.py",
    "content": "\"\"\"Tensor adapter for the `torch` library.\"\"\"\n\nfrom outlines.processors.tensor_adapters.base import TensorAdapter\n\n\nclass TorchTensorAdapter(TensorAdapter):\n    library_name = \"torch\"\n\n    def __init__(self):\n        import torch\n\n        self.torch = torch\n\n    def shape(self, tensor):\n        return tensor.shape\n\n    def unsqueeze(self, tensor):\n        return tensor.unsqueeze(0)\n\n    def squeeze(self, tensor):\n        return tensor.squeeze(0)\n\n    def to_list(self, tensor):\n        return tensor.tolist()\n\n    def to_scalar(self, tensor):\n        return tensor.item()\n\n    def full_like(self, tensor, fill_value):\n        return self.torch.full_like(tensor, fill_value)\n\n    def concatenate(self, tensors):\n        return self.torch.cat(tensors, dim=0)\n\n    def get_device(self, tensor):\n        return tensor.device\n\n    def to_device(self, tensor, device):\n        return tensor.to(device)\n\n    def boolean_ones_like(self, tensor):\n        return self.torch.ones_like(tensor, dtype=self.torch.bool)\n\n    def apply_mask(self, tensor, mask, value):\n        return self.torch.masked_fill(tensor, mask, value)\n\n    def argsort_descending(self, tensor):\n        return self.torch.argsort(tensor, descending=True)\n"
  },
  {
    "path": "outlines/py.typed",
    "content": ""
  },
  {
    "path": "outlines/release_note.md",
    "content": "# Release Note\n\n### Why a new major version?\n\nThe v1 intends on making Outlines more closely focused on constrained generation. To do so, we delegate a wider range of tasks to the users and inference libraries. On top of making Outlines leaner, this design provides more flexibility to the users and let them use interfaces they are already familiar with.\n\nOur approach is inspired by the unix best practices — each element does one thing well, and we compose those functional elements.\n\nAs this new version deprecates some previously available features of Outlines, we have written a migration guide that gives detailed information on how to upgrade your v0 code to v1.\n\n### Deprecated\n\nAll deprecated features listed below will be removed in version 1.1.0. Until then, a warning will be displayed with information on how to migrate your code to v1.\n\n- The model loader functions from the `models` module (`transformers`, `openai`, etc.) have been deprecated. They are replaced by equivalent functions prefixed with `from_` such as `from_transformers`, `from_openai`, etc. The new loader functions accept different arguments compared to the old ones. They now typically require an instance of an engine/client from the associated inference library. 
This change was made to avoid duplicating inference library logic and to give users more control over inference engine/client initialization.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/models)\n\n```python\n# v0\nfrom outlines import models\nfrom transformers import BertForSequenceClassification, BertTokenizer\n\nmodel = models.transformers(\n    model_name=\"prajjwal1/bert-tiny\",\n    model_class=BertForSequenceClassification,\n    tokenizer_class=BertTokenizer,\n    model_kwargs={\"use_cache\": False},\n    tokenizer_kwargs={\"model_max_length\": 512},\n)\n\n# v1\nimport outlines\nfrom transformers import BertForSequenceClassification, BertTokenizer\n\nhf_model = BertForSequenceClassification.from_pretrained(\"prajjwal1/bert-tiny\", use_cache=False)\nhf_tokenizer = BertTokenizer.from_pretrained(\"prajjwal1/bert-tiny\", model_max_length=512)\nmodel = outlines.from_transformers(hf_model, hf_tokenizer)\n```\n\n- The `generate` module and the associated functions (`json`, `choice`…) have been deprecated. They are replaced by the `Generator` constructor. While you had to select the right generate function for your output type, you can now provide any output type supported by Outlines to the unique `Generator` object.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/core/generator)\n\n\n```python\n# v0\nfrom pydantic import BaseModel\nfrom outlines import generate, models\n\nclass Character(BaseModel):\n\tname: str\n\nmodel = models.openai(\"gpt-4o\")\ngenerator = generate.json(model, Character)\n\n# v1\nfrom openai import OpenAI\nfrom pydantic import BaseModel\nfrom outlines import Generator, from_openai\n\nclass Character(BaseModel):\n\tname: str\n\nmodel = from_openai(OpenAI())\ngenerator = Generator(model, Character)\n```\n\n- The `TransformersVision` model has been deprecated. It's replaced by `TransformersMultiModal`, which is more general as it supports additional input types beyond images, such as audio. 
When calling it, instead of providing the prompt and image assets separately, both should now be included in a single dictionary. The model is loaded with `from_transformers` just like the `Transformers` model, but the second argument must be a processor instead of a tokenizer.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/models/transformers_multimodal)\n\n\n```python\n# v0\nfrom io import BytesIO\nfrom urllib.request import urlopen\nfrom PIL import Image\nfrom transformers import LlavaForConditionalGeneration\nfrom outlines import models, generate\n\ndef img_from_url(url):\n    img_byte_stream = BytesIO(urlopen(url).read())\n    return Image.open(img_byte_stream).convert(\"RGB\")\n\nmodel = models.transformers_vision(\n    model_name=\"trl-internal-testing/tiny-LlavaForConditionalGeneration\",\n    model_class=LlavaForConditionalGeneration,\n)\ngenerator = generate.text(model)\nresult = generator(\n    \"Describe the image <image>\",\n    img_from_url(\"https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg\")\n)\n\n# v1\nfrom io import BytesIO\nfrom urllib.request import urlopen\nfrom PIL import Image\nfrom transformers import LlavaForConditionalGeneration, AutoProcessor\nimport outlines\n\ndef img_from_url(url):\n    img_byte_stream = BytesIO(urlopen(url).read())\n    return Image.open(img_byte_stream).convert(\"RGB\")\n\nmodel = outlines.from_transformers(\n\tLlavaForConditionalGeneration.from_pretrained(\"trl-internal-testing/tiny-LlavaForConditionalGeneration\"),\n\tAutoProcessor.from_pretrained(\"trl-internal-testing/tiny-LlavaForConditionalGeneration\")\n)\nimage = img_from_url(\"https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg\")\nresult = model({\"text\": \"Describe the image <image>\", \"images\": image})\n```\n\n- The `Exllamav2` model has been deprecated without replacement because its interface is not fully compatible with Outlines. 
We had to implement cumbersome patching to make it work, so we decided to remove it entirely.\n\n- The `function` module and the associated `Function` class have been deprecated. They are replaced by the `Application` class, which serves a similar purpose to `Function`. There are two notable differences: an `Application` is not initialized with a model (a model must be provided when calling the object), and template variables must be provided in a dictionary instead of as keyword arguments when calling the `Application`.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/utility/application)\n\n\n```python\n# v0\nfrom pydantic import BaseModel\nfrom outlines import Function, Template\n\nclass Character(BaseModel):\n\tname: str\n\ntemplate = Template.from_string(\"Create a {{ gender }} character.\")\nfn = Function(template, Character, \"hf-internal-testing/tiny-random-GPTJForCausalLM\")\nresponse = fn(gender=\"female\")\n\n# v1\nfrom pydantic import BaseModel\nfrom outlines import Application, Template, from_transformers\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nclass Character(BaseModel):\n\tname: str\n\nmodel = from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\n\ntemplate = Template.from_string(\"Create a {{ gender }} character.\")\napp = Application(template, Character)\nresponse = app(model, {\"gender\": \"female\"})\n```\n\n- The `samplers` module and the associated objects (`multinomial`, `greedy`…) have been deprecated. 
You should now use the inference arguments specific to the inference library your  model is based on to control the sampling.\n\n```python\n# v0\nfrom outlines import generate, models, samplers\n\nmodel = models.transformers(\"microsoft/Phi-3-mini-4k-instruct\")\ngenerator = generate.text(model, samplers.beam_search(2))\nresponse = generator(\"Write a short story about a cat\", max_tokens=10)\n\n# v1\nfrom outlines import Generator, from_transformers\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel = from_transformers(\n    AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n    AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\nresponse = model(\"Write a short story about a cat\", num_beams=2)\n```\n\n- The `load_lora` methods on the `VLLM` and `LlamaCpp` models have been deprecated. You should now load through the `Llama` instance provided when initializing the model in the case of the `LlamaCpp` model, and provide it as a keyword argument when calling the model in the case of the `VLLM` model.\n\n```python\n# v0\nfrom outlines import models\nfrom vllm import LLM\n\nmodel = models.vllm(\"erwanf/gpt2-mini\")\nmodel.load_lora(\"path/to/lora/file\")\nresponse = model(\"Write a short story about a cat.\")\n\n#v1\nfrom outlines import from_vllm\nfrom vllm import LLM\nfrom vllm.lora.request import LoRARequest\n\nmodel = from_vllm(\n    LLM(\"microsoft/Phi-3-mini-4k-instruct\")\n)\nlora_request = LoRARequest(\"path/to/lora/file\", 1, \"path/to/lora/file\")\nresponse = model(\"Write a short story about a cat.\", lora_request=lora_request)\n```\n\n### Modified\n\nSome objects are maintained, but their interface or behavior has been modified.\n\n- The interface of `Model` classes (`Transformers`, `OpenAI`, etc.) has been significantly modified. Models can now be called directly with a prompt and an output type without having to create a generator first. 
Additionally, all models have a `stream` method that can be invoked directly by the user.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/models)\n\n\n```python\n# v0\nfrom pydantic import BaseModel\nfrom outlines import generate, models\n\nclass Character(BaseModel):\n\tname: str\n\nmodel = models.openai(\"gpt-4o\")\ngenerator = generate.json(model, Character)\nresult = generator(\"Create a character\")\n\n# v1\nfrom openai import OpenAI\nfrom pydantic import BaseModel\nfrom outlines import from_openai\n\nclass Character(BaseModel):\n\tname: str\n\nmodel = from_openai(OpenAI(), \"gpt-4o\")\nresult = model(\"Create a character\", Character)\n```\n\n- The interface of the `__init__` method of the `OpenAI` model class has been modified. While it previously accepted a client and an `OpenAIConfig` object instance, it now accepts a client and a model name. The inference arguments from the config object should now be specified when calling the model to more closely align with the OpenAI Python library's functionality. If you provide an `OpenAIConfig` instance when initializing the model, a deprecation warning will appear and your model will behave like a v0 model.\nWe recommend using the `from_openai` function instead of initializing models directly.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/models/openai)\n\n\n```python\n# v0\nfrom outlines.models.openai import OpenAI, OpenAIConfig\nfrom openai import OpenAI as OpenAIClient\n\nmodel = OpenAI(\n\tOpenAIClient(),\n\tOpenAIConfig(model=\"gpt-4o\", stop=[\".\"])\n)\n\n# v1\nimport outlines\nfrom openai import OpenAI\n\nmodel = outlines.from_openai(OpenAI(), \"gpt-4o\")\n```\n\n- The return type of text generation is now consistently a string (or list/lists of strings for multiple samples or batching). In v0, Outlines automatically cast the inference result into the type provided by the user for constrained generation, but we have removed this behavior. 
This change was made to create more consistent behavior and to give users more freedom in deciding how to handle the generation result.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/models)\n\n```python\n# v0\nfrom pydantic import BaseModel\nfrom outlines import generate, models\n\nclass Character(BaseModel):\n\tname: str\n\nmodel = models.openai(\"gpt-4o\")\ngenerator = generate.json(model, Character)\nresult = generator(\"Create a character\")\nprint(result) # name='James'\n\n# v1\nfrom openai import OpenAI\nfrom pydantic import BaseModel\nfrom outlines import from_openai\n\nclass Character(BaseModel):\n\tname: str\n\nmodel = from_openai(OpenAI(), \"gpt-4o\")\nresult = model(\"Create a character\", Character)\nprint(result) # {\"name\": \"James\"}\nprint(Character.model_validate_json(result)) # name='James'\n```\n\n- While Outlines was trying to standardize inference argument names across models in v0, we decided to stop doing so and to directly pass on the inference arguments provided by the user to the inference engine/client. Our objective is to let the user use all arguments they are accustomed to with their inference library instead of having to learn Outlines-defined arguments. 
The deprecation of the `samplers` mentioned above is a part of this change of approach.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/models)\n\n```python\n# v0\nfrom outlines import generate, models\n\nmodel = models.transformers(\"microsoft/Phi-3-mini-4k-instruct\")\ngenerator = generate.text(model)\nresult = generator(\"Create a character\", max_tokens=256, stop_at=\".\")\n\n# v1\nfrom outlines import from_transformers\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel = from_transformers(\n\tAutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n\tAutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\nresult = model(\"Create a character\", max_new_tokens=256, stop_strings=\".\")\n```\n\n### Added features\n\n- There are 8 additional models available. All of them are loaded with an associated `from_` function that accepts an inference engine/client instance.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/models)\n    - `Dottxt`\n    - `Anthropic`\n    - `Gemini`\n    - `Ollama`\n    - `SGLang`\n    - `TGI`\n    - `TransformersMultiModal`\n    - `VLLM`\n- Some server-based models now have an async version. To create an async model, just provide an async client instance when using the loader function. The async models are the following.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/models)\n    - `AsyncSGLang`\n    - `AsyncTGI`\n    - `AsyncVLLM`\n\n```python\nimport outlines\nfrom huggingface_hub import AsyncInferenceClient\n\nasync_model = outlines.from_tgi(AsyncInferenceClient(\"http://localhost:11434\"))\n```\n\n- As explained previously, the `Generator` constructor has been added. It accepts a model and an output type as arguments and returns a generator object that can be used to generate text by providing a prompt and inference arguments. 
The benefit of a generator is that it is reusable: the user does not have to specify the output type each time, and the output type compilation (when applicable) happens only once.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/core/generator)\n\n```python\n# direct model calling\nfrom typing import Literal\nfrom outlines import from_transformers\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel = from_transformers(\n\t\tAutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n\t\tAutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\nresult = model(\"Pizza or burger\", Literal[\"pizza\", \"burger\"])\n\n# using a generator\nfrom outlines import Generator, from_transformers\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel = from_transformers(\n\t\tAutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n\t\tAutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\ngenerator = Generator(model, Literal[\"pizza\", \"burger\"])\nresult = generator(\"Pizza or burger\")\n```\n\n- As explained previously, the `Application` class has been added. An `Application` is initialized with a prompt template and an output type. The application object returned can then be called with a model, a dictionary containing values for the template variables and inference arguments. 
The objective of this object is to let users easily switch from one model to another for a given prompt template and output type.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/utility/application)\n\n```python\nfrom pydantic import BaseModel\nfrom outlines import Application, Template\n\nclass Character(BaseModel):\n\tname: str\n\ntemplate = Template.from_string(\"Create a {{ gender }} character.\")\napp = Application(template, Character)\nresponse = app(model, {\"gender\": \"female\"})\n```\n\n- The regex DSL and the associated `Term` classes and functions have been added. Terms (`Regex`, `String`…) can be used as output types to generate text with models or generators (they are turned into a regex). The term functions (`either`, `optional`, `at_least`…) are useful to build more complex regex patterns by combining terms. On top of the objects related to regex patterns, there are also 2 terms that are intended to be used by themselves as output types: `JsonSchema` and `CFG`.\n[Documentation](https://dottxt-ai.github.io/outlines/latest/features/core/output_types)\n\n```python\n# term used directly as an output type\nfrom outlines import from_transformers\nfrom outlines.types import JsonSchema\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel = from_transformers(\n\t\tAutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n\t\tAutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\njson_schema = '{\"type\": \"object\", \"properties\": {\"answer\": {\"type\": \"number\"}}}'\nresult = model(\"What's 2 + 2? 
Respond in a json\", JsonSchema(json_schema))\n\n# creating a complex regex pattern\nfrom outlines import from_transformers\nfrom outlines.types import at_least, either, integer, optional\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel = from_transformers(\n\tAutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\"),\n\tAutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n)\nregex_term = \"I have \" + integer + either(\"dog\", \"cat\") + optional(\"s\")\nresult = model(\"How many pets do you have\", regex_term)\n```\n"
  },
  {
    "path": "outlines/templates.py",
    "content": "\"\"\"Create templates to easily build prompts.\"\"\"\n\nimport functools\nimport inspect\nimport json\nimport os\nimport re\nimport textwrap\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Any, Callable, Dict, Optional, Type, cast\nimport warnings\n\nimport jinja2\nfrom pydantic import BaseModel\nfrom PIL import Image as PILImage\n\nfrom outlines.inputs import Image\n\n\ndef Vision(prompt: str, image: PILImage.Image) -> list:\n    \"\"\"This factory function replaces the deprecated `Vision` class until it is\n    fully removed in outlines v1.2.0.\n\n    Parameters\n    ----------\n    prompt\n        The prompt to use to generate the response.\n    image\n        The image to use to generate the response.\n\n    Returns\n    -------\n    list\n        A list containing the prompt and Image instance.\n    \"\"\"\n    warnings.warn(\"\"\"\n        The Vision function is deprecated and will be removed in outlines 1.2.0.\n        Instead of using Vision, please use a prompt along with an\n        outlines.inputs.Image instance.\n        For instance:\n        ```python\n        import openai\n        from outlines import Image, from_openai\n        model = from_openai(\"gpt-4o\")\n        response = model(\n            [\"A beautiful image of a cat\", Image(my_image)],\n            max_tokens=100\n        )\n        ```\n        \"\"\",\n        DeprecationWarning,\n        stacklevel=2,\n    )\n    return [prompt, Image(image)]\n\n\n@dataclass\nclass Template:\n    \"\"\"Represents a prompt template.\n\n    We return a `Template` class instead of a simple function so the\n    template can be accessed by callers.\n\n    \"\"\"\n    template: jinja2.Template\n\n    def __call__(self, *args, **kwargs) -> str:\n        \"\"\"Render and return the template.\n\n        Returns\n        -------\n        str\n            The rendered template as a Python string.\n\n        \"\"\"\n        return 
self.template.render(**kwargs)\n\n    @classmethod\n    def from_string(cls, content: str, filters: Dict[str, Callable] = {}):\n        \"\"\"Create a `Template` instance from a string containing a Jinja\n        template.\n\n        Parameters\n        ----------\n        content : str\n            The string content to be converted into a template.\n\n        Returns\n        -------\n        Template\n            An instance of the class with the provided content as a template.\n\n        \"\"\"\n        return cls(build_template_from_string(content, filters))\n\n    @classmethod\n    def from_file(cls, path: Path, filters: Dict[str, Callable] = {}):\n        \"\"\"Create a `Template` instance from a file containing a Jinja\n        template.\n\n        Note: This method does not allow to include and inheritance to\n        reference files that are outside the folder or subfolders of the file\n        given to `from_file`.\n\n        Parameters\n        ----------\n        path : Path\n            The path to the file containing the Jinja template.\n\n        Returns\n        -------\n        Template\n            An instance of the Template class with the template loaded from the\n            file.\n\n        \"\"\"\n        # We don't use a `Signature` here because it seems not feasible to\n        # infer one from a Jinja2 environment that is\n        # split across multiple files (since e.g. 
we support features like\n        # Jinja2 includes and template inheritance)\n        return cls(build_template_from_file(path, filters))\n\n\ndef build_template_from_string(\n    content: str, filters: Dict[str, Callable] = {}\n) -> jinja2.Template:\n    # Dedent, and remove extra linebreak\n    cleaned_template = inspect.cleandoc(content)\n\n    # Add linebreak if there were any extra linebreaks that\n    # `cleandoc` would have removed\n    ends_with_linebreak = content.replace(\" \", \"\").endswith(\"\\n\\n\")\n    if ends_with_linebreak:\n        cleaned_template += \"\\n\"\n\n    # Remove extra whitespaces, except those that immediately follow a newline symbol.\n    # This is necessary to avoid introducing whitespaces after backslash `\\` characters\n    # used to continue to the next line without linebreak.\n    cleaned_template = re.sub(r\"(?![\\r\\n])(\\b\\s+)\", \" \", cleaned_template)\n\n    env = create_jinja_env(None, filters)\n\n    return env.from_string(cleaned_template)\n\n\ndef build_template_from_file(\n    path: Path, filters: Dict[str, Callable] = {}\n) -> jinja2.Template:\n    file_directory = os.path.dirname(os.path.abspath(path))\n    env = create_jinja_env(jinja2.FileSystemLoader(file_directory), filters)\n\n    return env.get_template(os.path.basename(path))\n\n\ndef create_jinja_env(\n    loader: Optional[jinja2.BaseLoader], filters: Dict[str, Callable]\n) -> jinja2.Environment:\n    \"\"\"Create a new Jinja environment.\n\n    The Jinja environment is loaded with a set of pre-defined filters:\n    - `name`: get the name of a function\n    - `description`: get a function's docstring\n    - `source`: get a function's source code\n    - `signature`: get a function's signature\n    - `args`: get a function's arguments\n    - `schema`: display a JSON Schema\n\n    Users may pass additional filters, and/or override existing ones.\n\n    Parameters\n    ----------\n    loader\n       An optional `BaseLoader` instance\n    filters\n       A 
dictionary of filters, map between the filter's name and the\n       corresponding function.\n\n    \"\"\"\n    env = jinja2.Environment(\n        loader=loader,\n        trim_blocks=True,\n        lstrip_blocks=True,\n        keep_trailing_newline=True,\n        undefined=jinja2.StrictUndefined,\n    )\n\n    env.filters[\"name\"] = get_fn_name\n    env.filters[\"description\"] = get_fn_description\n    env.filters[\"source\"] = get_fn_source\n    env.filters[\"signature\"] = get_fn_signature\n    env.filters[\"schema\"] = get_schema\n    env.filters[\"args\"] = get_fn_args\n\n    # The filters passed by the user may override the\n    # pre-defined filters.\n    for name, filter_fn in filters.items():\n        env.filters[name] = filter_fn\n\n    return env\n\n\ndef get_fn_name(fn: Callable):\n    \"\"\"Returns the name of a callable.\"\"\"\n    if not callable(fn):\n        raise TypeError(\"The `name` filter only applies to callables.\")\n\n    if not hasattr(fn, \"__name__\"):\n        name = type(fn).__name__\n    else:\n        name = fn.__name__\n\n    return name\n\n\ndef get_fn_args(fn: Callable):\n    \"\"\"Returns the arguments of a function with annotations and default values if provided.\"\"\"\n    if not callable(fn):\n        raise TypeError(\"The `args` filter only applies to callables.\")\n\n    arg_str_list = []\n    signature = inspect.signature(fn)\n    arg_str_list = [str(param) for param in signature.parameters.values()]\n    arg_str = \", \".join(arg_str_list)\n    return arg_str\n\n\ndef get_fn_description(fn: Callable):\n    \"\"\"Returns the first line of a callable's docstring.\"\"\"\n    if not callable(fn):\n        raise TypeError(\"The `description` filter only applies to callables.\")\n\n    docstring = inspect.getdoc(fn)\n    if docstring is None:\n        description = \"\"\n    else:\n        description = docstring.split(\"\\n\")[0].strip()\n\n    return description\n\n\ndef get_fn_source(fn: Callable):\n    \"\"\"Return the 
source code of a callable.\"\"\"\n    if not callable(fn):\n        raise TypeError(\"The `source` filter only applies to callables.\")\n\n    source = textwrap.dedent(inspect.getsource(fn))\n    re_search = re.search(re.compile(r\"(\\bdef\\b.*)\", re.DOTALL), source)\n    if re_search is not None:\n        source = re_search.group(0)\n    else:  # pragma: no cover\n        raise TypeError(\"Could not read the function's source code\")\n\n    return source\n\n\ndef get_fn_signature(fn: Callable):\n    \"\"\"Return the signature of a callable.\"\"\"\n    if not callable(fn):\n        raise TypeError(\"The `source` filter only applies to callables.\")\n\n    source = textwrap.dedent(inspect.getsource(fn))\n    re_search = re.search(re.compile(r\"\\(([^)]+)\\)\"), source)\n    if re_search is None:  # pragma: no cover\n        signature = \"\"\n    else:\n        signature = re_search.group(1)\n\n    return signature\n\n\n@functools.singledispatch\ndef get_schema(model: Any):\n    raise NotImplementedError(\n        f\"No schema rendering function defined for type {type(model)}.\"\n    )\n\n\n@get_schema.register(dict)\ndef get_schema_dict(model: Dict):\n    \"\"\"Return a pretty-printed dictionary\"\"\"\n    return json.dumps(model, indent=2)\n\n\n@get_schema.register(type(BaseModel))\ndef get_schema_pydantic(model: Type[BaseModel]):\n    \"\"\"Return the schema of a Pydantic model.\"\"\"\n    if hasattr(model, \"model_json_schema\"):\n        def_key = \"$defs\"\n        raw_schema = model.model_json_schema()\n    else:  # pragma: no cover\n        def_key = \"definitions\"\n        raw_schema = model.schema()\n\n    definitions = raw_schema.get(def_key, None)\n    schema = parse_pydantic_schema(raw_schema, definitions)\n\n    return json.dumps(schema, indent=2)\n\n\ndef parse_pydantic_schema(raw_schema, definitions):\n    \"\"\"Parse the output of `Basemodel.[schema|model_json_schema]()`.\n\n    This recursively follows the references to other schemas in case\n    
of nested models. Other schemas are stored under the \"definitions\"\n    key in the schema of the top-level model.\n\n    \"\"\"\n    simple_schema = {}\n    for name, value in raw_schema[\"properties\"].items():\n        if \"description\" in value:\n            simple_schema[name] = value[\"description\"]\n        elif \"$ref\" in value: # pragma: no cover\n            refs = value[\"$ref\"].split(\"/\")\n            simple_schema[name] = parse_pydantic_schema(\n                definitions[refs[2]], definitions\n            )\n        else:\n            simple_schema[name] = f\"<{name}>\"\n\n    return simple_schema\n"
  },
  {
    "path": "outlines/types/__init__.py",
    "content": "\"\"\"Output types for structured generation and regex DSL.\"\"\"\n\nfrom outlines.types.dsl import (\n    CFG,\n    Choice,\n    JsonSchema,\n    Regex,\n    at_least,\n    at_most,\n    between,\n    cfg,\n    either,\n    exactly,\n    json_schema,\n    one_or_more,\n    optional,\n    regex,\n    zero_or_more,\n)\n\nfrom . import locale\n\ntry:\n    from . import airports\nexcept ImportError:  # pragma: no cover\n    class AirportImportError:\n        \"\"\"Dummy module that raises an error when accessed.\"\"\"\n        def __getattr__(self, name):\n            raise ImportError(\n                \"The 'airportsdata' package is required to use airport types. \"\n                \"Install it with: pip install 'outlines[airports]'\"\n            )\n\n    airports = AirportImportError()  # type: ignore\n\ntry:\n    from . import countries\nexcept ImportError:  # pragma: no cover\n    class CountryImportError:\n        \"\"\"Dummy module that raises an error when accessed.\"\"\"\n        def __getattr__(self, name):\n            raise ImportError(\n                \"The 'iso3166' package is required to use country types. 
\"\n                \"Install it with: pip install 'outlines[countries]'\"\n            )\n\n    countries = CountryImportError()  # type: ignore\n\n__all__ = [\n    # Submodules\n    \"airports\",\n    \"countries\",\n    \"locale\",\n    # DSL functions and classes\n    \"Regex\",\n    \"CFG\",\n    \"Choice\",\n    \"JsonSchema\",\n    \"regex\",\n    \"cfg\",\n    \"json_schema\",\n    \"optional\",\n    \"either\",\n    \"exactly\",\n    \"at_least\",\n    \"at_most\",\n    \"between\",\n    \"zero_or_more\",\n    \"one_or_more\",\n    # Python types\n    \"string\",\n    \"integer\",\n    \"boolean\",\n    \"number\",\n    \"date\",\n    \"time\",\n    \"datetime\",\n    # Basic regex types\n    \"digit\",\n    \"char\",\n    \"newline\",\n    \"whitespace\",\n    \"hex_str\",\n    \"uuid4\",\n    \"ipv4\",\n    # Document-specific types\n    \"sentence\",\n    \"paragraph\",\n    \"email\",\n    \"isbn\",\n]\n\n\n# Python types\nstring = Regex(r'\"[^\"]*\"')\ninteger = Regex(r\"[+-]?(0|[1-9][0-9]*)\")\nboolean = Regex(\"(True|False)\")\nnumber = Regex(rf\"{integer.pattern}(\\.[0-9]+)?([eE][+-][0-9]+)?\")\ndate = Regex(r\"(\\d{4})-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])\")\ntime = Regex(r\"([0-1][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])\")\ndatetime = Regex(rf\"({date.pattern})(\\s)({time.pattern})\")\n\n# Basic regex types\ndigit = Regex(r\"\\d\")\nchar = Regex(r\"\\w\")\nnewline = Regex(r\"(\\r\\n|\\r|\\n)\")  # Matched new lines on Linux, Windows & MacOS\nwhitespace = Regex(r\"\\s\")\nhex_str = Regex(r\"(0x)?[a-fA-F0-9]+\")\nuuid4 = Regex(\n    r\"[a-fA-F0-9]{8}-\"\n    r\"[a-fA-F0-9]{4}-\"\n    r\"4[a-fA-F0-9]{3}-\"\n    r\"[89abAB][a-fA-F0-9]{3}-\"\n    r\"[a-fA-F0-9]{12}\"\n)\nipv4 = Regex(\n    r\"((25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})\\.){3}\"\n    r\"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})\"\n)\n\n# Document-specific types\nsentence = Regex(r\"[A-Z].*\\s*[.!?]\")\nparagraph = Regex(rf\"{sentence.pattern}(?:\\s+{sentence.pattern})*\\n+\")\n\n\n# The following regex 
is RFC 5322 compliant and was found at:\n# https://emailregex.com/\nemail = Regex(\n    r\"\"\"(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])\"\"\"\n)\n\n# Matches any ISBN number. Note that this is not completely correct as not all\n# 10 or 13 digits numbers are valid ISBNs. See https://en.wikipedia.org/wiki/ISBN\n# Taken from O'Reilly's Regular Expression Cookbook:\n# https://www.oreilly.com/library/view/regular-expressions-cookbook/9781449327453/ch04s13.html\n#\n# TODO: The check digit can only be computed by calling a function to compute it dynamically\nisbn = Regex(\n    r\"(?:ISBN(?:-1[03])?:? )?(?=[0-9X]{10}$|(?=(?:[0-9]+[- ]){3})[- 0-9X]{13}$|97[89][0-9]{10}$|(?=(?:[0-9]+[- ]){4})[- 0-9]{17}$)(?:97[89][- ]?)?[0-9]{1,5}[- ]?[0-9]+[- ]?[0-9]+[- ]?[0-9X]\"\n)\n"
  },
  {
    "path": "outlines/types/airports.py",
    "content": "\"\"\"Generate valid airport codes.\"\"\"\n\nfrom enum import Enum\n\nimport airportsdata\n\nAIRPORT_IATA_LIST = [\n    (v[\"iata\"], v[\"iata\"]) for v in airportsdata.load().values() if v[\"iata\"]\n]\nIATA = Enum(\"Airport\", AIRPORT_IATA_LIST)  # type:ignore\n"
  },
  {
    "path": "outlines/types/countries.py",
    "content": "\"\"\"Generate valid country codes and names.\"\"\"\n\nfrom enum import Enum\n\nfrom iso3166 import countries\n\n\ndef get_country_flags():\n    \"\"\"Generate Unicode flags for all ISO 3166-1 alpha-2 country codes in Alpha2 Enum.\"\"\"\n    base = ord(\"🇦\")\n    return {\n        code.name: chr(base + ord(code.name[0]) - ord(\"A\"))\n        + chr(base + ord(code.name[1]) - ord(\"A\"))\n        for code in Alpha2\n    }\n\n\nALPHA_2_CODE = [(country.alpha2, country.alpha2) for country in countries]\nAlpha2 = Enum(\"Alpha_2\", ALPHA_2_CODE)  # type:ignore\n\nALPHA_3_CODE = [(country.alpha3, country.alpha3) for country in countries]\nAlpha3 = Enum(\"Alpha_3\", ALPHA_3_CODE)  # type:ignore\n\nNUMERIC_CODE = [(str(country.numeric), str(country.numeric)) for country in countries]\nNumeric = Enum(\"Numeric_code\", NUMERIC_CODE)  # type:ignore\n\nNAME = [(country.name, country.name) for country in countries]\nName = Enum(\"Name\", NAME)  # type:ignore\n\nflag_mapping = get_country_flags()\nFLAG = [(flag, flag) for code, flag in flag_mapping.items()]\nFlag = Enum(\"Flag\", FLAG)  # type:ignore\n"
  },
  {
    "path": "outlines/types/dsl.py",
    "content": "\"\"\"Regular expression DSL and output types for structured generation.\n\nThis module contains elements related to three logical steps in the use of\noutput types for structured generation:\n\n1. Definition of `Term` classes that contain output type definitions. That\n   includes both terms intended to be used by themselves such as `JsonSchema`\n   or `CFG` and terms that are part of the regular expression DSL such as\n   `Alternatives` or `KleeneStar` (and the related functions).\n2. Conversion of Python types into `Term` instances (`python_types_to_terms`).\n3. Conversion of a `Term` instance into a regular expression (`to_regex`).\n\n\"\"\"\n\nimport json\nimport re\nimport sys\nimport warnings\nfrom dataclasses import dataclass\nfrom enum import EnumMeta\nfrom types import FunctionType\nfrom typing import (\n    Any,\n    List,\n    Literal,\n    Optional as OptionalType,\n    Union,\n    get_args,\n)\nimport jsonschema\nfrom genson import SchemaBuilder\nfrom pydantic import (\n    BaseModel,\n    GetCoreSchemaHandler,\n    GetJsonSchemaHandler,\n    TypeAdapter,\n)\nfrom pydantic.json_schema import JsonSchemaValue\nfrom pydantic_core import core_schema as cs\nfrom outlines_core.json_schema import build_regex_from_schema\n\nimport outlines.types as types\nfrom outlines import grammars\nfrom outlines.types.json_schema_utils import (\n    json_schema_dict_to_pydantic,\n    json_schema_dict_to_typeddict,\n    json_schema_dict_to_dataclass,\n)\nfrom outlines.types.utils import (\n    get_schema_from_signature,\n    is_int,\n    is_int_instance,\n    is_float,\n    is_float_instance,\n    is_str,\n    is_str_instance,\n    is_bool,\n    is_datetime,\n    is_date,\n    is_time,\n    is_native_dict,\n    is_dict_instance,\n    is_dataclass,\n    is_typed_dict,\n    is_pydantic_model,\n    is_genson_schema_builder,\n    is_literal,\n    is_union,\n    is_enum,\n    is_callable,\n    is_typing_list,\n    is_typing_tuple,\n    is_typing_dict,\n)\n\nif 
sys.version_info >= (3, 12):  # pragma: no cover\n    from typing import _TypedDictMeta  # type: ignore\nelse:  # pragma: no cover\n    from typing_extensions import _TypedDictMeta  # type: ignore\n\n\nclass Term:\n    \"\"\"Represents types defined with a regular expression.\n\n    `Regex` instances can be used as a type in a Pydantic model definition.\n    They will be translated to JSON Schema as a \"string\" field with the\n    \"pattern\" keyword set to the regular expression this class represents. The\n    class also handles validation.\n\n    Examples\n    --------\n\n    >>> from outlines.types import Regex\n    >>> from pydantic import BaseModel\n    >>>\n    >>> age_type = Regex(\"[0-9]+\")\n    >>>\n    >>> class User(BaseModel):\n    >>>     name: str\n    >>>     age: age_type\n\n    \"\"\"\n\n    def __add__(self: \"Term\", other: \"Term\") -> \"Sequence\":\n        if is_str_instance(other):\n            other = String(str(other))\n\n        return Sequence([self, other])\n\n    def __radd__(self: \"Term\", other: \"Term\") -> \"Sequence\":\n        if is_str_instance(other):\n            other = String(str(other))\n\n        return Sequence([other, self])\n\n    def __or__(self: \"Term\", other: \"Term\") -> \"Alternatives\":\n        if is_str_instance(other):\n            other = String(str(other))\n\n        return Alternatives([self, other])\n\n    def __ror__(self: \"Term\", other: \"Term\") -> \"Alternatives\":\n        if is_str_instance(other):\n            other = String(str(other))\n\n        return Alternatives([other, self])\n\n    def __get_validator__(self, _core_schema):\n        def validate(input_value):\n            return self.validate(input_value)\n\n        return validate\n\n    def __get_pydantic_core_schema__(\n        self, source_type: Any, handler: GetCoreSchemaHandler\n    ) -> cs.CoreSchema:\n        return cs.no_info_plain_validator_function(lambda value: self.validate(value))\n\n    def __get_pydantic_json_schema__(\n 
       self, core_schema: cs.CoreSchema, handler: GetJsonSchemaHandler\n    ) -> JsonSchemaValue:\n        return {\"type\": \"string\", \"pattern\": to_regex(self)}\n\n    def validate(self, value: str) -> str:\n        pattern = to_regex(self)\n        compiled = re.compile(pattern)\n        if not compiled.fullmatch(str(value)):\n            raise ValueError(\n                f\"Input should be in the language of the regular expression {pattern}\"\n            )\n        return value\n\n    def matches(self, value: str) -> bool:\n        \"\"\"Check that a given value is in the language defined by the Term.\n\n        We make the assumption that the language defined by the term can\n        be defined with a regular expression.\n\n        \"\"\"\n        pattern = to_regex(self)\n        compiled = re.compile(pattern)\n        if compiled.fullmatch(str(value)):\n            return True\n        return False\n\n    def display_ascii_tree(self, indent=\"\", is_last=True) -> str:\n        \"\"\"Display the regex tree in ASCII format.\"\"\"\n        branch = \"└── \" if is_last else \"├── \"\n        result = indent + branch + self._display_node() + \"\\n\"\n\n        # Calculate the new indent for children\n        new_indent = indent + (\"    \" if is_last else \"│   \")\n\n        # Let each subclass handle its children\n        result += self._display_children(new_indent)\n        return result\n\n    def _display_node(self):\n        raise NotImplementedError\n\n    def _display_children(self, indent: str) -> str:\n        \"\"\"Display the children of this node. 
Override in subclasses with children.\"\"\"\n        return \"\"\n\n    def __str__(self):\n        return self.display_ascii_tree()\n\n    def optional(self) -> \"Optional\":\n        return optional(self)\n\n    def exactly(self, count: int) -> \"QuantifyExact\":\n        return exactly(count, self)\n\n    def at_least(self, count: int) -> \"QuantifyMinimum\":\n        return at_least(count, self)\n\n    def at_most(self, count: int) -> \"QuantifyMaximum\":\n        return at_most(count, self)\n\n    def between(self, min_count: int, max_count: int) -> \"QuantifyBetween\":\n        return between(min_count, max_count, self)\n\n    def one_or_more(self) -> \"KleenePlus\":\n        return one_or_more(self)\n\n    def zero_or_more(self) -> \"KleeneStar\":\n        return zero_or_more(self)\n\n\n@dataclass\nclass String(Term):\n    value: str\n\n    def _display_node(self) -> str:\n        return f\"String('{self.value}')\"\n\n    def __repr__(self):\n        return f\"String(value='{self.value}')\"\n\n\n@dataclass\nclass Regex(Term):\n    \"\"\"Class representing a regular expression.\n\n    Parameters\n    ----------\n    pattern\n        The regular expression as a string.\n\n    \"\"\"\n    pattern: str\n\n    def _display_node(self) -> str:\n        return f\"Regex('{self.pattern}')\"\n\n    def __repr__(self):\n        return f\"Regex(pattern='{self.pattern}')\"\n\n\n@dataclass\nclass CFG(Term):\n    \"\"\"Class representing a context-free grammar.\n\n    Parameters\n    ----------\n    definition\n        The definition of the context-free grammar as a string.\n\n    \"\"\"\n    definition: str\n\n    def _display_node(self) -> str:\n        return f\"CFG('{self.definition}')\"\n\n    def __repr__(self):\n        return f\"CFG(definition='{self.definition}')\"\n\n    def __eq__(self, other):\n        if not isinstance(other, CFG):\n            return False\n        return self.definition == other.definition\n\n    @classmethod\n    def from_file(cls, path: 
str) -> \"CFG\":\n        \"\"\"Create a CFG instance from a file containing a CFG definition.\n\n        Parameters\n        ----------\n        path : str\n            The path to the file containing the CFG definition.\n        Returns\n        -------\n        CFG\n            A CFG instance.\n\n        \"\"\"\n        with open(path, \"r\") as f:\n            definition = f.read()\n        return cls(definition)\n\n\nclass JsonSchema(Term):\n    \"\"\"Class representing a JSON schema.\n\n    The JSON schema object from which to instantiate the class can be a\n    dictionary, a string, a Pydantic model, a typed dict, a dataclass, or a\n    genSON schema builder.\n\n    \"\"\"\n    schema: str\n    whitespace_pattern: OptionalType[str]\n\n    def __init__(\n        self,\n        schema: Union[\n            dict, str, type[BaseModel], _TypedDictMeta, type, SchemaBuilder\n        ],\n        whitespace_pattern: OptionalType[str] = None,\n        ensure_ascii: bool = True,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        schema\n            The object containing the JSON schema.\n        whitespace_pattern\n            The pattern to use to match whitespace characters.\n        ensure_ascii\n            Whether to ensure the schema is ASCII-only.\n\n        \"\"\"\n        schema_str: str\n\n        if is_dict_instance(schema):\n            schema_str = json.dumps(schema, ensure_ascii=ensure_ascii)\n        elif is_str_instance(schema):\n            schema_str = str(schema)\n        elif is_pydantic_model(schema):\n            schema_str = json.dumps(schema.model_json_schema(), ensure_ascii=ensure_ascii) # type: ignore\n        elif is_typed_dict(schema):\n            schema_str = json.dumps(TypeAdapter(schema).json_schema(), ensure_ascii=ensure_ascii)\n        elif is_dataclass(schema):\n            schema_str = json.dumps(TypeAdapter(schema).json_schema(), ensure_ascii=ensure_ascii)\n        elif is_genson_schema_builder(schema):\n        
    schema_str = schema.to_json(ensure_ascii=ensure_ascii)  # type: ignore\n        else:\n            raise ValueError(\n                f\"Cannot parse schema {schema}. The schema must be either \"\n                + \"a Pydantic class, typed dict, a dataclass, a genSON schema \"\n                + \"builder or a string or dict that contains the JSON schema \"\n                + \"specification\"\n            )\n\n        jsonschema.Draft7Validator.check_schema(json.loads(schema_str))\n        self.schema = schema_str\n        self.whitespace_pattern = whitespace_pattern\n\n    @classmethod\n    def is_json_schema(cls, obj: Any) -> bool:\n        \"\"\"Check if the object provided is a JSON schema type.\n\n        Parameters\n        ----------\n        obj: Any\n            The object to check\n\n        Returns\n        -------\n        bool\n            True if the object is a JSON schema type, False otherwise\n\n        \"\"\"\n        return (\n            isinstance(obj, cls)\n            or is_pydantic_model(obj)\n            or is_typed_dict(obj)\n            or is_dataclass(obj)\n            or is_genson_schema_builder(obj)\n        )\n\n    @classmethod\n    def convert_to(\n        cls,\n        schema: Union[\n            \"JsonSchema\",\n            type[BaseModel],\n            _TypedDictMeta,\n            type,\n            SchemaBuilder,\n        ],\n        target_types: List[Literal[\n            \"str\",\n            \"dict\",\n            \"pydantic\",\n            \"typeddict\",\n            \"dataclass\",\n            \"genson\",\n        ]],\n    ) -> Union[str, dict, type[BaseModel], _TypedDictMeta, type, SchemaBuilder]:\n        \"\"\"Convert a JSON schema type to a different JSON schema type.\n\n        If the schema provided is already of a type in the target_types, return\n        it unchanged.\n\n        Parameters\n        ----------\n        schema: Union[JsonSchema, type[BaseModel], _TypedDictMeta, type, SchemaBuilder]\n            
The schema to convert\n        target_types: List[Literal[\"str\", \"dict\", \"pydantic\", \"typeddict\", \"dataclass\", \"genson\"]]\n            The target types to convert to\n\n        \"\"\"\n        # If the schema provided is already of a type in the target_types,\n        # just return it\n        if isinstance(schema, cls):\n            if \"str\" in target_types:\n                return schema.schema\n            elif \"dict\" in target_types:\n                return json.loads(schema.schema)\n        elif is_pydantic_model(schema) and \"pydantic\" in target_types:\n            return schema\n        elif is_typed_dict(schema) and \"typeddict\" in target_types:\n            return schema\n        elif is_dataclass(schema) and \"dataclass\" in target_types:\n            return schema\n        elif is_genson_schema_builder(schema) and \"genson\" in target_types:\n            return schema\n\n        # Convert the schema to a JSON schema string/dict\n        if isinstance(schema, cls):\n            schema_str = schema.schema\n        else:\n            schema_str = cls(schema).schema\n        schema_dict = json.loads(schema_str)\n\n        for target_type in target_types:\n            try:\n                # Convert the JSON schema string to the target type\n                if target_type == \"str\":\n                    return schema_str\n                elif target_type == \"dict\":\n                    return schema_dict\n                elif target_type == \"pydantic\":\n                    return json_schema_dict_to_pydantic(schema_dict)\n                elif target_type == \"typeddict\":\n                    return json_schema_dict_to_typeddict(schema_dict)\n                elif target_type == \"dataclass\":\n                    return json_schema_dict_to_dataclass(schema_dict)\n                # No conversion available for genson\n            except Exception as e:  # pragma: no cover\n                warnings.warn(\n                    f\"Cannot 
convert schema type {type(schema)} to {target_type}: {e}\"\n                )\n                continue\n\n        raise ValueError(\n            f\"Cannot convert schema type {type(schema)} to any of the target \"\n            f\"types {target_types}\"\n        )\n\n    def _display_node(self) -> str:\n        return f\"JsonSchema('{self.schema}')\"\n\n    def __repr__(self):\n        return f\"JsonSchema(schema='{self.schema}')\"\n\n    def __eq__(self, other):\n        if not isinstance(other, JsonSchema):\n            return False\n        try:\n            self_dict = json.loads(self.schema)\n            other_dict = json.loads(other.schema)\n            return self_dict == other_dict\n        except json.JSONDecodeError:  # pragma: no cover\n            return self.schema == other.schema\n\n    @classmethod\n    def from_file(cls, path: str) -> \"JsonSchema\":\n        \"\"\"Create a JsonSchema instance from a .json file containing a JSON\n        schema.\n\n        Parameters\n        ----------\n        path:\n            The path to the file containing the JSON schema.\n        Returns\n        -------\n        JsonSchema\n            A JsonSchema instance.\n\n        \"\"\"\n        with open(path, \"r\") as f:\n            schema = json.load(f)\n        return cls(schema)\n\n\n@dataclass\nclass Choice(Term):\n    \"\"\"Class representing a choice between different items.\n\n    Parameters\n    ----------\n    items\n        The items to choose from.\n\n    \"\"\"\n    items: List[Any]\n\n    def _display_node(self) -> str:\n        return f\"Choice({repr(self.items)})\"\n\n    def __repr__(self):\n        return f\"Choice(items={repr(self.items)})\"\n\n\n@dataclass\nclass KleeneStar(Term):\n    term: Term\n\n    def _display_node(self) -> str:\n        return \"KleeneStar(*)\"\n\n    def _display_children(self, indent: str) -> str:\n        return self.term.display_ascii_tree(indent, True)\n\n    def __repr__(self):\n        return 
f\"KleeneStar(term={repr(self.term)})\"\n\n\n@dataclass\nclass KleenePlus(Term):\n    term: Term\n\n    def _display_node(self) -> str:\n        return \"KleenePlus(+)\"\n\n    def _display_children(self, indent: str) -> str:\n        return self.term.display_ascii_tree(indent, True)\n\n    def __repr__(self):\n        return f\"KleenePlus(term={repr(self.term)})\"\n\n\n@dataclass\nclass Optional(Term):\n    term: Term\n\n    def _display_node(self) -> str:\n        return \"Optional(?)\"\n\n    def _display_children(self, indent: str) -> str:\n        return self.term.display_ascii_tree(indent, True)\n\n    def __repr__(self):\n        return f\"Optional(term={repr(self.term)})\"\n\n\n@dataclass\nclass Alternatives(Term):\n    terms: List[Term]\n\n    def _display_node(self) -> str:\n        return \"Alternatives(|)\"\n\n    def _display_children(self, indent: str) -> str:\n        return \"\".join(\n            term.display_ascii_tree(indent, i == len(self.terms) - 1)\n            for i, term in enumerate(self.terms)\n        )\n\n    def __repr__(self):\n        return f\"Alternatives(terms={repr(self.terms)})\"\n\n\n@dataclass\nclass Sequence(Term):\n    terms: List[Term]\n\n    def _display_node(self) -> str:\n        return \"Sequence\"\n\n    def _display_children(self, indent: str) -> str:\n        return \"\".join(\n            term.display_ascii_tree(indent, i == len(self.terms) - 1)\n            for i, term in enumerate(self.terms)\n        )\n\n    def __repr__(self):\n        return f\"Sequence(terms={repr(self.terms)})\"\n\n\n@dataclass\nclass QuantifyExact(Term):\n    term: Term\n    count: int\n\n    def _display_node(self) -> str:\n        return f\"Quantify({{{self.count}}})\"\n\n    def _display_children(self, indent: str) -> str:\n        return self.term.display_ascii_tree(indent, True)\n\n    def __repr__(self):\n        return f\"QuantifyExact(term={repr(self.term)}, count={repr(self.count)})\"\n\n\n@dataclass\nclass QuantifyMinimum(Term):\n  
  term: Term\n    min_count: int\n\n    def _display_node(self) -> str:\n        return f\"Quantify({{{self.min_count},}})\"\n\n    def _display_children(self, indent: str) -> str:\n        return self.term.display_ascii_tree(indent, True)\n\n    def __repr__(self):\n        return (\n            f\"QuantifyMinimum(term={repr(self.term)}, min_count={repr(self.min_count)})\"\n        )\n\n\n@dataclass\nclass QuantifyMaximum(Term):\n    term: Term\n    max_count: int\n\n    def _display_node(self) -> str:\n        return f\"Quantify({{,{self.max_count}}})\"\n\n    def _display_children(self, indent: str) -> str:\n        return self.term.display_ascii_tree(indent, True)\n\n    def __repr__(self):\n        return (\n            f\"QuantifyMaximum(term={repr(self.term)}, max_count={repr(self.max_count)})\"\n        )\n\n\n@dataclass\nclass QuantifyBetween(Term):\n    term: Term\n    min_count: int\n    max_count: int\n\n    def __post_init__(self):\n        if self.min_count > self.max_count:\n            raise ValueError(\n                \"QuantifyBetween: `max_count` must be greater than `min_count`.\"\n            )\n\n    def _display_node(self) -> str:\n        return f\"Quantify({{{self.min_count},{self.max_count}}})\"\n\n    def _display_children(self, indent: str) -> str:\n        return self.term.display_ascii_tree(indent, True)\n\n    def __repr__(self):\n        return f\"QuantifyBetween(term={repr(self.term)}, min_count={repr(self.min_count)}, max_count={repr(self.max_count)})\"\n\n\ndef regex(pattern: str):\n    return Regex(pattern)\n\n\ndef cfg(definition: str):\n    return CFG(definition)\n\n\ndef json_schema(schema: Union[str, dict, type[BaseModel]]):\n    return JsonSchema(schema)\n\n\ndef either(*terms: Union[str, Term]):\n    \"\"\"Represents an alternative between different terms or strings.\n\n    This factory function automatically translates string arguments\n    into `String` objects.\n\n    \"\"\"\n    terms = [String(arg) if isinstance(arg, 
str) else arg for arg in terms]\n    return Alternatives(terms)\n\n\ndef optional(term: Union[Term, str]) -> Optional:\n    term = String(term) if isinstance(term, str) else term\n    return Optional(term)\n\n\ndef exactly(count: int, term: Union[Term, str]) -> QuantifyExact:\n    \"\"\"Repeat the term exactly `count` times.\"\"\"\n    term = String(term) if isinstance(term, str) else term\n    return QuantifyExact(term, count)\n\n\ndef at_least(count: int, term: Union[Term, str]) -> QuantifyMinimum:\n    \"\"\"Repeat the term at least `count` times.\"\"\"\n    term = String(term) if isinstance(term, str) else term\n    return QuantifyMinimum(term, count)\n\n\ndef at_most(count: int, term: Union[Term, str]) -> QuantifyMaximum:\n    \"\"\"Repeat the term at most `count` times.\"\"\"\n    term = String(term) if isinstance(term, str) else term\n    return QuantifyMaximum(term, count)\n\n\ndef between(min_count: int, max_count: int, term: Union[Term, str]) -> QuantifyBetween:\n    term = String(term) if isinstance(term, str) else term\n    return QuantifyBetween(term, min_count, max_count)\n\n\ndef zero_or_more(term: Union[Term, str]) -> KleeneStar:\n    term = String(term) if isinstance(term, str) else term\n    return KleeneStar(term)\n\n\ndef one_or_more(term: Union[Term, str]) -> KleenePlus:\n    term = String(term) if isinstance(term, str) else term\n    return KleenePlus(term)\n\n\ndef python_types_to_terms(ptype: Any, recursion_depth: int = 0) -> Term:\n    \"\"\"Convert Python types to Outlines DSL terms that constrain LLM output.\n\n    Parameters\n    ----------\n    ptype\n        The Python type to convert\n    recursion_depth\n        Current recursion depth to prevent infinite recursion\n\n    Returns\n    -------\n    Term\n        The corresponding DSL `Term` instance.\n\n    \"\"\"\n    if recursion_depth > 10:\n        raise RecursionError(\n            f\"Maximum recursion depth exceeded when converting {ptype}. 
\"\n            \"This might be due to a recursive type definition.\"\n        )\n\n    # First handle Term instances\n    if isinstance(ptype, Term):\n        return ptype\n\n    # Basic types\n    if is_int(ptype):\n        return types.integer\n    elif is_float(ptype):\n        return types.number\n    elif is_bool(ptype):\n        return types.boolean\n    elif is_str(ptype):\n        return types.string\n    elif is_native_dict(ptype):\n        return CFG(grammars.json)\n    elif is_time(ptype):\n        return types.time\n    elif is_date(ptype):\n        return types.date\n    elif is_datetime(ptype):\n        return types.datetime\n\n    # Basic type instances\n    if is_str_instance(ptype):\n        return String(ptype)\n    elif is_int_instance(ptype) or is_float_instance(ptype):\n        return Regex(str(ptype))\n\n    # Structured types\n    structured_type_checks = [\n        lambda x: is_dataclass(x),\n        lambda x: is_typed_dict(x),\n        lambda x: is_pydantic_model(x),\n    ]\n    if any(check(ptype) for check in structured_type_checks):\n        schema = TypeAdapter(ptype).json_schema()\n        return JsonSchema(schema)\n\n    elif is_genson_schema_builder(ptype):\n        schema = ptype.to_json()\n        return JsonSchema(schema)\n\n    if is_enum(ptype):\n        return Alternatives(\n            [\n                python_types_to_terms(member, recursion_depth + 1)\n                for member in _get_enum_members(ptype)\n            ]\n        )\n\n    args = get_args(ptype)\n    if is_literal(ptype):\n        return _handle_literal(args)\n    elif is_union(ptype):\n        return _handle_union(args, recursion_depth)\n    elif is_typing_list(ptype):\n        return _handle_list(args, recursion_depth)\n    elif is_typing_tuple(ptype):\n        return _handle_tuple(args, recursion_depth)\n    elif is_typing_dict(ptype):\n        return _handle_dict(args, recursion_depth)\n\n    if is_callable(ptype):\n        return 
JsonSchema(get_schema_from_signature(ptype))\n\n    type_name = getattr(ptype, \"__name__\", ptype)\n    raise TypeError(\n        f\"Type {type_name} is currently not supported. Please open an issue: \"\n        \"https://github.com/dottxt-ai/outlines/issues\"\n    )\n\n\ndef _get_enum_members(ptype: EnumMeta) -> List[Any]:\n    regular_members = [member.value for member in ptype]  # type: ignore\n    function_members = []\n    for key, value in ptype.__dict__.items():\n        if (\n            isinstance(value, FunctionType)\n            and not (key.startswith('__') and key.endswith('__'))\n            and key != '_generate_next_value_'  # Skip this specific method that causes issues\n        ):\n            function_members.append(value)\n    return regular_members + function_members\n\n\ndef _handle_literal(args: tuple) -> Alternatives:\n    return Alternatives([python_types_to_terms(arg) for arg in args])\n\n\ndef _ensure_json_quoted(term: Term) -> Term:\n    \"\"\"Wrap bare ``String`` terms in double quotes for JSON container contexts.\n\n    When string literal values (from ``Literal`` or ``Enum``) appear inside\n    container types (``List``, ``Tuple``, ``Dict``), they must be JSON-quoted\n    so the generated regex matches valid JSON.  
``Regex``-based terms (e.g.\n    ``types.string``) already include their own quotes and are left untouched.\n    \"\"\"\n    if isinstance(term, String):\n        return String(f'\"{term.value}\"')\n    if isinstance(term, Alternatives):\n        quoted = [_ensure_json_quoted(t) for t in term.terms]\n        return Alternatives(quoted)\n    return term\n\n\ndef _handle_union(args: tuple, recursion_depth: int) -> Alternatives:\n    # Handle the Optional[T] type\n    if len(args) == 2 and (type(None) in args or None in args):\n        other_ptype = next(arg for arg in args if arg not in (type(None), None))\n        return Alternatives(\n            [\n                python_types_to_terms(other_ptype, recursion_depth + 1),\n                String(\"None\"),\n            ]\n        )\n    return Alternatives(\n        [python_types_to_terms(arg, recursion_depth + 1) for arg in args]\n    )\n\n\ndef _handle_list(args: tuple, recursion_depth: int) -> Sequence:\n    if args is None or len(args) != 1:\n        raise TypeError(\n            \"Only homogeneous lists are supported. 
You should provide exactly \"\n            + \"one argument to `List`, got {args}.\"\n        )\n    item_type = _ensure_json_quoted(python_types_to_terms(args[0], recursion_depth + 1))\n    return Sequence(\n        [\n            String(\"[\"),\n            item_type,\n            KleeneStar(Sequence([String(\", \"), item_type])),\n            String(\"]\"),\n        ]\n    )\n\n\ndef _handle_tuple(args: tuple, recursion_depth: int) -> Union[Sequence, String]:\n    if len(args) == 0 or args == ((),):\n        return String(\"()\")\n    elif len(args) == 2 and args[1] is Ellipsis:\n        item_term = _ensure_json_quoted(python_types_to_terms(args[0], recursion_depth + 1))\n        return Sequence(\n            [\n                String(\"(\"),\n                item_term,\n                KleeneStar(Sequence([String(\", \"), item_term])),\n                String(\")\"),\n            ]\n        )\n    else:\n        items = [_ensure_json_quoted(python_types_to_terms(arg, recursion_depth + 1)) for arg in args]\n        separator = String(\", \")\n        elements = []\n        for i, item in enumerate(items):\n            elements.append(item)\n            if i < len(items) - 1:\n                elements.append(separator)\n        return Sequence([String(\"(\"), *elements, String(\")\")])\n\n\ndef _handle_dict(args: tuple, recursion_depth: int) -> Sequence:\n    if args is None or len(args) != 2:\n        raise TypeError(f\"Dict must have exactly two type arguments. 
Got {args}.\")\n    # Add dict support with key:value pairs\n    key_type = _ensure_json_quoted(python_types_to_terms(args[0], recursion_depth + 1))\n    value_type = _ensure_json_quoted(python_types_to_terms(args[1], recursion_depth + 1))\n    return Sequence(\n        [\n            String(\"{\"),\n            Optional(\n                Sequence(\n                    [\n                        key_type,\n                        String(\":\"),\n                        value_type,\n                        KleeneStar(\n                            Sequence([String(\", \"), key_type, String(\":\"), value_type])\n                        ),\n                    ]\n                )\n            ),\n            String(\"}\"),\n        ]\n    )\n\n\ndef to_regex(term: Term) -> str:\n    \"\"\"Convert a term to a regular expression.\n\n    We only consider self-contained terms that do not refer to another rule.\n\n    Parameters\n    ----------\n    term\n        The term to convert to a regular expression.\n\n    Returns\n    -------\n    str\n        The regular expression as a string.\n\n    \"\"\"\n    if isinstance(term, String):\n        return re.escape(term.value)\n    elif isinstance(term, Regex):\n        return f\"({term.pattern})\"\n    elif isinstance(term, JsonSchema):\n        regex_str = build_regex_from_schema(term.schema, term.whitespace_pattern)\n        return f\"({regex_str})\"\n    elif isinstance(term, Choice):\n        regexes = [to_regex(python_types_to_terms(item)) for item in term.items]\n        return f\"({'|'.join(regexes)})\"\n    elif isinstance(term, KleeneStar):\n        return f\"({to_regex(term.term)})*\"\n    elif isinstance(term, KleenePlus):\n        return f\"({to_regex(term.term)})+\"\n    elif isinstance(term, Optional):\n        return f\"({to_regex(term.term)})?\"\n    elif isinstance(term, Alternatives):\n        regexes = [to_regex(subterm) for subterm in term.terms]\n        return f\"({'|'.join(regexes)})\"\n    elif 
isinstance(term, Sequence):\n        regexes = [to_regex(subterm) for subterm in term.terms]\n        return f\"{''.join(regexes)}\"\n    elif isinstance(term, QuantifyExact):\n        return f\"({to_regex(term.term)}){{{term.count}}}\"\n    elif isinstance(term, QuantifyMinimum):\n        return f\"({to_regex(term.term)}){{{term.min_count},}}\"\n    elif isinstance(term, QuantifyMaximum):\n        return f\"({to_regex(term.term)}){{,{term.max_count}}}\"\n    elif isinstance(term, QuantifyBetween):\n        return f\"({to_regex(term.term)}){{{term.min_count},{term.max_count}}}\"\n    else:\n        raise TypeError(\n            f\"Cannot convert object {repr(term)} to a regular expression.\"\n        )\n"
  },
  {
    "path": "outlines/types/json_schema_utils.py",
    "content": "\"\"\"Convert JSON Schema dicts to Python types.\"\"\"\n\nimport sys\nfrom dataclasses import dataclass, field\nfrom typing import Any, Dict, List, Literal, Optional\n\nfrom pydantic import BaseModel, create_model\n\nif sys.version_info >= (3, 12):  # pragma: no cover\n    from typing import _TypedDictMeta, TypedDict  # type: ignore\nelse:  # pragma: no cover\n    from typing_extensions import _TypedDictMeta, TypedDict  # type: ignore\n\n\ndef schema_type_to_python(\n    schema: dict,\n    caller_target_type: Literal[\"pydantic\", \"typeddict\", \"dataclass\"]\n) -> Any:\n    \"\"\"Get a Python type from a JSON Schema dict.\n\n    Parameters\n    ----------\n    schema: dict\n        The JSON Schema dict to convert to a Python type\n    caller_target_type: Literal[\"pydantic\", \"typeddict\", \"dataclass\"]\n        The type of the caller\n\n    Returns\n    -------\n    Any\n        The Python type\n\n    \"\"\"\n    if \"enum\" in schema:\n        values = schema[\"enum\"]\n        return Literal[tuple(values)]\n\n    t = schema.get(\"type\")\n\n    if t == \"string\":\n        return str\n    elif t == \"integer\":\n        return int\n    elif t == \"number\":\n        return float\n    elif t == \"boolean\":\n        return bool\n    elif t == \"array\":\n        items = schema.get(\"items\", {})\n        if items:\n            item_type = schema_type_to_python(items, caller_target_type)\n        else:\n            item_type = Any\n        return List[item_type]  # type: ignore\n    elif t == \"object\":\n        name = schema.get(\"title\")\n        if caller_target_type == \"pydantic\":\n            return json_schema_dict_to_pydantic(schema, name)\n        elif caller_target_type == \"typeddict\":\n            return json_schema_dict_to_typeddict(schema, name)\n        elif caller_target_type == \"dataclass\":\n            return json_schema_dict_to_dataclass(schema, name)\n\n    return Any\n\n\ndef json_schema_dict_to_typeddict(\n    
schema: dict,\n    name: Optional[str] = None\n) -> _TypedDictMeta:\n    \"\"\"Convert a JSON Schema dict into a TypedDict class.\n\n    Parameters\n    ----------\n    schema: dict\n        The JSON Schema dict to convert to a TypedDict\n    name: Optional[str]\n        The name of the TypedDict\n\n    Returns\n    -------\n    _TypedDictMeta\n        The TypedDict class\n\n    \"\"\"\n    required = set(schema.get(\"required\", []))\n    properties = schema.get(\"properties\", {})\n\n    annotations: Dict[str, Any] = {}\n\n    for property, details in properties.items():\n        typ = schema_type_to_python(details, \"typeddict\")\n        if property not in required:\n            typ = Optional[typ]\n        annotations[property] = typ\n\n    return TypedDict(name or \"AnonymousTypedDict\", annotations)  # type: ignore\n\n\ndef json_schema_dict_to_pydantic(\n    schema: dict,\n    name: Optional[str] = None\n) -> type[BaseModel]:\n    \"\"\"Convert a JSON Schema dict into a Pydantic BaseModel class.\n\n    Parameters\n    ----------\n    schema: dict\n        The JSON Schema dict to convert to a Pydantic BaseModel\n    name: Optional[str]\n        The name of the Pydantic BaseModel\n\n    Returns\n    -------\n    type[BaseModel]\n        The Pydantic BaseModel class\n\n    \"\"\"\n    required = set(schema.get(\"required\", []))\n    properties = schema.get(\"properties\", {})\n\n    field_definitions: Dict[str, Any] = {}\n\n    for property, details in properties.items():\n        typ = schema_type_to_python(details, \"pydantic\")\n        if property not in required:\n            field_definitions[property] = (Optional[typ], None)\n        else:\n            field_definitions[property] = (typ, ...)\n\n    return create_model(name or \"AnonymousPydanticModel\", **field_definitions)\n\n\ndef json_schema_dict_to_dataclass(\n    schema: dict,\n    name: Optional[str] = None\n) -> type:\n    \"\"\"Convert a JSON Schema dict into a dataclass.\n\n    Parameters\n    
----------\n    schema: dict\n        The JSON Schema dict to convert to a dataclass\n    name: Optional[str]\n        The name of the dataclass\n\n    Returns\n    -------\n    type\n        The dataclass\n\n    \"\"\"\n    required = set(schema.get(\"required\", []))\n    properties = schema.get(\"properties\", {})\n\n    annotations: Dict[str, Any] = {}\n    defaults: Dict[str, Any] = {}\n\n    for property, details in properties.items():\n        typ = schema_type_to_python(details, \"dataclass\")\n        annotations[property] = typ\n\n        if property not in required:\n            defaults[property] = None\n\n    class_dict = {\n        '__annotations__': annotations,\n        '__module__': __name__,\n    }\n\n    for property, default_val in defaults.items():\n        class_dict[property] = field(default=default_val)\n\n    cls = type(name or \"AnonymousDataclass\", (), class_dict)\n    return dataclass(cls)\n"
  },
  {
    "path": "outlines/types/locale/__init__.py",
    "content": "\"\"\"Locale-specific regex patterns.\"\"\"\n\nfrom . import us\n\n__all__ = [\n    \"us\",\n]\n"
  },
  {
    "path": "outlines/types/locale/us.py",
    "content": "\"\"\"Locale-specific regex patterns for the United States.\"\"\"\n\nfrom outlines.types.dsl import Regex\n\nzip_code = Regex(r\"\\d{5}(?:-\\d{4})?\")\nphone_number = Regex(r\"(\\([0-9]{3}\\) |[0-9]{3}-)[0-9]{3}-[0-9]{4}\")\n"
  },
  {
    "path": "outlines/types/utils.py",
    "content": "\"\"\"Utility functions for the types module.\"\"\"\n\nimport dataclasses\nimport datetime\nimport inspect\nimport sys\nimport warnings\nfrom enum import Enum, EnumMeta\nfrom typing import (\n    Annotated,\n    Any,\n    Callable,\n    Dict,\n    Literal,\n    List,\n    NewType,\n    Tuple,\n    Union,\n    get_args,\n    get_origin,\n)\n\nfrom genson import SchemaBuilder\nfrom pydantic import BaseModel, create_model\n\nif sys.version_info >= (3, 12): # pragma: no cover\n    from typing import _TypedDictMeta  # type: ignore\nelse: # pragma: no cover\n    from typing_extensions import _TypedDictMeta  # type: ignore\n\n\n# Type identification\n\n\ndef is_int(value: Any) -> bool:\n    return (\n        value is int\n        or get_origin(value) is int\n        or (get_origin(value) is Annotated and get_args(value)[0] is int)\n        or (hasattr(value, \"__supertype__\") and value.__supertype__ is int)\n    )\n\n\ndef is_int_instance(value: Any) -> bool:\n    return isinstance(value, int) and not isinstance(value, bool)\n\n\ndef is_float(value: Any) -> bool:\n    return (\n        value is float\n        or get_origin(value) is float\n        or (get_origin(value) is Annotated and get_args(value)[0] is float)\n        or (hasattr(value, \"__supertype__\") and value.__supertype__ is float)\n    )\n\n\ndef is_float_instance(value: Any) -> bool:\n    return isinstance(value, float)\n\n\ndef is_str(value: Any) -> bool:\n    return (\n        value is str\n        or get_origin(value) is str\n        or (get_origin(value) is Annotated and get_args(value)[0] is str)\n        or (hasattr(value, \"__supertype__\") and value.__supertype__ is str)\n    )\n\n\ndef is_str_instance(value: Any) -> bool:\n    return isinstance(value, str)\n\n\ndef is_bool(value: Any) -> bool:\n    return (\n        value is bool\n        or get_origin(value) is bool\n        or (get_origin(value) is Annotated and get_args(value)[0] is bool)\n        or (hasattr(value, 
\"__supertype__\") and value.__supertype__ is bool)\n    )\n\n\ndef is_dict_instance(value: Any) -> bool:\n    return isinstance(value, dict)\n\n\ndef is_datetime(value: Any) -> bool:\n    return value is datetime.datetime or get_origin(value) is datetime.datetime\n\n\ndef is_date(value: Any) -> bool:\n    return value is datetime.date or get_origin(value) is datetime.date\n\n\ndef is_time(value: Any) -> bool:\n    return value is datetime.time or get_origin(value) is datetime.time\n\n\ndef is_native_dict(value: Any) -> bool:\n    return value is dict\n\n\ndef is_typing_dict(value: Any) -> bool:\n    return get_origin(value) is dict\n\n\ndef is_typing_list(value: Any) -> bool:\n    return get_origin(value) is list\n\n\ndef is_typing_tuple(value: Any) -> bool:\n    return get_origin(value) is tuple\n\n\ndef is_union(value: Any) -> bool:\n    return get_origin(value) is Union\n\n\ndef is_literal(value: Any) -> bool:\n    return get_origin(value) is Literal\n\n\ndef is_dataclass(value: Any) -> bool:\n    return isinstance(value, type) and dataclasses.is_dataclass(value)\n\n\ndef is_typed_dict(value: Any) -> bool:\n    return isinstance(value, _TypedDictMeta)\n\n\ndef is_pydantic_model(value):\n    # needed because generic type cannot be used with `issubclass`    # for Python versions < 3.11\n    if get_origin(value) is not None:\n        return False\n\n    return isinstance(value, type) and issubclass(value, BaseModel)\n\n\ndef is_genson_schema_builder(value: Any) -> bool:\n    return isinstance(value, SchemaBuilder)\n\n\ndef is_enum(value: Any) -> bool:\n    return isinstance(value, EnumMeta)\n\n\ndef is_callable(value: Any) -> bool:\n    return callable(value) and not isinstance(value, type)\n\n\n# Type conversion\n\n\ndef get_enum_from_literal(value) -> Enum:\n    return Enum(\n        value.__name__,\n        {str(arg): arg for arg in get_args(value)}\n    )\n\n\ndef get_enum_from_choice(value) -> Enum:\n    return Enum(\n        'Choice',\n        {str(item): 
item for item in value.items}\n    )\n\n\ndef get_schema_from_signature(fn: Callable) -> dict:\n    \"\"\"Turn a function signature into a JSON schema.\n\n    Every JSON object valid to the output JSON Schema can be passed\n    to `fn` using the ** unpacking syntax.\n\n    \"\"\"\n    signature = inspect.signature(fn)\n    arguments = {}\n    for name, arg in signature.parameters.items():\n        if arg.annotation == inspect._empty:\n            raise ValueError(\"Each argument must have a type annotation\")\n        else:\n            arguments[name] = (arg.annotation, ...)\n\n    try:\n        fn_name = fn.__name__\n    except Exception as e:\n        fn_name = \"Arguments\"\n        warnings.warn(\n            f\"The function name could not be determined. Using default name 'Arguments' instead. For debugging, here is exact error:\\n{e}\",\n            category=UserWarning,\n        )\n    model = create_model(fn_name, **arguments)\n\n    return model.model_json_schema()\n\n\ndef get_schema_from_enum(myenum: type[Enum]) -> dict:\n    if len(myenum) == 0:\n        raise ValueError(\n            f\"Your enum class {myenum.__name__} has 0 members. If you are working with an enum of functions, do not forget to register them as callable (using `partial` for instance)\"\n        )\n    choices = [\n        get_schema_from_signature(elt.value.func)\n        if callable(elt.value)\n        else {\"const\": elt.value}\n        for elt in myenum\n    ]\n    schema = {\"title\": myenum.__name__, \"oneOf\": choices}\n    return schema\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[build-system]\nrequires = [\"setuptools>=45\", \"setuptools_scm[toml]>=6.2\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[project]\nname = \"outlines\"\nauthors= [{name = \"Outlines Developers\"}]\ndescription = \"Probabilistic Generative Model Programming\"\nrequires-python = \">=3.10,<3.14\"\nlicense = {text = \"Apache-2.0\"}\nkeywords=[\n    \"machine learning\",\n    \"deep learning\",\n    \"language models\",\n    \"structured generation\",\n]\nclassifiers = [\n    \"Development Status :: 5 - Production/Stable\",\n    \"Intended Audience :: Developers\",\n    \"Intended Audience :: Information Technology\",\n    \"Intended Audience :: Science/Research\",\n    \"Operating System :: OS Independent\",\n    \"Programming Language :: Python :: 3\",\n    \"Topic :: Scientific/Engineering :: Artificial Intelligence\",\n]\ndependencies = [\n   \"jinja2\",\n   \"cloudpickle\",\n   \"diskcache\",\n   \"pydantic>=2.0\",\n   \"jsonschema\",\n   \"pillow\",\n   \"typing_extensions\",\n   \"outlines_core==0.2.14\",\n   \"genson\",\n   \"jsonpath_ng\",\n]\ndynamic = [\"version\"]\n\n[project.optional-dependencies]\nanthropic = [\"anthropic\"]\ndottxt = [\"dottxt\"]\ngemini = [\"google-genai\"]\nllamacpp = [\"huggingface-hub\", \"llama-cpp-python\", \"numba\"]\nmlxlm = [\"datasets\", \"mlx\", \"mlx-lm\"]\nlmstudio = [\"lmstudio\"]\nollama = [\"ollama\"]\nopenai = [\"openai\"]\nmistral = [\"mistralai\"]\nsglang = [\"openai\"]\ntgi = [\"huggingface_hub\"]\ntransformers = [\"accelerate\", \"datasets\", \"transformers\", \"setuptools\", \"sentencepiece\"]\nvllm = [\"openai\"]\nxgrammar = [\"xgrammar\"]\nllguidance = [\"llguidance\"]\nairports = [\"airportsdata\"]\ncountries = [\"iso3166\"]\ntest = [\n    \"pre-commit\",\n    \"pytest\",\n    \"pytest-benchmark\",\n    \"pytest-cov\",\n    \"pytest-mock\",\n    \"pytest-asyncio\",\n    \"coverage[toml]>=5.1\",\n    \"diff-cover\",\n    \"accelerate\",\n    \"beartype<0.16.0\",\n    \"responses\",\n    
\"llama-cpp-python\",\n    \"mlx-lm>=0.19.2; platform_machine == 'arm64' and sys_platform == 'darwin'\",\n    \"huggingface_hub\",\n    \"openai>=1.0.0\",\n    \"datasets\",\n    \"anthropic\",\n    \"google-genai\",\n    \"mistralai\",\n    \"transformers\",\n    \"pillow\",\n    \"jax\",\n    \"flax\",\n    \"numpy>=2.0.0,<2.2.0\",\n    \"numba\",\n    \"torch>2.3.0\",\n    \"tensorflow\",\n    \"tf-keras\",\n    \"ollama\",\n    \"lmstudio\",\n    \"dottxt\",\n    \"sentencepiece\",\n    \"mkdocs_gen_files\",\n    \"llguidance\",\n    \"xgrammar\",\n    \"airportsdata\",\n    \"iso3166\",\n    \"requests\",\n]\n\n[dependency-groups]\n# Note: vllm is excluded from the lock file due to circular dependency with outlines-core.\n# For GPU testing, install vllm manually: pip install vllm\ntest-gpu = [\"outlines[test]\"]\n\n[project.urls]\nhomepage = \"https://github.com/dottxt-ai/outlines\"\ndocumentation = \"https://dottxt-ai.github.io/outlines/\"\nrepository = \"https://github.com/dottxt-ai/outlines\"\n\n[project.readme]\nfile=\"README.md\"\ncontent-type = \"text/markdown\"\n\n[tool.setuptools.packages.find]\ninclude = [\"outlines*\"]\n\n[tool.setuptools.package-data]\n\"outlines\" = [\"py.typed\", \"grammars/*.lark\"]\n\n[tool.setuptools_scm]\nwrite_to = \"outlines/_version.py\"\n\n[tool.pytest.ini_options]\ntestpaths = [\"tests\"]\nfilterwarnings = [\n    \"error\",\n    \"ignore::pydantic.warnings.PydanticDeprecatedSince20\",\n    \"ignore::FutureWarning:transformers.*\",\n    \"ignore::FutureWarning:huggingface_hub.*\",\n    \"ignore::UserWarning\",\n    \"ignore::DeprecationWarning:pyairports.*\",\n    \"ignore::DeprecationWarning:jax.*\",\n    \"ignore::DeprecationWarning:flax.*\",\n    \"ignore::DeprecationWarning:torch.*\",\n]\n\n[tool.mypy]\nexclude=[\"examples\"]\nenable_incomplete_feature = [\"Unpack\"]\n\n[[tool.mypy.overrides]]\nmodule = [\n    \"jax\",\n    \"jaxlib\",\n    \"jax.numpy\",\n    \"jinja2\",\n    \"jsonschema.*\",\n    \"anthropic.*\",\n  
  \"google.*\",\n    \"mistralai.*\",\n    \"mamba_ssm.*\",\n    \"mlx_lm.*\",\n    \"mlx.*\",\n    \"numpy.*\",\n    \"cloudpickle.*\",\n    \"diskcache.*\",\n    \"pydantic.*\",\n    \"pydantic_core.*\",\n    \"pytest\",\n    \"referencing.*\",\n    \"torch.*\",\n    \"transformers.*\",\n    \"llama_cpp\",\n    \"huggingface_hub\",\n    \"datasets.*\",\n    \"openai.*\",\n    \"requests.*\",\n    \"responses.*\",\n    \"vllm.*\",\n    \"iso3166.*\",\n    \"airportsdata.*\",\n    \"outlines_core.*\",\n    \"genson\",\n    \"lmstudio.*\",\n    \"ollama.*\",\n    \"dottxt.*\",\n    \"tensorflow\",\n    \"tensorflow.*\",\n    \"tf-keras\",\n    \"tf-keras.*\",\n    \"mkdocs_gen_files.*\",\n    \"jsonpath_ng.*\",\n    \"llguidance.*\",\n    \"xgrammar.*\",\n]\nignore_missing_imports = true\n\n[tool.coverage.run]\n# we omit the files that require a GPU or Apple Silicon\n# as well as the models that make API calls\nomit = [\n    \"outlines/_version.py\",\n    \"outlines/models/anthropic.py\",\n    \"outlines/models/dottxt.py\",\n    \"outlines/models/gemini.py\",\n    \"outlines/models/lmstudio.py\",\n    \"outlines/models/mlxlm.py\",\n    \"outlines/models/openai.py\",\n    \"outlines/models/mistral.py\",\n    \"outlines/models/vllm_offline.py\",\n    \"outlines/processors/tensor_adapters/mlx.py\",\n    \"tests/*\",\n]\nbranch = true\nrelative_files = true\n\n[tool.coverage.report]\nshow_missing = true\nexclude_lines = [\n    \"pragma: no cover\",\n    \"if TYPE_CHECKING:\",\n    \"\\\\.\\\\.\\\\.\",\n]\n\n[tool.diff_cover]\ncompare_branch = \"origin/main\"\ndiff_range_notation = \"..\"\n\n[tool.docformatter]\nstyle = \"numpy\"\nin-place = true\n\n[tool.ruff.lint]\nignore = [ \"E731\", \"F401\" ]\n"
  },
  {
    "path": "requirements-doc.txt",
    "content": "mkdocs\nmkdocs-material\nmkdocs-material[imaging]\nmkdocs-mermaid2-plugin\nmkdocs-section-index\nmkdocstrings[python]\nmkdocs-git-committers-plugin-2\nmkdocs-git-revision-date-localized-plugin\nmkdocs-redirects\nmkdocs-gen-files\nmkdocs-literate-nav\nmike\n"
  },
  {
    "path": "scripts/gen_ref_pages.py",
    "content": "\"\"\"Generate the API reference pages and navigation automatically.\n\nThis script is based on the `gen_ref_pages.py` script in the\n[mkdocstrings](https://mkdocstrings.github.io/recipes/#automatic-code-reference-pages) project.\n\nTo exclude a file or module from being included in the generated API reference,\nadd a part of its path to the `EXCLUDED_FILES` list.\n\"\"\"\n\nfrom pathlib import Path\n\nimport mkdocs_gen_files\n\nCODEBASE_DIR_NAME = \"outlines\"\nOUTPUT_DIR_NAME = \"api_reference\"\nEXCLUDED_FILES = [\"_version\"]\n\n\nnav = mkdocs_gen_files.Nav()\nroot = Path(__file__).parent.parent\nsrc = root / CODEBASE_DIR_NAME\n\nfor path in sorted(src.rglob(\"*.py\")):\n    module_path = path.relative_to(src).with_suffix(\"\")\n    doc_path = path.relative_to(src).with_suffix(\".md\")\n    full_doc_path = Path(OUTPUT_DIR_NAME, doc_path)\n\n    parts = tuple(module_path.parts)\n\n    if any(part in EXCLUDED_FILES for part in parts):\n        continue\n\n    if parts[-1] == \"__init__\":\n        if len(parts) == 1:\n            doc_path = Path(\"index.md\")\n            full_doc_path = Path(OUTPUT_DIR_NAME, doc_path)\n            parts = (CODEBASE_DIR_NAME,)\n        else:\n            parts = parts[:-1]\n            doc_path = doc_path.with_name(\"index.md\")\n            full_doc_path = full_doc_path.with_name(\"index.md\")\n\n    nav[parts] = doc_path.as_posix()\n\n    with mkdocs_gen_files.open(full_doc_path, \"w\") as fd:\n        ident = \".\".join(parts)\n        if len(parts) == 1 and parts[0] == CODEBASE_DIR_NAME:\n            # For root module, just use the package name\n            fd.write(f\"::: {CODEBASE_DIR_NAME}\")\n        else:\n            fd.write(f\"::: {CODEBASE_DIR_NAME}.{ident}\")\n\n    mkdocs_gen_files.set_edit_path(full_doc_path, path.relative_to(root))\n\nwith mkdocs_gen_files.open(f\"{OUTPUT_DIR_NAME}/SUMMARY.md\", \"w\") as nav_file:\n    nav_file.writelines(nav.build_literate_nav())\n"
  },
  {
    "path": "setup.cfg",
    "content": "[flake8]\nmax-line-length = 88\nselect = C,E,F,W\nignore = E203,E231,E501,E741,W503,W504,C901,E731\nper-file-ignores =\n    **/__init__.py:F401,F403\nexclude =\n    normalai/_version.py\n"
  },
  {
    "path": "shell.nix",
    "content": "{ pkgs ? import <nixpkgs> { config = { allowUnfree = true; }; } }:\n\n(pkgs.buildFHSEnv {\n  name = \"dottxt-ai\";\n  targetPkgs = pkgs:\n    with pkgs; [\n      autoconf\n      binutils\n      cmake\n      cudatoolkit\n      curl\n      freeglut\n      gcc13\n      git\n      gitRepo\n      gnumake\n      gnupg\n      gperf\n      libGL\n      libGLU\n      linuxPackages.nvidia_x11\n      m4\n      ncurses5\n      procps\n      python311\n      stdenv.cc\n      unzip\n      util-linux\n      uv\n      xorg.libX11\n      xorg.libXext\n      xorg.libXi\n      xorg.libXmu\n      xorg.libXrandr\n      xorg.libXv\n      zlib\n    ];\n\n  multiPkgs = pkgs: with pkgs; [ zlib ];\n\n  runScript = \"bash\";\n\n  profile = ''\n    # CUDA paths\n    export CUDA_HOME=${pkgs.cudatoolkit}\n    export CUDA_PATH=${pkgs.cudatoolkit}\n\n    # Ensure proper binary paths are included\n    export PATH=${pkgs.gcc13}/bin:${pkgs.cudatoolkit}/bin:$PATH\n\n    # Set library paths, including additional directories for CUPTI\n    export LD_LIBRARY_PATH=${pkgs.cudatoolkit}/lib64:${pkgs.cudatoolkit}/extras/CUPTI/lib64:${pkgs.linuxPackages.nvidia_x11}/lib:$LD_LIBRARY_PATH\n\n    # Add static library paths to EXTRA_LDFLAGS for the linker\n    export EXTRA_LDFLAGS=\"-L${pkgs.cudatoolkit}/lib64 -L${pkgs.cudatoolkit}/extras/CUPTI/lib64 -L${pkgs.linuxPackages.nvidia_x11}/lib -L${pkgs.cudatoolkit}/libdevice $EXTRA_LDFLAGS\"\n    export EXTRA_CCFLAGS=\"-I${pkgs.cudatoolkit}/include $EXTRA_CCFLAGS\"\n\n    # Set CMake paths\n    export CMAKE_PREFIX_PATH=${pkgs.cudatoolkit}:${pkgs.linuxPackages.nvidia_x11}:$CMAKE_PREFIX_PATH\n\n    # C++ and CC flags\n    export CXXFLAGS=\"--std=c++17 $EXTRA_CCFLAGS\"\n    export CC=${pkgs.gcc13}/bin/gcc\n    export CXX=${pkgs.gcc13}/bin/g++\n\n    # NVCC flags to use the right compiler\n    export NVCC_FLAGS=\"-ccbin ${pkgs.gcc13}/bin/gcc\"\n  '';\n\n  structuredAttrs__ = {\n    stdenv = pkgs.stdenv.overrideCC pkgs.stdenv.cc pkgs.gcc13;\n  };\n}).env\n"
  },
  {
    "path": "tests/__init__.py",
    "content": ""
  },
  {
    "path": "tests/backends/test_backends.py",
    "content": "import outlines\nimport pytest\nimport transformers\n\nfrom outlines.backends import (\n    _get_backend,\n    get_json_schema_logits_processor,\n    get_regex_logits_processor,\n    get_cfg_logits_processor,\n)\nfrom outlines.backends.outlines_core import (\n    OutlinesCoreBackend,\n    OutlinesCoreLogitsProcessor,\n)\nfrom outlines.backends.llguidance import (\n    LLGuidanceBackend,\n    LLGuidanceLogitsProcessor\n)\nfrom outlines.backends.xgrammar import XGrammarBackend, XGrammarLogitsProcessor\n\n\n@pytest.fixture\ndef model():\n    return outlines.from_transformers(\n        transformers.AutoModelForCausalLM.from_pretrained(\"erwanf/gpt2-mini\"),\n        transformers.AutoTokenizer.from_pretrained(\"erwanf/gpt2-mini\"),\n    )\n\n@pytest.fixture\ndef json_schema():\n    return (\n        '{\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, '\n        + '\"age\": {\"type\": \"integer\"}}, \"required\": [\"name\", \"age\"], '\n        + '\"additionalProperties\": false}'\n    )\n\n@pytest.fixture\ndef regex():\n    return r\"[0-9]{3}\"\n\n@pytest.fixture\ndef cfg_lark():\n    return \"\"\"\n?start: sum\n\n?sum: product\n| sum \"+\" product   -> add\n| sum \"-\" product   -> sub\n\n?product: atom\n| product \"*\" atom  -> mul\n| product \"/\" atom  -> div\n\n?atom: NUMBER           -> number\n| \"-\" atom         -> neg\n| \"(\" sum \")\"\n\n%import common.NUMBER\n%import common.WS_INLINE\n\n%ignore WS_INLINE\n\"\"\"\n\n@pytest.fixture\ndef cfg_ebnf():\n    return \"\"\"\nroot ::= answer\nanswer ::= \"yes\" | \"no\"\n\"\"\"\n\n\ndef test_get_backend(model):\n    backend = _get_backend(\"outlines_core\", model)\n    assert isinstance(backend, OutlinesCoreBackend)\n\n    backend = _get_backend(\"xgrammar\", model)\n    assert isinstance(backend, XGrammarBackend)\n\n    backend = _get_backend(\"llguidance\", model)\n    assert isinstance(backend, LLGuidanceBackend)\n\n    with pytest.raises(ValueError, match=\"not 
supported\"):\n        _get_backend(\"not_supported\", model)\n\n\ndef test_get_json_schema_logits_processor(model, json_schema):\n    processor = get_json_schema_logits_processor(\"outlines_core\", model, json_schema)\n    assert isinstance(processor, OutlinesCoreLogitsProcessor)\n\n    processor = get_json_schema_logits_processor(\"llguidance\", model, json_schema)\n    assert isinstance(processor, LLGuidanceLogitsProcessor)\n\n    processor = get_json_schema_logits_processor(\"xgrammar\", model, json_schema)\n    assert isinstance(processor, XGrammarLogitsProcessor)\n\n\ndef test_get_regex_logits_processor(model, regex):\n    processor = get_regex_logits_processor(\"outlines_core\", model, regex)\n    assert isinstance(processor, OutlinesCoreLogitsProcessor)\n\n    processor = get_regex_logits_processor(\"llguidance\", model, regex)\n    assert isinstance(processor, LLGuidanceLogitsProcessor)\n\n    processor = get_regex_logits_processor(\"xgrammar\", model, regex)\n    assert isinstance(processor, XGrammarLogitsProcessor)\n\n\ndef test_get_cfg_logits_processor(model, cfg_lark, cfg_ebnf):\n    with pytest.raises(\n        NotImplementedError,\n        match=\"Outlines Core does not support context-free grammar.\"\n    ):\n        get_cfg_logits_processor(\"outlines_core\", model, cfg_lark)\n\n    processor = get_cfg_logits_processor(\"llguidance\", model, cfg_lark)\n    assert isinstance(processor, LLGuidanceLogitsProcessor)\n\n    processor = get_cfg_logits_processor(\"xgrammar\", model, cfg_ebnf)\n    assert isinstance(processor, XGrammarLogitsProcessor)\n"
  },
  {
    "path": "tests/backends/test_backends_utils.py",
    "content": "import torch\nimport numpy as np\n\n\ndef simulate_model_calling_processor(processor, tensor_library_name, vocabulary_size, eos_token_id, batch_size):\n    if tensor_library_name == \"torch\":\n        tensor_adapter = TorchTensorAdapter()\n    elif tensor_library_name == \"numpy\":\n        tensor_adapter = NumpyTensorAdapter()\n    elif tensor_library_name == \"mlx\":\n        tensor_adapter = MLXTensorAdapter()\n\n    processor.reset()\n    i = 0\n    input_ids = tensor_adapter.randint(0, vocabulary_size, (batch_size, 10))\n    while True:\n        i += 1\n        logits = tensor_adapter.randn((batch_size, vocabulary_size))\n        output = processor(input_ids, logits)\n        assert output.shape == (batch_size, vocabulary_size)\n        if all(input_ids[:, -1] == eos_token_id):\n            break\n        input_ids = tensor_adapter.add_token_inputs_ids(input_ids, output)\n        print(input_ids)\n        if i > 20:\n            break\n    return input_ids[:, 10:]\n\nclass TorchTensorAdapter():\n    def randn(self, shape):\n        return torch.randn(*shape)\n\n    def randint(self, low, high, size):\n        return torch.randint(low, high, size)\n\n    def add_token_inputs_ids(self, input_ids, logits):\n        next_token_ids = torch.argmax(logits, dim=-1)\n        input_ids = torch.cat([input_ids, next_token_ids.unsqueeze(-1)], dim=-1)\n        return input_ids\n\n\nclass NumpyTensorAdapter():\n    def randn(self, shape):\n        return np.random.randn(*shape)\n\n    def randint(self, low, high, size):\n        return np.random.randint(low, high, size)\n\n    def add_token_inputs_ids(self, input_ids, logits):\n        next_token_ids = np.argmax(logits, axis=-1)\n        print(\"next_token_ids\",next_token_ids)\n        input_ids = np.concatenate([input_ids, next_token_ids[..., None]], axis=-1)\n        return input_ids\n\n\nclass MLXTensorAdapter():\n    def __init__(self):\n        import mlx\n        self.mlx = mlx\n\n    def randn(self, 
shape):\n        return self.mlx.random.randn(*shape)\n\n    def randint(self, low, high, size):\n        return self.mlx.random.randint(low, high, size)\n\n    def add_token_inputs_ids(self, input_ids, logits):\n        next_token_ids = self.mlx.argmax(logits, axis=-1)\n        input_ids = self.mlx.concatenate([input_ids, next_token_ids[..., None]], axis=-1)\n        return input_ids\n"
  },
  {
    "path": "tests/backends/test_llguidance.py",
    "content": "import re\n\nimport llama_cpp\nimport llguidance\nimport pytest\nimport transformers\nfrom llguidance import LLTokenizer\n\nimport outlines\nfrom outlines.backends.llguidance import (\n    LLGuidanceBackend,\n    LLGuidanceLogitsProcessor\n)\nfrom tests.backends.test_backends_utils import simulate_model_calling_processor\n\ntry:\n    import mlx_lm\n    HAS_MLX = True\nexcept ImportError:\n    HAS_MLX = False\n\n\ndef model_transformers():\n    return outlines.from_transformers(\n        transformers.AutoModelForCausalLM.from_pretrained(\"erwanf/gpt2-mini\"),\n        transformers.AutoTokenizer.from_pretrained(\"erwanf/gpt2-mini\"),\n    )\n\ndef model_llamacpp():\n    return outlines.from_llamacpp(\n        llama_cpp.Llama.from_pretrained(\n            repo_id=\"M4-ai/TinyMistral-248M-v2-Instruct-GGUF\",\n            filename=\"TinyMistral-248M-v2-Instruct.Q4_K_M.gguf\",\n            chat_format=\"qwen\",\n        )\n    )\n\ndef model_mlxlm():\n    return outlines.from_mlxlm(\n        *mlx_lm.load(\"mlx-community/SmolLM-135M-Instruct-4bit\")\n    )\n\n@pytest.fixture\ndef json_schema():\n    return (\n        '{\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, '\n        + '\"age\": {\"type\": \"integer\"}}, \"required\": [\"name\", \"age\"], '\n        + '\"additionalProperties\": false}'\n    )\n\n@pytest.fixture\ndef regex():\n    return r\"[0-9]{3}\"\n\n@pytest.fixture\ndef cfg_lark():\n    return \"\"\"\n?start: sum\n\n?sum: product\n| sum \"+\" product   -> add\n| sum \"-\" product   -> sub\n\n?product: atom\n| product \"*\" atom  -> mul\n| product \"/\" atom  -> div\n\n?atom: NUMBER           -> number\n| \"-\" atom         -> neg\n| \"(\" sum \")\"\n\n%import common.NUMBER\n%import common.WS_INLINE\n\n%ignore WS_INLINE\n\"\"\"\n\n@pytest.fixture\ndef cfg_ebnf():\n    return \"\"\"\nroot ::= answer\nanswer ::= \"yes\" | \"no\"\n\"\"\"\n\n\ndef test_llguidance_processor_torch(regex):\n    model = model_transformers()\n  
  tokenizer = model.tokenizer\n    hf_tokenizer = model.hf_tokenizer\n    llg_tokenizer = LLGuidanceBackend(model).llg_tokenizer\n    grammar_spec = llguidance.grammar_from(\"regex\", regex)\n    processor = LLGuidanceLogitsProcessor(grammar_spec, llg_tokenizer, \"torch\")\n    for _ in range(2):\n        input_ids = simulate_model_calling_processor(\n            processor,\n            \"torch\",\n            len(tokenizer.get_vocab()),\n            tokenizer.eos_token_id,\n            2\n        )\n        assert re.match(regex, hf_tokenizer.decode(input_ids[0]))\n        assert re.match(regex, hf_tokenizer.decode(input_ids[1]))\n\n\ndef test_llguidance_processor_numpy(regex):\n    model = model_llamacpp()\n    tokenizer = model.tokenizer\n    llg_tokenizer = LLGuidanceBackend(model).llg_tokenizer\n    grammar_spec = llguidance.grammar_from(\"regex\", regex)\n    processor = LLGuidanceLogitsProcessor(grammar_spec, llg_tokenizer, \"numpy\")\n    for _ in range(2):\n        input_ids = simulate_model_calling_processor(\n            processor,\n            \"numpy\",\n            len(tokenizer.vocabulary),\n            tokenizer.eos_token_id,\n            2\n        )\n        assert re.match(regex, tokenizer.decode(input_ids[0])[0])\n        assert re.match(regex, tokenizer.decode(input_ids[1])[0])\n\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_llguidance_processor_mlx(regex):\n    model = model_mlxlm()\n    tokenizer = model.mlx_tokenizer\n    llg_tokenizer = LLGuidanceBackend(model).llg_tokenizer\n    grammar_spec = llguidance.grammar_from(\"regex\", regex)\n    processor = LLGuidanceLogitsProcessor(grammar_spec, llg_tokenizer, \"mlx\")\n    for _ in range(2):\n        input_ids = simulate_model_calling_processor(\n            processor,\n            \"mlx\",\n            len(tokenizer.vocabulary),\n            tokenizer.eos_token_id,\n            2\n        )\n        assert re.match(regex, 
tokenizer.decode(input_ids[0]))\n        assert re.match(regex, tokenizer.decode(input_ids[1]))\n\n\nmodels = [\n    (model_transformers(), \"torch\"),\n    (model_llamacpp(), \"numpy\"),\n]\nif HAS_MLX:\n    models.append((model_mlxlm(), \"mlx\"))\n\n@pytest.mark.parametrize(\"model, tensor_library_name\", models)\ndef test_llguidance_backend(model, tensor_library_name, json_schema, regex, cfg_lark, cfg_ebnf):\n    # initialization\n    backend = LLGuidanceBackend(model)\n    assert isinstance(backend.llg_tokenizer, LLTokenizer)\n    assert backend.tensor_library_name == tensor_library_name\n\n    # json schema\n    processor = backend.get_json_schema_logits_processor(json_schema)\n    assert isinstance(processor, LLGuidanceLogitsProcessor)\n    generator = outlines.Generator(model, backend=\"llguidance\", processor=processor)\n    response = generator(\"Hello, how are you?\")\n    assert response[0] == \"{\"\n\n    # regex\n    processor = backend.get_regex_logits_processor(regex)\n    assert isinstance(processor, LLGuidanceLogitsProcessor)\n    generator = outlines.Generator(model, backend=\"llguidance\", processor=processor)\n    response = generator(\"Hello, how are you?\")\n    assert len(response) == 3\n    assert int(response)\n\n    # cfg lark\n    processor = backend.get_cfg_logits_processor(cfg_lark)\n    assert isinstance(processor, LLGuidanceLogitsProcessor)\n    generator = outlines.Generator(model, backend=\"llguidance\", processor=processor)\n    response = generator(\"Hello, how are you?\")\n    assert (\n        \"+\" in response\n        or \"-\" in response\n        or \"*\" in response\n        or \"/\" in response\n        or float(response.strip())\n    )\n\n    # cfg ebnf\n    processor = backend.get_cfg_logits_processor(cfg_ebnf)\n    assert isinstance(processor, LLGuidanceLogitsProcessor)\n    generator = outlines.Generator(model, backend=\"llguidance\", processor=processor)\n    response = generator(\"Hello, how are you?\")\n    assert 
response == \"yes\" or response == \"no\"\n\n    # batch + multiple generations\n    processor = backend.get_json_schema_logits_processor(json_schema)\n    generator = outlines.Generator(model, backend=\"llguidance\", processor=processor)\n    for _ in range(2):\n        if tensor_library_name == \"torch\":\n            response = generator.batch([\"Create a character\", \"Hello, how are you?\"], max_new_tokens=200)\n            assert len(response) == 2\n            for r in response:\n                assert r[0] == \"{\"\n        else:\n            response = generator(\"Create a character\", max_tokens=20)\n            assert response[0] == \"{\"\n"
  },
  {
    "path": "tests/backends/test_outlines_core.py",
    "content": "import re\n\nimport llama_cpp\nimport pytest\nimport transformers\nfrom outlines_core import Index, Vocabulary\n\nimport outlines\nfrom outlines.backends.outlines_core import (\n    OutlinesCoreBackend,\n    OutlinesCoreLogitsProcessor,\n)\nfrom tests.backends.test_backends_utils import simulate_model_calling_processor\n\ntry:\n    import mlx_lm\n\n    HAS_MLX = True\nexcept ImportError:\n    HAS_MLX = False\n\n\ndef model_transformers():\n    return outlines.from_transformers(\n        transformers.AutoModelForCausalLM.from_pretrained(\"erwanf/gpt2-mini\"),\n        transformers.AutoTokenizer.from_pretrained(\"erwanf/gpt2-mini\"),\n    )\n\n\ndef model_llamacpp():\n    return outlines.from_llamacpp(\n        llama_cpp.Llama.from_pretrained(\n            repo_id=\"M4-ai/TinyMistral-248M-v2-Instruct-GGUF\",\n            filename=\"TinyMistral-248M-v2-Instruct.Q4_K_M.gguf\",\n            chat_format=\"qwen\",\n        )\n    )\n\n\ndef model_mlxlm():\n    return outlines.from_mlxlm(*mlx_lm.load(\"mlx-community/SmolLM-135M-Instruct-4bit\"))\n\n\n@pytest.fixture\ndef json_schema():\n    return (\n        '{\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, '\n        + '\"age\": {\"type\": \"integer\"}}, \"required\": [\"name\", \"age\"], '\n        + '\"additionalProperties\": false}'\n    )\n\n\n@pytest.fixture\ndef regex():\n    return r\"[0-9]{3}\"\n\n\n@pytest.fixture\ndef cfg():\n    return \"\"\"\n?start: sum\n\n?sum: product\n| sum \"+\" product   -> add\n| sum \"-\" product   -> sub\n\n?product: atom\n| product \"*\" atom  -> mul\n| product \"/\" atom  -> div\n\n?atom: NUMBER           -> number\n| \"-\" atom         -> neg\n| \"(\" sum \")\"\n\n%import common.NUMBER\n%import common.WS_INLINE\n\n%ignore WS_INLINE\n\"\"\"\n\n\ndef test_outlines_core_processor_torch(regex):\n    model = model_transformers()\n    tokenizer = model.tokenizer\n    hf_tokenizer = model.hf_tokenizer\n    backend = OutlinesCoreBackend(model)\n    
index = Index(regex, backend.vocabulary)\n    processor = OutlinesCoreLogitsProcessor(index, \"torch\")\n    for _ in range(2):\n        input_ids = simulate_model_calling_processor(\n            processor, \"torch\", len(tokenizer.get_vocab()), tokenizer.eos_token_id, 2\n        )\n        assert re.match(regex, hf_tokenizer.decode(input_ids[0]))\n        assert re.match(regex, hf_tokenizer.decode(input_ids[1]))\n\n\ndef test_outlines_core_processor_numpy(regex):\n    model = model_llamacpp()\n    tokenizer = model.tokenizer\n    backend = OutlinesCoreBackend(model)\n    index = Index(regex, backend.vocabulary)\n    processor = OutlinesCoreLogitsProcessor(index, \"numpy\")\n    for _ in range(2):\n        input_ids = simulate_model_calling_processor(\n            processor, \"numpy\", len(tokenizer.vocabulary), tokenizer.eos_token_id, 2\n        )\n        assert re.match(regex, tokenizer.decode(input_ids[0])[0])\n        assert re.match(regex, tokenizer.decode(input_ids[1])[0])\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_outlines_core_processor_mlx(regex):\n    model = model_mlxlm()\n    tokenizer = model.mlx_tokenizer\n    backend = OutlinesCoreBackend(model)\n    index = Index(regex, backend.vocabulary)\n    processor = OutlinesCoreLogitsProcessor(index, \"mlx\")\n    for _ in range(2):\n        input_ids = simulate_model_calling_processor(\n            processor, \"mlx\", len(tokenizer.vocabulary), tokenizer.eos_token_id, 2\n        )\n        assert re.match(regex, tokenizer.decode(input_ids[0]))\n        assert re.match(regex, tokenizer.decode(input_ids[1]))\n\n\ndef test_create_vocabulary_preserves_duplicate_token_ids():\n    vocab = {\n        \"hello\": 1,\n        \"world\": 2,\n        \"<0x20>\": 3,\n        \"▁\": 4,\n    }\n\n    def token_to_str(token):\n        if token in (\"<0x20>\", \"▁\"):\n            return \" \"\n        return token\n\n    vocabulary = 
OutlinesCoreBackend.create_outlines_core_vocabulary(\n        vocab=vocab,\n        eos_token_id=0,\n        eos_token=\"hello\",\n        token_to_str=token_to_str,\n    )\n\n    # 4 original IDs - 1 popped (hello) + 1 EOS added by Vocabulary = 4\n    assert len(vocabulary) == 4\n\n\nmodels = [\n    (model_transformers(), \"torch\"),\n    (model_llamacpp(), \"numpy\"),\n]\nif HAS_MLX:\n    models.append((model_mlxlm(), \"mlx\"))\n\n\n@pytest.mark.parametrize(\"model, tensor_library_name\", models)\ndef test_outlines_core_backend(model, tensor_library_name, json_schema, regex, cfg):\n    # initialization\n    backend = OutlinesCoreBackend(model)\n    assert isinstance(backend.vocabulary, Vocabulary)\n    assert backend.tensor_library_name == tensor_library_name\n\n    # json schema\n    processor = backend.get_json_schema_logits_processor(json_schema)\n    assert isinstance(processor, OutlinesCoreLogitsProcessor)\n    generator = outlines.Generator(model, backend=\"outlines_core\", processor=processor)\n    response = generator(\"Hello, how are you?\")\n    assert \"name\" in response\n\n    # regex\n    processor = backend.get_regex_logits_processor(regex)\n    assert isinstance(processor, OutlinesCoreLogitsProcessor)\n    generator = outlines.Generator(model, backend=\"outlines_core\", processor=processor)\n    response = generator(\"Hello, how are you?\")\n    assert len(response) == 3\n    assert int(response)\n\n    # cfg\n    with pytest.raises(\n        NotImplementedError,\n        match=\"Outlines Core does not support context-free grammar.\",\n    ):\n        backend.get_cfg_logits_processor(cfg)\n\n    # batch + multiple generations\n    processor = backend.get_json_schema_logits_processor(json_schema)\n    generator = outlines.Generator(model, backend=\"outlines_core\", processor=processor)\n    for _ in range(2):\n        if tensor_library_name == \"torch\":\n            response = generator.batch(\n                [\"Create a character\", \"Hello, how 
are you?\"], max_new_tokens=200\n            )\n            assert len(response) == 2\n            for r in response:\n                assert r[0] == \"{\"\n                assert \"name\" in r\n        else:\n            response = generator(\"Create a character\", max_tokens=20)\n            assert response[0] == \"{\"\n            assert \"name\" in response\n"
  },
  {
    "path": "tests/backends/test_xgrammar.py",
    "content": "import re\n\nimport llama_cpp\nimport outlines\nimport pytest\nimport transformers\nfrom xgrammar import GrammarCompiler, TokenizerInfo\n\nfrom outlines.backends.xgrammar import XGrammarBackend, XGrammarLogitsProcessor\nfrom tests.backends.test_backends_utils import simulate_model_calling_processor\n\ntry:\n    import mlx_lm\n    HAS_MLX = True\nexcept ImportError:\n    HAS_MLX = False\n\n\ndef model_transformers():\n    return outlines.from_transformers(\n        transformers.AutoModelForCausalLM.from_pretrained(\"erwanf/gpt2-mini\"),\n        transformers.AutoTokenizer.from_pretrained(\"erwanf/gpt2-mini\"),\n    )\n\ndef model_llamacpp():\n    return outlines.from_llamacpp(\n        llama_cpp.Llama.from_pretrained(\n            repo_id=\"M4-ai/TinyMistral-248M-v2-Instruct-GGUF\",\n            filename=\"TinyMistral-248M-v2-Instruct.Q4_K_M.gguf\",\n            chat_format=\"qwen\",\n        )\n    )\n\ndef model_mlxlm():\n    return outlines.from_mlxlm(\n        *mlx_lm.load(\"mlx-community/SmolLM-135M-Instruct-4bit\")\n    )\n\n@pytest.fixture\ndef tokenizer_info():\n    tokenizer = model_transformers().hf_tokenizer\n    tokenizer_info = TokenizerInfo.from_huggingface(\n        tokenizer,\n        vocab_size=len(tokenizer.get_vocab())\n    )\n    return tokenizer_info\n\n@pytest.fixture\ndef json_schema():\n    return (\n        '{\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, '\n        + '\"age\": {\"type\": \"integer\"}}, \"required\": [\"name\", \"age\"], '\n        + '\"additionalProperties\": false}'\n    )\n\n@pytest.fixture\ndef regex():\n    return r\"[0-9]{3}\"\n\n@pytest.fixture\ndef cfg():\n    return \"\"\"\nroot ::= answer\nanswer ::= \"yes\" | \"no\"\n\"\"\"\n\n\ndef test_xgr_processor_torch(regex):\n    model = model_transformers()\n    tokenizer = model.tokenizer\n    hf_tokenizer = model.hf_tokenizer\n    tokenizer_info = TokenizerInfo.from_huggingface(\n        hf_tokenizer,\n        
vocab_size=len(hf_tokenizer.get_vocab())\n    )\n    grammar_compiler = GrammarCompiler(tokenizer_info)\n    compiled_grammar = grammar_compiler.compile_regex(regex)\n    processor = XGrammarLogitsProcessor(compiled_grammar, \"torch\")\n    for _ in range(2):\n        input_ids = simulate_model_calling_processor(\n            processor,\n            \"torch\",\n            len(tokenizer.get_vocab()),\n            tokenizer.eos_token_id,\n            2\n        )\n        assert re.match(regex, hf_tokenizer.decode(input_ids[0]))\n        assert re.match(regex, hf_tokenizer.decode(input_ids[1]))\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_xgr_processor_mlx(regex):\n    model = model_mlxlm()\n    tokenizer = model.mlx_tokenizer\n    tokenizer_info = TokenizerInfo.from_huggingface(\n        tokenizer,\n        vocab_size=len(tokenizer.get_vocab())\n    )\n    grammar_compiler = GrammarCompiler(tokenizer_info)\n    compiled_grammar = grammar_compiler.compile_regex(regex)\n    processor = XGrammarLogitsProcessor(compiled_grammar, \"mlx\")\n    for _ in range(2):\n        input_ids = simulate_model_calling_processor(\n            processor,\n            \"mlx\",\n            len(tokenizer.get_vocab()),\n            tokenizer.eos_token_id,\n            2\n        )\n        assert re.match(regex, tokenizer.decode(input_ids[0]))\n        assert re.match(regex, tokenizer.decode(input_ids[1]))\n\n\nmodels = [(model_transformers(), \"torch\")]\nif HAS_MLX:\n    models.append((model_mlxlm(), \"mlx\"))\n\n@pytest.mark.parametrize(\"model, tensor_library_name\", models)\ndef test_xgrammar_backend(model, tensor_library_name, json_schema, regex, cfg):\n    # initialization\n    backend = XGrammarBackend(model)\n    assert isinstance(backend.grammar_compiler, GrammarCompiler)\n\n    # json schema\n    processor = backend.get_json_schema_logits_processor(json_schema)\n    assert isinstance(processor, XGrammarLogitsProcessor)\n  
  generator = outlines.Generator(model, backend=\"xgrammar\", processor=processor)\n    response = generator(\"Hello, how are you?\")\n    assert response[0] == \"{\"\n    assert \"name\" in response\n\n    # regex\n    processor = backend.get_regex_logits_processor(regex)\n    assert isinstance(processor, XGrammarLogitsProcessor)\n    generator = outlines.Generator(model, backend=\"xgrammar\", processor=processor)\n    response = generator(\"Hello, how are you?\")\n    assert len(response) == 3\n    assert int(response)\n\n    # cfg\n    processor = backend.get_cfg_logits_processor(cfg)\n    assert isinstance(processor, XGrammarLogitsProcessor)\n    generator = outlines.Generator(model, backend=\"xgrammar\", processor=processor)\n    response = generator(\"Hello, how are you?\")\n    assert response == \"yes\" or response == \"no\"\n\n    # batch + multiple generations\n    processor = backend.get_json_schema_logits_processor(json_schema)\n    generator = outlines.Generator(model, backend=\"xgrammar\", processor=processor)\n    for _ in range(2):\n        if tensor_library_name == \"torch\":\n            response = generator.batch([\"Create a character\", \"Hello, how are you?\"], max_new_tokens=200)\n            assert len(response) == 2\n            for r in response:\n                assert r[0] == \"{\"\n                assert \"name\" in r\n        else:\n            response = generator(\"Create a character\", max_tokens=20)\n            assert response[0] == \"{\"\n            assert \"name\" in response\n\n\ndef test_xgrammar_backend_invalid_model():\n    with pytest.raises(\n        ValueError,\n        match=\"The xgrammar backend only supports Transformers and MLXLM models\",\n    ):\n        XGrammarBackend(model_llamacpp())\n"
  },
  {
    "path": "tests/cfg_samples/arithmetic/lots_of_ops.arithmetic.test",
    "content": "5+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1\n"
  },
  {
    "path": "tests/cfg_samples/arithmetic/simple_math.arithmetic.test",
    "content": "(1 * 2) - (0.1 * 2 * 9.42)\n"
  },
  {
    "path": "tests/cfg_samples/json/outlines.generate.samplers.mypy.json.test",
    "content": "{\n    \".class\": \"MypyFile\",\n    \"_fullname\": \"outlines.generate.samplers\",\n    \"future_import_flags\": [],\n    \"is_partial_stub_package\": false,\n    \"is_stub\": false,\n    \"names\": {\n        \".class\": \"SymbolTable\",\n        \"Protocol\": {\n            \".class\": \"SymbolTableNode\",\n            \"cross_ref\": \"typing.Protocol\",\n            \"kind\": \"Gdef\"\n        },\n        \"Sampler\": {\n            \".class\": \"SymbolTableNode\",\n            \"kind\": \"Gdef\",\n            \"node\": {\n                \".class\": \"TypeInfo\",\n                \"_promote\": [],\n                \"abstract_attributes\": [\n                    [\n                        \"__call__\",\n                        2\n                    ]\n                ],\n                \"alt_promote\": null,\n                \"bases\": [\n                    \"builtins.object\"\n                ],\n                \"dataclass_transform_spec\": null,\n                \"declared_metaclass\": null,\n                \"defn\": {\n                    \".class\": \"ClassDef\",\n                    \"fullname\": \"outlines.generate.samplers.Sampler\",\n                    \"name\": \"Sampler\",\n                    \"type_vars\": []\n                },\n                \"deletable_attributes\": [],\n                \"flags\": [\n                    \"is_abstract\",\n                    \"is_protocol\"\n                ],\n                \"fullname\": \"outlines.generate.samplers.Sampler\",\n                \"has_param_spec_type\": false,\n                \"metaclass_type\": \"abc.ABCMeta\",\n                \"metadata\": {},\n                \"module_name\": \"outlines.generate.samplers\",\n                \"mro\": [\n                    \"outlines.generate.samplers.Sampler\",\n                    \"builtins.object\"\n                ],\n                \"names\": {\n                    \".class\": \"SymbolTable\",\n                    \"__call__\": 
{\n                        \".class\": \"SymbolTableNode\",\n                        \"kind\": \"Mdef\",\n                        \"node\": {\n                            \".class\": \"FuncDef\",\n                            \"abstract_status\": 2,\n                            \"arg_kinds\": [\n                                0,\n                                0,\n                                0,\n                                0\n                            ],\n                            \"arg_names\": [\n                                \"self\",\n                                \"logits\",\n                                \"samples\",\n                                \"rng\"\n                            ],\n                            \"dataclass_transform_spec\": null,\n                            \"flags\": [\n                                \"is_trivial_body\"\n                            ],\n                            \"fullname\": \"outlines.generate.samplers.Sampler.__call__\",\n                            \"name\": \"__call__\",\n                            \"type\": {\n                                \".class\": \"CallableType\",\n                                \"arg_kinds\": [\n                                    0,\n                                    0,\n                                    0,\n                                    0\n                                ],\n                                \"arg_names\": [\n                                    \"self\",\n                                    \"logits\",\n                                    \"samples\",\n                                    \"rng\"\n                                ],\n                                \"arg_types\": [\n                                    \"outlines.generate.samplers.Sampler\",\n                                    {\n                                        \".class\": \"AnyType\",\n                                        \"missing_import_name\": 
\"outlines.generate.samplers.torch\",\n                                        \"source_any\": null,\n                                        \"type_of_any\": 3\n                                    },\n                                    \"builtins.int\",\n                                    {\n                                        \".class\": \"AnyType\",\n                                        \"missing_import_name\": \"outlines.generate.samplers.torch\",\n                                        \"source_any\": null,\n                                        \"type_of_any\": 3\n                                    }\n                                ],\n                                \"bound_args\": [],\n                                \"def_extras\": {\n                                    \"first_arg\": \"self\"\n                                },\n                                \"fallback\": \"builtins.function\",\n                                \"from_concatenate\": false,\n                                \"implicit\": false,\n                                \"is_ellipsis_args\": false,\n                                \"name\": \"__call__ of Sampler\",\n                                \"ret_type\": {\n                                    \".class\": \"AnyType\",\n                                    \"missing_import_name\": \"outlines.generate.samplers.torch\",\n                                    \"source_any\": null,\n                                    \"type_of_any\": 3\n                                },\n                                \"type_guard\": null,\n                                \"unpack_kwargs\": false,\n                                \"variables\": []\n                            }\n                        }\n                    }\n                },\n                \"self_type\": null,\n                \"slots\": null,\n                \"tuple_type\": null,\n                \"type_vars\": [],\n                \"typeddict_type\": null\n  
          }\n        },\n        \"__annotations__\": {\n            \".class\": \"SymbolTableNode\",\n            \"kind\": \"Gdef\",\n            \"node\": {\n                \".class\": \"Var\",\n                \"flags\": [\n                    \"is_ready\"\n                ],\n                \"fullname\": \"outlines.generate.samplers.__annotations__\",\n                \"name\": \"__annotations__\",\n                \"type\": {\n                    \".class\": \"Instance\",\n                    \"args\": [\n                        \"builtins.str\",\n                        {\n                            \".class\": \"AnyType\",\n                            \"missing_import_name\": null,\n                            \"source_any\": null,\n                            \"type_of_any\": 6\n                        }\n                    ],\n                    \"type_ref\": \"builtins.dict\"\n                }\n            }\n        },\n        \"__doc__\": {\n            \".class\": \"SymbolTableNode\",\n            \"kind\": \"Gdef\",\n            \"node\": {\n                \".class\": \"Var\",\n                \"flags\": [\n                    \"is_ready\"\n                ],\n                \"fullname\": \"outlines.generate.samplers.__doc__\",\n                \"name\": \"__doc__\",\n                \"type\": \"builtins.str\"\n            }\n        },\n        \"__file__\": {\n            \".class\": \"SymbolTableNode\",\n            \"kind\": \"Gdef\",\n            \"node\": {\n                \".class\": \"Var\",\n                \"flags\": [\n                    \"is_ready\"\n                ],\n                \"fullname\": \"outlines.generate.samplers.__file__\",\n                \"name\": \"__file__\",\n                \"type\": \"builtins.str\"\n            }\n        },\n        \"__name__\": {\n            \".class\": \"SymbolTableNode\",\n            \"kind\": \"Gdef\",\n            \"node\": {\n                \".class\": \"Var\",\n              
  \"flags\": [\n                    \"is_ready\"\n                ],\n                \"fullname\": \"outlines.generate.samplers.__name__\",\n                \"name\": \"__name__\",\n                \"type\": \"builtins.str\"\n            }\n        },\n        \"__package__\": {\n            \".class\": \"SymbolTableNode\",\n            \"kind\": \"Gdef\",\n            \"node\": {\n                \".class\": \"Var\",\n                \"flags\": [\n                    \"is_ready\"\n                ],\n                \"fullname\": \"outlines.generate.samplers.__package__\",\n                \"name\": \"__package__\",\n                \"type\": \"builtins.str\"\n            }\n        },\n        \"greedy\": {\n            \".class\": \"SymbolTableNode\",\n            \"kind\": \"Gdef\",\n            \"node\": {\n                \".class\": \"FuncDef\",\n                \"abstract_status\": 0,\n                \"arg_kinds\": [\n                    0,\n                    0,\n                    2\n                ],\n                \"arg_names\": [\n                    \"logits\",\n                    \"samples\",\n                    \"_\"\n                ],\n                \"dataclass_transform_spec\": null,\n                \"flags\": [],\n                \"fullname\": \"outlines.generate.samplers.greedy\",\n                \"name\": \"greedy\",\n                \"type\": {\n                    \".class\": \"CallableType\",\n                    \"arg_kinds\": [\n                        0,\n                        0,\n                        2\n                    ],\n                    \"arg_names\": [\n                        \"logits\",\n                        \"samples\",\n                        \"_\"\n                    ],\n                    \"arg_types\": [\n                        {\n                            \".class\": \"AnyType\",\n                            \"missing_import_name\": \"outlines.generate.samplers.torch\",\n                     
       \"source_any\": null,\n                            \"type_of_any\": 3\n                        },\n                        \"builtins.int\",\n                        {\n                            \".class\": \"AnyType\",\n                            \"missing_import_name\": null,\n                            \"source_any\": null,\n                            \"type_of_any\": 1\n                        }\n                    ],\n                    \"bound_args\": [],\n                    \"def_extras\": {\n                        \"first_arg\": null\n                    },\n                    \"fallback\": \"builtins.function\",\n                    \"from_concatenate\": false,\n                    \"implicit\": false,\n                    \"is_ellipsis_args\": false,\n                    \"name\": \"greedy\",\n                    \"ret_type\": {\n                        \".class\": \"AnyType\",\n                        \"missing_import_name\": \"outlines.generate.samplers.torch\",\n                        \"source_any\": null,\n                        \"type_of_any\": 3\n                    },\n                    \"type_guard\": null,\n                    \"unpack_kwargs\": false,\n                    \"variables\": []\n                }\n            }\n        },\n        \"multinomial\": {\n            \".class\": \"SymbolTableNode\",\n            \"kind\": \"Gdef\",\n            \"node\": {\n                \".class\": \"FuncDef\",\n                \"abstract_status\": 0,\n                \"arg_kinds\": [\n                    0,\n                    0,\n                    0\n                ],\n                \"arg_names\": [\n                    \"logits\",\n                    \"samples\",\n                    \"rng\"\n                ],\n                \"dataclass_transform_spec\": null,\n                \"flags\": [],\n                \"fullname\": \"outlines.generate.samplers.multinomial\",\n                \"name\": \"multinomial\",\n         
       \"type\": {\n                    \".class\": \"CallableType\",\n                    \"arg_kinds\": [\n                        0,\n                        0,\n                        0\n                    ],\n                    \"arg_names\": [\n                        \"logits\",\n                        \"samples\",\n                        \"rng\"\n                    ],\n                    \"arg_types\": [\n                        {\n                            \".class\": \"AnyType\",\n                            \"missing_import_name\": \"outlines.generate.samplers.torch\",\n                            \"source_any\": null,\n                            \"type_of_any\": 3\n                        },\n                        \"builtins.int\",\n                        {\n                            \".class\": \"AnyType\",\n                            \"missing_import_name\": \"outlines.generate.samplers.torch\",\n                            \"source_any\": null,\n                            \"type_of_any\": 3\n                        }\n                    ],\n                    \"bound_args\": [],\n                    \"def_extras\": {\n                        \"first_arg\": null\n                    },\n                    \"fallback\": \"builtins.function\",\n                    \"from_concatenate\": false,\n                    \"implicit\": false,\n                    \"is_ellipsis_args\": false,\n                    \"name\": \"multinomial\",\n                    \"ret_type\": {\n                        \".class\": \"AnyType\",\n                        \"missing_import_name\": \"outlines.generate.samplers.torch\",\n                        \"source_any\": null,\n                        \"type_of_any\": 3\n                    },\n                    \"type_guard\": null,\n                    \"unpack_kwargs\": false,\n                    \"variables\": []\n                }\n            }\n        },\n        \"torch\": {\n            \".class\": 
\"SymbolTableNode\",\n            \"kind\": \"Gdef\",\n            \"node\": {\n                \".class\": \"Var\",\n                \"flags\": [\n                    \"is_suppressed_import\",\n                    \"is_ready\",\n                    \"is_inferred\"\n                ],\n                \"fullname\": \"outlines.generate.samplers.torch\",\n                \"name\": \"torch\",\n                \"type\": {\n                    \".class\": \"AnyType\",\n                    \"missing_import_name\": \"outlines.generate.samplers.torch\",\n                    \"source_any\": null,\n                    \"type_of_any\": 3\n                }\n            }\n        }\n    },\n    \"path\": \"/home/andrew/p/outlines/outlines/generate/samplers.py\"\n}\n"
  },
  {
    "path": "tests/cfg_samples/json/simple_fruit.json.test",
    "content": "[\n    {\n        \"ID\": \"1\",\n        \"Name\": \"Andrew \\\"The Escaper\\\" Lapp\",\n        \"Age\": \"30\",\n        \"FavFruit\": \"Banana\"\n    },\n    {\n        \"ID\": \"2\",\n        \"Name\": \"Mohammad\",\n        \"Age\": \"40\",\n        \"FavFruit\": \"\\\"Any Fruit As Long as It's In Quotes!\\\"\"\n    },\n    {\n        \"ID\": \"3\",\n        \"Name\": \"Alice\",\n        \"Age\": \"61\",\n        \"FavFruit\": \"Peaches, but only \\n newline separated peaches\"\n    }\n]\n"
  },
  {
    "path": "tests/cfg_samples/json/simple_fruit_no_indent.json.test",
    "content": "[{\"ID\": \"1\", \"Name\": \"Andrew\", \"Age\": \"30\", \"FavFruit\": \"Banana\"}, {\"ID\": \"2\", \"Name\": \"Mohammad\", \"Age\": \"40\", \"FavFruit\": \"Apple\"}, {\"ID\": \"3\", \"Name\": \"Alice\", \"Age\": \"61\", \"FavFruit\": \"Peach\"}]\n"
  },
  {
    "path": "tests/conftest.py",
    "content": "import sys\n\nimport pytest\n\n\ndef pytest_collection_modifyitems(config, items):\n    if sys.platform != \"linux\":\n        if not config.option.keyword or (\n            config.option.keyword and \"test_integration_vllm\" in config.option.keyword\n        ):\n            print(\n                \"WARNING: test_integration_vllm tests are skipped because vLLM only supports Linux platform (including WSL).\"\n            )\n        skip_vllm = pytest.mark.skip(reason=\"vLLM models can only be run on Linux.\")\n        for item in items:\n            if \"test_integration_vllm\" in item.nodeid:\n                item.add_marker(skip_vllm)\n"
  },
  {
    "path": "tests/models/test_anthopic_type_adapter.py",
    "content": "import io\nimport pytest\nfrom dataclasses import dataclass\n\nfrom PIL import Image as PILImage\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.anthropic import AnthropicTypeAdapter\n\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\n@pytest.fixture\ndef adapter():\n    return AnthropicTypeAdapter()\n\n\ndef test_anthropic_type_adapter_input_text(adapter):\n    message = \"prompt\"\n    result = adapter.format_input(message)\n    assert result == {\"messages\": [{\"role\": \"user\", \"content\": message}]}\n\n\ndef test_anthropic_type_adapter_input_vision(adapter, image):\n    image_input = Image(image)\n    text_input = \"hello\"\n    result = adapter.format_input([text_input, image_input])\n    assert result == {\n        \"messages\": [\n            {\n                \"role\": \"user\",\n                \"content\": [\n                    {\n                        \"type\": \"image\",\n                        \"source\": {\n                            \"type\": \"base64\",\n                            \"media_type\": \"image/png\",\n                            \"data\": image_input.image_str,\n                        },\n                    },\n                    {\"type\": \"text\", \"text\": text_input},\n                ],\n            },\n        ]\n    }\n\n\ndef test_anthropic_type_adapter_input_chat(adapter, image):\n    image_input = Image(image)\n    model_input = Chat(messages=[\n        {\"role\": \"system\", \"content\": \"prompt\"},\n        {\"role\": \"user\", \"content\": [\n            \"hello\",\n            image_input,\n        ]},\n        {\"role\": \"assistant\", \"content\": 
\"response\"},\n    ])\n    result = adapter.format_input(model_input)\n    assert result == {\n        \"messages\": [\n            {\"role\": \"system\", \"content\": \"prompt\"},\n            {\"role\": \"user\", \"content\": [\n                {\n                    \"type\": \"image\",\n                    \"source\": {\n                        \"type\": \"base64\",\n                        \"media_type\": \"image/png\",\n                        \"data\": image_input.image_str,\n                    },\n                },\n                {\"type\": \"text\", \"text\": \"hello\"},\n            ]},\n            {\"role\": \"assistant\", \"content\": \"response\"},\n        ]\n    }\n\n\ndef test_anthropic_type_adapter_input_invalid(adapter):\n    @dataclass\n    class Audio:\n        file: str\n\n    with pytest.raises(TypeError, match=\"is not available with Anthropic\"):\n        _ = adapter.format_input(Audio(\"file\"))\n\n    with pytest.raises(\n        ValueError,\n        match=\"All assets provided must be of type Image\",\n    ):\n        _ = adapter.format_input([\"prompt\", Audio(\"file\")])\n\n    with pytest.raises(\n        ValueError,\n        match=\"The content must be a string or a list\",\n    ):\n        _ = adapter.format_input(\n            Chat(messages=[{\"role\": \"user\", \"content\": {\"foo\": \"bar\"}}])\n        )\n\n\ndef test_anthropic_type_adapter_output(adapter):\n    with pytest.raises(\n        NotImplementedError,\n        match=\"is not available with Anthropic\"\n    ):\n        adapter.format_output_type(str)\n"
  },
  {
    "path": "tests/models/test_anthropic.py",
    "content": "import io\nfrom typing import Generator\n\nfrom anthropic import Anthropic as AnthropicClient\nfrom PIL import Image as PILImage\nimport pytest\n\nimport outlines\nfrom outlines.inputs import Chat, Image, Video\nfrom outlines.models.anthropic import Anthropic\n\n\nMODEL_NAME = \"claude-3-haiku-20240307\"\n\n\n@pytest.fixture(scope=\"session\")\ndef model():\n    return Anthropic(AnthropicClient(), MODEL_NAME)\n\n\n@pytest.fixture(scope=\"session\")\ndef model_no_model_name():\n    return Anthropic(AnthropicClient())\n\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\ndef test_init_from_client():\n    client = AnthropicClient()\n\n    # With model name\n    model = outlines.from_anthropic(client, MODEL_NAME)\n    assert isinstance(model, Anthropic)\n    assert model.client == client\n    assert model.model_name == MODEL_NAME\n\n    # Without model name\n    model = outlines.from_anthropic(client)\n    assert isinstance(model, Anthropic)\n    assert model.client == client\n    assert model.model_name is None\n\n\ndef test_anthropic_wrong_inference_parameters():\n    with pytest.raises(TypeError, match=\"got an unexpected\"):\n        model = Anthropic(AnthropicClient(), MODEL_NAME)\n        model.generate(\"prompt\", foo=10, max_tokens=1024)\n\n\ndef test_anthropic_wrong_input_type(image):\n    class Foo:\n        def __init__(self, foo):\n            self.foo = foo\n\n    with pytest.raises(TypeError, match=\"is not available\"):\n        model = Anthropic(AnthropicClient(), MODEL_NAME)\n        model.generate(Foo(\"prompt\"))\n\n    with pytest.raises(ValueError, match=\"All assets provided must be of type Image\"):\n        
model.generate([\"foo?\", Image(image), Video(\"\")])\n\n\ndef test_anthropic_wrong_output_type():\n    class Foo:\n        def __init__(self, foo):\n            self.foo = foo\n\n    with pytest.raises(NotImplementedError, match=\"is not available\"):\n        model = Anthropic(AnthropicClient(), MODEL_NAME)\n        model.generate(\"prompt\", Foo(1))\n\n\n@pytest.mark.api_call\ndef test_anthropic_simple_call(model):\n    result = model.generate(\"Respond with one word. Not more.\", max_tokens=1024)\n    assert isinstance(result, str)\n\n\n@pytest.mark.xfail(reason=\"Anthropic requires the `max_tokens` parameter to be set\")\n@pytest.mark.api_call\ndef test_anthropic_direct_call(model_no_model_name):\n    result = model_no_model_name(\n        \"Respond with one word. Not more.\",\n        model_name=MODEL_NAME,\n        max_tokens=1024,\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_anthropic_simple_vision(model, image):\n    result = model.generate(\n        [\n            \"What does this logo represent?\",\n            Image(image),\n        ],\n        max_tokens=1024,\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_anthropic_chat(model, image):\n    result = model.generate(Chat(messages=[\n        {\"role\": \"assistant\", \"content\": \"How can I help you today?\"},\n        {\n            \"role\": \"user\",\n            \"content\": [\"What does this logo represent?\", Image(image)]\n        },\n    ]), max_tokens=10)\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_anthopic_streaming(model):\n    result = model.stream(\"Respond with one word. 
Not more.\", max_tokens=1024)\n    assert isinstance(result, Generator)\n    assert isinstance(next(result), str)\n\n\ndef test_anthropic_batch(model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n            max_tokens=1024,\n        )\n"
  },
  {
    "path": "tests/models/test_dottxt.py",
    "content": "import json\nimport os\n\nimport pytest\nfrom dottxt.client import Dottxt as DottxtClient\nfrom pydantic import BaseModel\n\nimport outlines\nfrom outlines import Generator\nfrom outlines.models.dottxt import Dottxt\n\n\nMODEL_NAME = \"dottxt/dottxt-v1-alpha\"\nMODEL_REVISION = \"d06c86726aadd8dadb92c5b9b9e3ce8ef246c471\"\n\n\nclass User(BaseModel):\n    first_name: str\n    last_name: str\n    user_id: int\n\n\n@pytest.fixture(scope=\"session\")\ndef api_key():\n    \"\"\"Get the Dottxt API key from the environment, providing a default value\n    if not found.\n\n    This fixture should be used for tests that do not make actual api calls,\n    but still require to initialize the Dottxt client.\n\n    \"\"\"\n    api_key = os.getenv(\"DOTTXT_API_KEY\")\n    if not api_key:\n        return \"MOCK_API_KEY\"\n    return api_key\n\n\n@pytest.fixture(scope=\"session\")\ndef model_name_and_revision(api_key):\n    client = DottxtClient(api_key=api_key)\n    model_list = client.list_models()\n    return (model_list[0].name, model_list[0].revision)\n\n\n@pytest.fixture(scope=\"session\")\ndef model(api_key, model_name_and_revision):\n    client = DottxtClient(api_key=api_key)\n    return Dottxt(\n        client,\n        model_name_and_revision[0],\n        model_name_and_revision[1],\n    )\n\n\n@pytest.fixture(scope=\"session\")\ndef model_no_model_name(api_key):\n    client = DottxtClient(api_key=api_key)\n    return Dottxt(client)\n\n\n@pytest.mark.api_call\ndef test_dottxt_init_from_client(api_key, model_name_and_revision):\n    client = DottxtClient(api_key=api_key)\n\n    # Without model name\n    model = outlines.from_dottxt(client)\n    assert isinstance(model, Dottxt)\n    assert model.client == client\n    assert model.model_name is None\n\n    # With model name\n    model = outlines.from_dottxt(\n        client,\n        model_name_and_revision[0],\n        model_name_and_revision[1],\n    )\n    assert isinstance(model, Dottxt)\n    assert 
model.client == client\n    assert model.model_name == model_name_and_revision[0]\n    assert model.model_revision == model_name_and_revision[1]\n\n\ndef test_dottxt_wrong_output_type(model_no_model_name):\n    with pytest.raises(TypeError, match=\"You must provide an output type\"):\n        model_no_model_name(\"prompt\")\n\n\ndef test_dottxt_wrong_input_type(model_no_model_name):\n    with pytest.raises(TypeError, match=\"is not available\"):\n        model_no_model_name([\"prompt\"], User)\n\n\n@pytest.mark.api_call\ndef test_dottxt_wrong_inference_parameters(model_no_model_name):\n    with pytest.raises(TypeError, match=\"got an unexpected\"):\n        model_no_model_name(\"prompt\", User, foo=10)\n\n\n@pytest.mark.api_call\ndef test_dottxt_direct_pydantic_call(model_no_model_name):\n    result = model_no_model_name(\"Create a user\", User)\n    assert \"first_name\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_dottxt_direct_jsonschema_call(\n    model_no_model_name, model_name_and_revision\n):\n    result = model_no_model_name(\n        \"Create a user\",\n        User,\n        model_name=model_name_and_revision[0],\n        model_revision=model_name_and_revision[1],\n    )\n    assert \"first_name\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_dottxt_generator_pydantic_call(model):\n    generator = Generator(model, User)\n    result = generator(\"Create a user\")\n    assert \"first_name\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_dottxt_streaming(model):\n    with pytest.raises(\n        NotImplementedError,\n        match=\"Dottxt does not support streaming\"\n    ):\n        model.stream(\"Create a user\", User)\n\n\n@pytest.mark.api_call\ndef test_dottxt_batch(model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"]\n        )\n"
  },
  {
    "path": "tests/models/test_dottxt_type_adapter.py",
    "content": "import io\nimport json\nimport pytest\nimport sys\nfrom dataclasses import dataclass\n\nfrom PIL import Image as PILImage\nfrom genson import SchemaBuilder\nfrom pydantic import BaseModel\n\nfrom outlines.inputs import Image\nfrom outlines.models.dottxt import DottxtTypeAdapter\nfrom outlines.types import cfg, json_schema, regex\n\nif sys.version_info >= (3, 12):\n    from typing import TypedDict\nelse:\n    from typing_extensions import TypedDict\n\n\n@pytest.fixture\ndef schema():\n    return {\n        \"properties\": {\n            \"user_id\": {\"title\": \"User Id\", \"type\": \"integer\"},\n            \"name\": {\"title\": \"Name\", \"type\": \"string\"},\n        },\n        \"required\": [\"user_id\", \"name\"],\n        \"title\": \"User\",\n        \"type\": \"object\",\n    }\n\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\n@pytest.fixture\ndef adapter():\n    return DottxtTypeAdapter()\n\n\ndef test_dottxt_type_adapter_input_text(adapter):\n    message = \"prompt\"\n    result = adapter.format_input(message)\n    assert result == message\n\n\ndef test_dottxt_type_adapter_input_invalid(adapter, image):\n    prompt = [\"prompt\", image]\n    with pytest.raises(TypeError, match=\"The input type\"):\n        _ = adapter.format_input(prompt)\n\n\ndef test_dottxt_type_adapter_output_invalid(adapter):\n    with pytest.raises(TypeError, match=\"You must provide an output type\"):\n        adapter.format_output_type(None)\n\n    with pytest.raises(TypeError, match=\"The type `str` is not supported\"):\n        adapter.format_output_type(str)\n\n    with pytest.raises(TypeError, match=\"The type `int` is not supported\"):\n 
       adapter.format_output_type(int)\n\n    with pytest.raises(TypeError, match=\"Regex-based structured outputs will soon be\"):\n        adapter.format_output_type(regex(\"[0-9]\"))\n\n    with pytest.raises(TypeError, match=\"CFG-based structured outputs will soon be\"):\n        adapter.format_output_type(cfg(\"\"))\n\n\ndef test_dottxt_type_adapter_output_dataclass(adapter, schema):\n    @dataclass\n    class User:\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert result == json.dumps(schema)\n\n\ndef test_dottxt_type_adapter_output_typed_dict(adapter, schema):\n    class User(TypedDict):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert result == json.dumps(schema)\n\n\ndef test_dottxt_type_adapter_output_pydantic(adapter, schema):\n    class User(BaseModel):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert result == json.dumps(schema)\n\n\ndef test_dottxt_type_adapter_output_genson_schema_builder(adapter, schema):\n    builder = SchemaBuilder()\n    builder.add_schema({\"type\": \"object\", \"properties\": {}})\n    builder.add_object({\"hi\": \"there\"})\n    builder.add_object({\"hi\": 5})\n\n    result = adapter.format_output_type(builder)\n    result_dict = json.loads(result)\n    assert isinstance(result_dict, dict)\n    expected_schema = {\n        \"$schema\": \"http://json-schema.org/schema#\",\n        \"type\": \"object\",\n        \"properties\": {\"hi\": {\"type\": [\"integer\", \"string\"]}},\n        \"required\": [\"hi\"],\n    }\n    assert result_dict == expected_schema\n\n\ndef test_dottxt_type_adapter_json_schema_str(adapter, schema):\n    schema_str = json.dumps(schema)\n    result = adapter.format_output_type(json_schema(schema_str))\n    assert result == json.dumps(schema)\n\n\ndef test_dottxt_type_adapter_json_schema_dict(adapter, schema):\n    result = 
adapter.format_output_type(json_schema(schema))\n    assert result == json.dumps(schema)\n"
  },
  {
    "path": "tests/models/test_gemini.py",
    "content": "import io\nimport json\nimport sys\nfrom dataclasses import dataclass\nfrom enum import Enum\nfrom typing import Generator, Literal\n\nimport pytest\nfrom PIL import Image as PILImage\nfrom google.genai import Client\nfrom pydantic import BaseModel, ValidationError\n\nimport outlines\nfrom outlines.inputs import Chat, Image, Video\nfrom outlines.models.gemini import Gemini\nfrom outlines.types import Choice\n\nif sys.version_info >= (3, 12):\n    from typing import TypedDict\nelse:\n    from typing_extensions import TypedDict\n\nMODEL_NAME = \"gemini-1.5-flash-latest\"\n\n\n@pytest.fixture(scope=\"session\")\ndef model():\n    return Gemini(Client(), MODEL_NAME)\n\n\n@pytest.fixture(scope=\"session\")\ndef model_no_model_name():\n    return Gemini(Client())\n\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\n@pytest.mark.api_call\ndef test_gemini_init_from_client():\n    client = Client()\n\n    # Without model name\n    model = outlines.from_gemini(client)\n    assert isinstance(model, Gemini)\n    assert model.client == client\n    assert model.model_name is None\n\n    # With model name\n    model = outlines.from_gemini(client, MODEL_NAME)\n    assert isinstance(model, Gemini)\n    assert model.client == client\n    assert model.model_name == MODEL_NAME\n\n\n@pytest.mark.api_call\ndef test_gemini_wrong_inference_parameters(model):\n    with pytest.raises(ValidationError):\n        model.generate(\"prompt\", foo=10)\n\n\n@pytest.mark.api_call\ndef test_gemini_wrong_input_type(model, image):\n    with pytest.raises(ValueError, match=\"All assets provided must be of type Image\"):\n        model.generate([\"foo?\", Image(image), 
Video(\"\")])\n\n\n@pytest.mark.api_call\ndef test_gemini_simple_call(model):\n    result = model.generate(\"Respond with one word. Not more.\")\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_gemini_direct_call(model_no_model_name):\n    result = model_no_model_name(\n        \"Respond with one word. Not more.\",\n        model=MODEL_NAME\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_gemini_simple_vision(model, image):\n    result = model.generate([\"What does this logo represent?\", Image(image)])\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_gemini_chat(model, image):\n    result = model.generate(Chat(messages=[\n        {\"role\": \"assistant\", \"content\": \"How can I help you today?\"},\n        {\n            \"role\": \"user\",\n            \"content\": [\"What does this logo represent?\", Image(image)]\n        },\n    ]))\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_gemini_simple_pydantic(model):\n    class Foo(BaseModel):\n        bar: int\n\n    result = model.generate(\"foo?\", Foo)\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_gemini_simple_vision_pydantic(model, image):\n    class Logo(BaseModel):\n        name: int\n\n    result = model.generate([\"What does this logo represent?\", Image(image)], Logo)\n    assert isinstance(result, str)\n    assert \"name\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_gemini_nested_pydantic(model):\n    class Bar(BaseModel):\n        fu: str\n\n    class Foo(BaseModel):\n        sna: int\n        bar: Bar\n\n    result = model.generate(\"foo?\", Foo)\n    assert isinstance(result, str)\n    assert \"sna\" in json.loads(result)\n    assert \"bar\" in json.loads(result)\n    assert \"fu\" in json.loads(result)[\"bar\"]\n\n\n@pytest.mark.xfail(\n    reason=\"The Gemini SDK's serialization method does not support Json Schema 
strings.\"\n)\n@pytest.mark.api_call\ndef test_gemini_simple_json_schema_string(model):\n    schema = \"{'properties': {'bar': {'title': 'Bar', 'type': 'integer'}}, 'required': ['bar'], 'title': 'Foo', 'type': 'object'}\"\n    result = model.generate(\"foo?\", schema)\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.xfail(\n    reason=\"The Gemini SDK's serialization method does not support Json Schema dictionaries.\"\n)\n@pytest.mark.api_call\ndef test_gemini_simple_json_schema_dict(model):\n    schema = {\n        \"properties\": {\"bar\": {\"type\": \"integer\"}},\n        \"required\": [\"bar\"],\n        \"type\": \"object\",\n    }\n    result = model.generate(\"foo?\", schema)\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_gemini_simple_typed_dict(model):\n    class Foo(TypedDict):\n        bar: int\n\n    result = model.generate(\"foo?\", Foo)\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_gemini_simple_dataclass(model):\n    @dataclass\n    class Foo:\n        bar: int\n\n    result = model.generate(\"foo?\", Foo)\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_gemini_simple_choice_enum(model):\n    class Foo(Enum):\n        bar = \"Bar\"\n        foor = \"Foo\"\n\n    result = model.generate(\"foo?\", Foo)\n    assert isinstance(result, str)\n    assert result == \"Foo\" or result == \"Bar\"\n\n\n@pytest.mark.api_call\ndef test_gemini_simple_choice_choice(model):\n    result = model.generate(\"foo?\", Choice([\"Foo\", \"Bar\"]))\n    assert isinstance(result, str)\n    assert result == \"Foo\" or result == \"Bar\"\n\n\n@pytest.mark.api_call\ndef test_gemini_sample_choice_literal(model):\n    result = model.generate(\"foo?\", Literal[\"Foo\", \"Bar\"])\n    assert isinstance(result, str)\n    assert result == 
\"Foo\" or result == \"Bar\"\n\n\n@pytest.mark.xfail(\n    reason=\"Gemini supports lists for choices but we do not as it is semantically incorrect.\"\n)\n@pytest.mark.api_call\ndef test_gemini_simple_choice_list(model):\n    choices = [\"Foo\", \"Bar\"]\n    result = model.generate(\"foo?\", choices)\n    assert isinstance(result, str)\n    assert result == \"Foo\" or result == \"Bar\"\n\n\n@pytest.mark.api_call\ndef test_gemini_simple_list_pydantic(model):\n    class Foo(BaseModel):\n        bar: int\n\n    result = model.generate(\"foo?\", list[Foo])\n    assert isinstance(json.loads(result), list)\n    assert isinstance(json.loads(result)[0], dict)\n    assert \"bar\" in json.loads(result)[0]\n\n\n@pytest.mark.api_call\ndef test_gemini_streaming(model):\n    result = model.stream(\"Respond with one word. Not more.\")\n    assert isinstance(result, Generator)\n    assert isinstance(next(result), str)\n\n\n@pytest.mark.api_call\ndef test_gemini_batch(model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n"
  },
  {
    "path": "tests/models/test_gemini_type_adapter.py",
    "content": "import io\nimport pytest\nimport sys\nfrom dataclasses import dataclass\nfrom enum import Enum, EnumMeta\nfrom typing import Literal, get_args\n\nfrom PIL import Image as PILImage\nfrom genson import SchemaBuilder\nfrom pydantic import BaseModel\n\nfrom outlines import cfg, json_schema, regex\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.gemini import GeminiTypeAdapter\nfrom outlines.types.utils import is_dataclass\n\nif sys.version_info >= (3, 12):\n    from typing import TypedDict\nelse:\n    from typing_extensions import TypedDict\n\n\n@pytest.fixture\ndef schema():\n    return {\n        \"properties\": {\n            \"user_id\": {\"title\": \"User Id\", \"type\": \"integer\"},\n            \"name\": {\"title\": \"Name\", \"type\": \"string\"},\n        },\n        \"required\": [\"user_id\", \"name\"],\n        \"title\": \"User\",\n        \"type\": \"object\",\n    }\n\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\n@pytest.fixture\ndef adapter():\n    return GeminiTypeAdapter()\n\n\ndef test_gemini_type_adapter_input_text(adapter):\n    message = \"prompt\"\n    result = adapter.format_input(message)\n    assert result == {\"contents\": [{\"text\": message}]}\n\n\ndef test_gemini_type_adapter_input_vision(adapter, image):\n    image_input = Image(image)\n    text_input = \"hello\"\n    result = adapter.format_input([text_input, image_input])\n    assert result == {\n        \"contents\": [\n            {\n                \"role\": \"user\",\n                \"parts\": [\n                    {\"text\": text_input},\n                    {\n                        \"inline_data\": {\n                       
     \"mime_type\": \"image/png\",\n                            \"data\": image_input.image_str,\n                        },\n                    },\n                ],\n            },\n        ]\n    }\n\n\ndef test_gemini_type_adapter_input_chat(adapter, image):\n    image_input = Image(image)\n    input_message = Chat(messages=[\n        {\"role\": \"assistant\", \"content\": \"How can I help you today?\"},\n        {\"role\": \"user\", \"content\": [\n            \"What does this logo represent?\",\n            image_input,\n        ]},\n    ])\n    result = adapter.format_input(input_message)\n    assert result == {\n        \"contents\": [\n            {\"role\": \"model\", \"parts\": [{\"text\": \"How can I help you today?\"}]},\n            {\n                \"role\": \"user\",\n                \"parts\": [\n                    {\"text\": \"What does this logo represent?\"},\n                    {\n                        \"inline_data\": {\n                            \"mime_type\": \"image/png\",\n                            \"data\": image_input.image_str,\n                        },\n                    },\n                ],\n            },\n        ]\n    }\n\n\ndef test_gemini_type_adapter_input_invalid(adapter):\n    @dataclass\n    class Audio:\n        file: str\n\n    prompt = Audio(\n        \"file\",\n    )\n    with pytest.raises(TypeError, match=\"The input type\"):\n        _ = adapter.format_input(prompt)\n\n\ndef test_gemini_type_adapter_output_invalid(adapter):\n    with pytest.raises(TypeError, match=\"The type `str` is not supported\"):\n        adapter.format_output_type(str)\n\n    with pytest.raises(TypeError, match=\"The type `int` is not supported\"):\n        adapter.format_output_type(int)\n\n    with pytest.raises(TypeError, match=\"Neither regex-based\"):\n        adapter.format_output_type(regex(\"[0-9]\"))\n\n    with pytest.raises(TypeError, match=\"CFG-based structured outputs\"):\n        
adapter.format_output_type(cfg(\"\"))\n\n\ndef test_gemini_type_adapter_output_none(adapter):\n    result = adapter.format_output_type(None)\n    assert result == {}\n\n\ndef test_gemini_type_adapter_output_json_schema(adapter, schema):\n    result = adapter.format_output_type(json_schema(schema))\n    assert isinstance(result, dict)\n    assert result[\"response_mime_type\"] == \"application/json\"\n    assert is_dataclass(result[\"response_schema\"])\n\n\ndef test_gemini_type_adapter_output_list_json_schema(adapter, schema):\n    result = adapter.format_output_type(list[json_schema(schema)])\n    assert isinstance(result, dict)\n    assert result[\"response_mime_type\"] == \"application/json\"\n    args = get_args(result[\"response_schema\"])\n    assert len(args) == 1\n    assert is_dataclass(args[0])\n\n\ndef test_gemini_type_adapter_output_dataclass(adapter):\n    @dataclass\n    class User:\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert result == {\n        \"response_mime_type\": \"application/json\",\n        \"response_schema\": User,\n    }\n\n\ndef test_gemini_type_adapter_output_list_dataclass(adapter):\n    class User(BaseModel):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(list[User])\n    assert result == {\n        \"response_mime_type\": \"application/json\",\n        \"response_schema\": list[User],\n    }\n\n\ndef test_gemini_type_adapter_output_typed_dict(adapter):\n    class User(TypedDict):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert result == {\n        \"response_mime_type\": \"application/json\",\n        \"response_schema\": User,\n    }\n\n\ndef test_gemini_type_adapter_output_list_typed_dict(adapter):\n    class User(BaseModel):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(list[User])\n    assert result == {\n        \"response_mime_type\": 
\"application/json\",\n        \"response_schema\": list[User],\n    }\n\n\ndef test_gemini_type_adapter_output_pydantic(adapter):\n    class User(BaseModel):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert result == {\n        \"response_mime_type\": \"application/json\",\n        \"response_schema\": User,\n    }\n\n\ndef test_gemini_type_adapter_output_list_pydantic(adapter):\n    class User(BaseModel):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(list[User])\n    assert result == {\n        \"response_mime_type\": \"application/json\",\n        \"response_schema\": list[User],\n    }\n\n\ndef test_gemini_type_adapter_output_genson_schema_builder(adapter):\n    builder = SchemaBuilder()\n    builder.add_schema({\"type\": \"object\", \"properties\": {\"foo\": {\"type\": \"string\"}, \"bar\": {\"type\": \"integer\"}}, \"required\": [\"foo\"]})\n    result = adapter.format_output_type(builder)\n    assert isinstance(result, dict)\n    assert result[\"response_mime_type\"] == \"application/json\"\n    assert is_dataclass(result[\"response_schema\"])\n\n\ndef test_gemini_type_adapter_output_list_genson_schema_builder(adapter):\n    builder = SchemaBuilder()\n    builder.add_schema({\"type\": \"object\", \"properties\": {\"foo\": {\"type\": \"string\"}, \"bar\": {\"type\": \"integer\"}}, \"required\": [\"foo\"]})\n    result = adapter.format_output_type(list[builder])\n    assert isinstance(result, dict)\n    assert result[\"response_mime_type\"] == \"application/json\"\n    args = get_args(result[\"response_schema\"])\n    assert len(args) == 1\n    assert is_dataclass(args[0])\n\n\ndef test_gemini_type_adapter_output_enum(adapter):\n    class Foo(Enum):\n        Bar = \"bar\"\n        Fuzz = \"fuzz\"\n\n    result = adapter.format_output_type(Foo)\n    assert result == {\n        \"response_mime_type\": \"text/x.enum\",\n        \"response_schema\": Foo,\n    }\n\n\ndef 
test_gemini_type_adapter_output_literal(adapter):\n    Foo = Literal[\"bar\", \"fuzz\"]\n    result = adapter.format_output_type(Foo)\n\n    assert isinstance(result, dict)\n    assert len(result) == 2\n    assert result[\"response_mime_type\"] == \"text/x.enum\"\n    assert isinstance(result[\"response_schema\"], EnumMeta)\n    assert len(result[\"response_schema\"].__members__) == 2\n    assert result[\"response_schema\"].bar.value == \"bar\"\n    assert result[\"response_schema\"].fuzz.value == \"fuzz\"\n"
  },
  {
    "path": "tests/models/test_llamacpp.py",
    "content": "import json\nfrom enum import Enum\n\nimport pytest\nfrom llama_cpp import Llama\nfrom pydantic import BaseModel\n\nfrom outlines.inputs import Chat\nfrom outlines.models.llamacpp import (\n    LlamaCpp,\n    LlamaCppTokenizer,\n    LlamaCppTypeAdapter,\n    from_llamacpp\n)\nfrom outlines.types.dsl import Regex, CFG\n\n\ndef test_load_model():\n    model = from_llamacpp(\n        Llama.from_pretrained(\n            repo_id=\"M4-ai/TinyMistral-248M-v2-Instruct-GGUF\",\n            filename=\"TinyMistral-248M-v2-Instruct.Q4_K_M.gguf\",\n            chat_format=\"qwen\"\n        )\n    )\n\n    assert isinstance(model, LlamaCpp)\n    assert isinstance(model.model, Llama)\n    assert isinstance(model.tokenizer, LlamaCppTokenizer)\n    assert isinstance(model.type_adapter, LlamaCppTypeAdapter)\n    assert model.tensor_library_name == \"numpy\"\n\n\n@pytest.fixture(scope=\"session\")\ndef model(tmp_path_factory):\n    return LlamaCpp(\n        Llama.from_pretrained(\n            repo_id=\"M4-ai/TinyMistral-248M-v2-Instruct-GGUF\",\n            filename=\"TinyMistral-248M-v2-Instruct.Q4_K_M.gguf\",\n            chat_format=\"qwen\",\n        )\n    )\n\n@pytest.fixture(scope=\"session\")\ndef model_no_chat(tmp_path_factory):\n    return LlamaCpp(\n        Llama.from_pretrained(\n            repo_id=\"tensorblock/Llama3-1B-Base-GGUF\",\n            filename=\"Llama3-1B-Base-Q2_K.gguf\",\n        ),\n        chat_mode=False\n    )\n\n@pytest.fixture\ndef lark_grammar():\n    return \"\"\"\n?start: sum\n\n?sum: product\n| sum \"+\" product   -> add\n| sum \"-\" product   -> sub\n\n?product: atom\n| product \"*\" atom  -> mul\n| product \"/\" atom  -> div\n\n?atom: NUMBER           -> number\n| \"-\" atom         -> neg\n| \"(\" sum \")\"\n\n%import common.NUMBER\n%import common.WS_INLINE\n\n%ignore WS_INLINE\n\"\"\"\n\n@pytest.fixture\ndef ebnf_grammar():\n    return \"\"\"\nroot ::= answer\nanswer ::= \"yes\" | \"no\"\n\"\"\"\n\n\ndef 
test_llamacpp_simple(model):\n    result = model.generate(\"Respond with one word. Not more.\", None)\n    assert isinstance(result, str)\n\n\ndef test_llamacpp_chat(model):\n    result = model.generate(\n        Chat(\n            messages=[\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n                {\"role\": \"user\", \"content\": \"Respond with one word. Not more.\"}\n            ]\n        ),\n        max_tokens=10\n    )\n    assert isinstance(result, str)\n\n\ndef test_llamacpp_regex(model):\n    result = model(\"Respond with one word. Not more.\", Regex(r\"[0-9]\"))\n    assert isinstance(result, str)\n    assert int(result)\n    assert len(result) == 1\n\n\ndef test_llamacpp_json(model):\n    class Foo(BaseModel):\n        bar: str\n\n    result = model(\"foo? Respond with one word.\", Foo, max_tokens=100)\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\ndef test_llamacpp_choice(model):\n    class Foo(Enum):\n        bar = \"Bar\"\n        foor = \"Foo\"\n\n    result = model(\"foo?\", Foo)\n    assert result == \"Foo\" or result == \"Bar\"\n\n\ndef test_llamacpp_cfg(model, ebnf_grammar):\n    response = model(\"Respond with one word. Not more.\", CFG(ebnf_grammar))\n    assert response in [\"yes\", \"no\"]\n\n\ndef test_llamacpp_cfg_outlines_core(model, lark_grammar):\n    with pytest.raises(\n        NotImplementedError,\n        match=\"Outlines Core does not support context-free grammar.\"\n    ):\n        model(\n            \"Respond with one word. Not more.\",\n            CFG(lark_grammar),\n            backend=\"outlines_core\"\n        )\n\n\ndef test_llamacpp_text_stop(model):\n    result = model.generate(\"Write the letter a.\", None, stop=\"a\", max_tokens=100)\n    assert \"a\" not in result\n\n\ndef test_llamacpp_stream_simple(model):\n    generator = model.stream(\"Respond with one word. 
Not more.\", None)\n\n    for x in generator:\n        assert isinstance(x, str)\n\n\ndef test_llamacpp_stream_chat(model):\n    generator = model.stream(\n        Chat(\n            messages=[\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n                {\"role\": \"user\", \"content\": \"Respond with one word. Not more.\"}\n            ]\n        ),\n        max_tokens=10\n    )\n    for x in generator:\n        assert isinstance(x, str)\n\n\ndef test_llamacpp_stream_regex(model):\n    generator = model.stream(\"Respond with one word. Not more.\", Regex(r\"[0-9]\"))\n\n    x = next(generator)\n    assert isinstance(x, str)\n\n\ndef test_llamacpp_stream_json(model):\n    class Foo(BaseModel):\n        bar: int\n\n    generator = model.stream(\"foo?\", Foo)\n\n    # NOTE: The first few chunks may be empty (role info, control tokens, finish chunks)\n    # Relevant issue: https://github.com/abetlen/llama-cpp-python/issues/372\n    first_non_empty_token = next(x for x in generator if x)\n    assert first_non_empty_token == \"{\"\n\n\ndef test_llamacpp_stream_cfg(model, ebnf_grammar):\n    response = \"\"\n    for chunk in model.stream(\n        \"Respond with one word. Not more.\", CFG(ebnf_grammar)\n    ):\n        response += chunk\n    assert response in [\"yes\", \"no\"]\n\n\ndef test_llamacpp_stream_cfg_outlines_core(model, lark_grammar):\n    with pytest.raises(\n        NotImplementedError,\n        match=\"Outlines Core does not support context-free grammar.\"\n    ):\n        for chunk in model.stream(\n            \"Respond with one word. 
Not more.\",\n            CFG(lark_grammar),\n            backend=\"outlines_core\"\n        ):\n            pass\n\n\ndef test_llamacpp_stream_choice(model):\n    class Foo(Enum):\n        bar = \"Bar\"\n        foor = \"Foo\"\n\n    generator = model.stream(\"foo?\", Foo)\n\n    first_non_empty_token = next(x for x in generator if x)\n    assert first_non_empty_token[0] in (\"B\", \"F\")\n\n\ndef test_llamacpp_stream_text_stop(model):\n    generator = model.stream(\"Write the letter a.\", None, stop=\"a\", max_tokens=100)\n\n    result = next(generator)\n    assert isinstance(result, str)\n    assert result != \"a\"\n\n\ndef test_llamacpp_batch(model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n\ndef test_llamacpp_no_chat(model_no_chat):\n    result = model_no_chat.generate(\"Respond with one word. Not more.\", None)\n    assert isinstance(result, str)\n\n    generator = model_no_chat.stream(\"Respond with one word. Not more.\", None)\n    for x in generator:\n        assert isinstance(x, str)\n"
  },
  {
    "path": "tests/models/test_llamacpp_tokenizer.py",
    "content": "import ctypes\n\nimport pytest\nimport sys\nfrom unittest.mock import MagicMock, patch\n\nimport llama_cpp\nimport transformers\n\nfrom outlines.models.llamacpp import LlamaCppTokenizer\n\n\n@pytest.fixture\ndef model():\n    model = llama_cpp.Llama.from_pretrained(\n        repo_id=\"M4-ai/TinyMistral-248M-v2-Instruct-GGUF\",\n        filename=\"TinyMistral-248M-v2-Instruct.Q4_K_M.gguf\",\n        chat_format=\"qwen\",\n    )\n    setattr(\n        model.tokenizer_,\n        \"hf_tokenizer\",\n        transformers.AutoTokenizer.from_pretrained(\"erwanf/gpt2-mini\"),\n    )\n    return model\n\n\n@pytest.fixture\ndef model_no_hf_tokenizer():\n    model = llama_cpp.Llama.from_pretrained(\n        repo_id=\"M4-ai/TinyMistral-248M-v2-Instruct-GGUF\",\n        filename=\"TinyMistral-248M-v2-Instruct.Q4_K_M.gguf\",\n        chat_format=\"qwen\",\n    )\n    del model.tokenizer_\n    return model\n\n\n@pytest.fixture\ndef different_model():\n    model = llama_cpp.Llama.from_pretrained(\n        \"TheBloke/phi-2-GGUF\",\n        \"phi-2.Q4_K_M.gguf\",\n    )\n    return model\n\n\n@pytest.fixture\ndef tokenizer(model):\n    return LlamaCppTokenizer(model)\n\n\n@pytest.fixture\ndef another_tokenizer(model):\n    return LlamaCppTokenizer(model)\n\n\n@pytest.fixture\ndef tokenizer_no_hf_tokenizer(model_no_hf_tokenizer):\n    return LlamaCppTokenizer(model_no_hf_tokenizer)\n\n\n@pytest.fixture\ndef different_tokenizer(different_model):\n    return LlamaCppTokenizer(different_model)\n\n\ndef test_llama_cpp_tokenizer_init(tokenizer, tokenizer_no_hf_tokenizer):\n    # regular case\n    assert tokenizer.eos_token_id is not None\n    assert tokenizer.pad_token_id is not None\n    assert isinstance(tokenizer.vocabulary, dict)\n\n    # tokenizer with no hf_tokenizer\n    assert tokenizer_no_hf_tokenizer.eos_token_id is not None\n    assert tokenizer_no_hf_tokenizer.pad_token_id is not None\n    assert isinstance(tokenizer_no_hf_tokenizer.vocabulary, dict)\n\n\ndef 
test_llama_cpp_tokenizer_encode(tokenizer):\n    # batch case\n    with pytest.raises(NotImplementedError):\n        token_ids, attention_mask = tokenizer.encode([\"foo\", \"bar\"])\n\n    # regular case\n    token_ids, attention_mask = tokenizer.encode(\"Hello, world!\")\n    assert token_ids is not None\n    assert attention_mask is not None\n    assert len(token_ids) == len(attention_mask)\n\n\ndef test_llama_cpp_tokenizer_decode(tokenizer):\n    token_ids, _ = tokenizer.encode(\"Hello, world!\")\n    decoded_text = tokenizer.decode(token_ids)\n    assert isinstance(decoded_text, list)\n    assert \"\".join(decoded_text).strip() == \"Hello, world!\"\n\n\ndef test_llama_cpp_tokenizer_convert_token_to_string(\n    tokenizer,\n    tokenizer_no_hf_tokenizer\n):\n    # with self._hf_tokenizer\n    token_str = tokenizer.convert_token_to_string(\"<0x20>\")\n    assert isinstance(token_str, str)\n\n    # without self._hf_tokenizer\n    token_str = tokenizer_no_hf_tokenizer.convert_token_to_string(\"<0x20>\")\n    assert isinstance(token_str, str)\n\n\ndef test_llama_cpp_tokenizer_eq(tokenizer, another_tokenizer, different_tokenizer):\n    assert not tokenizer == 1\n    assert tokenizer == another_tokenizer\n    assert tokenizer != different_tokenizer\n\n\ndef test_llama_cpp_tokenizer_hash(tokenizer, another_tokenizer, different_tokenizer):\n    assert isinstance(hash(tokenizer), int)\n    assert hash(tokenizer) == hash(another_tokenizer)\n    assert hash(tokenizer) != hash(different_tokenizer)\n\n\ndef test_llama_cpp_tokenizer_getstate(tokenizer):\n    state = tokenizer.__getstate__()\n    assert isinstance(state, tuple)\n    assert len(state) == 5\n    assert isinstance(state[0], dict)\n    assert isinstance(state[1], int)\n    assert isinstance(state[2], str)\n    assert isinstance(state[3], int)\n    assert isinstance(state[4], list)\n\n\ndef test_llama_cpp_tokenizer_setstate(tokenizer):\n    with pytest.raises(NotImplementedError):\n        
tokenizer.__setstate__(None)\n\n\ndef _make_mock_model(n_vocab, eos_id, pieces):\n    \"\"\"Build a mock Llama model whose vocab is defined by *pieces*.\n\n    Parameters\n    ----------\n    n_vocab : int\n        Number of tokens in the vocabulary.\n    eos_id : int\n        The EOS token id.\n    pieces : dict[int, bytes]\n        Mapping from token id to the raw bytes of the token piece.\n    \"\"\"\n    model = MagicMock()\n    # Remove tokenizer_ so the code falls into the C-API branch\n    del model.tokenizer_\n    model.token_eos.return_value = eos_id\n    model.n_vocab.return_value = n_vocab\n    model.model = MagicMock()\n    return model\n\n\ndef test_vocab_truncation_retry_path():\n    \"\"\"Tokens whose piece length exceeds the 32-byte buffer must trigger the\n    retry path with a larger buffer so their text is not collapsed.\"\"\"\n    long_piece = b\"x\" * 40  # 40 > 32 → triggers the retry branch\n    short_piece = b\"hi\"\n    eos_piece = b\"</s>\"\n\n    pieces = {0: short_piece, 1: long_piece, 2: eos_piece}\n    model = _make_mock_model(n_vocab=3, eos_id=2, pieces=pieces)\n\n    def fake_llama_token_to_piece(vocab, token_id, buf, buf_size, *args):\n        data = pieces[token_id]\n        n = len(data)\n        # Only write into the buffer when it is large enough\n        if buf_size >= n:\n            ctypes.memmove(buf, data, n)\n        return n\n\n    with patch(\n        \"outlines.models.llamacpp.llama_model_get_vocab\",\n        return_value=MagicMock(),\n        create=True,\n    ), patch(\n        \"outlines.models.llamacpp.llama_token_to_piece\",\n        side_effect=fake_llama_token_to_piece,\n        create=True,\n    ):\n        # Patch the imports inside the __init__ else-branch\n        with patch.dict(\n            \"sys.modules\",\n            {\n                \"llama_cpp\": MagicMock(\n                    llama_model_get_vocab=MagicMock(return_value=MagicMock()),\n                    
llama_token_to_piece=fake_llama_token_to_piece,\n                ),\n            },\n        ):\n            tok = LlamaCppTokenizer.__new__(LlamaCppTokenizer)\n            # Re-import inside the else-branch uses llama_cpp module\n            tok.__init__(model)\n\n    assert tok.vocabulary[long_piece.decode()] == 1\n    assert tok.vocabulary[short_piece.decode()] == 0\n    assert tok.eos_token == eos_piece.decode()\n\n\ndef test_attention_mask_all_ones_even_with_eos():\n    \"\"\"The attention mask must be all-ones for every token, including EOS.\"\"\"\n    eos_piece = b\"</s>\"\n    pieces = {0: b\"hello\", 1: eos_piece}\n    model = _make_mock_model(n_vocab=2, eos_id=1, pieces=pieces)\n\n    def fake_llama_token_to_piece(vocab, token_id, buf, buf_size, *args):\n        data = pieces[token_id]\n        n = len(data)\n        if buf_size >= n:\n            ctypes.memmove(buf, data, n)\n        return n\n\n    with patch.dict(\n        \"sys.modules\",\n        {\n            \"llama_cpp\": MagicMock(\n                llama_model_get_vocab=MagicMock(return_value=MagicMock()),\n                llama_token_to_piece=fake_llama_token_to_piece,\n            ),\n        },\n    ):\n        tok = LlamaCppTokenizer.__new__(LlamaCppTokenizer)\n        tok.__init__(model)\n\n    # Simulate encoding that returns token ids including the EOS token\n    fake_tokenizer = MagicMock()\n    fake_tokenizer.tokenize.return_value = [0, 1]  # token 1 == eos_id\n    tok.tokenizer = fake_tokenizer\n\n    token_ids, attention_mask = tok.encode(\"hello</s>\")\n\n    assert token_ids == [0, 1]\n    assert attention_mask == [1, 1]\n\n\ndef test_negative_n_skips_invalid_token():\n    \"\"\"Tokens that return n < 0 from llama_token_to_piece (error codes)\n    must be silently skipped instead of producing garbage vocabulary entries.\"\"\"\n    eos_piece = b\"</s>\"\n    pieces = {0: b\"ok\", 1: None, 2: eos_piece}  # token 1 returns error\n    model = _make_mock_model(n_vocab=3, eos_id=2, 
pieces=pieces)\n\n    def fake_llama_token_to_piece(vocab, token_id, buf, buf_size, *args):\n        data = pieces[token_id]\n        if data is None:\n            return -1  # error return\n        n = len(data)\n        if buf_size >= n:\n            ctypes.memmove(buf, data, n)\n        return n\n\n    with patch.dict(\n        \"sys.modules\",\n        {\n            \"llama_cpp\": MagicMock(\n                llama_model_get_vocab=MagicMock(return_value=MagicMock()),\n                llama_token_to_piece=fake_llama_token_to_piece,\n            ),\n        },\n    ):\n        tok = LlamaCppTokenizer.__new__(LlamaCppTokenizer)\n        tok.__init__(model)\n\n    # Token 1 (error) must not appear in the vocabulary\n    assert 1 not in tok.vocabulary.values()\n    assert tok.vocabulary[\"ok\"] == 0\n    assert tok.eos_token == eos_piece.decode()\n"
  },
  {
    "path": "tests/models/test_llamacpp_type_adapter.py",
    "content": "import pytest\nimport io\n\nfrom llama_cpp import LogitsProcessorList\nfrom PIL import Image as PILImage\nfrom outlines_core import Index, Vocabulary\n\nfrom outlines.backends.outlines_core import OutlinesCoreLogitsProcessor\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.llamacpp import LlamaCppTypeAdapter\n\n\n@pytest.fixture\ndef adapter():\n    return LlamaCppTypeAdapter()\n\n\n@pytest.fixture\ndef logits_processor():\n    vocabulary = Vocabulary.from_pretrained(\"openai-community/gpt2\")\n    index = Index(r\"[0-9]{3}\", vocabulary)\n    return OutlinesCoreLogitsProcessor(index, \"numpy\")\n\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\ndef test_llamacpp_type_adapter_format_input(adapter, image):\n    # Anything else than a string/Chat\n    with pytest.raises(NotImplementedError):\n        adapter.format_input([\"Hello, world!\"])\n\n    # string\n    assert adapter.format_input(\"Hello, world!\") == \"Hello, world!\"\n\n    # Chat\n    messages = [\n        {\"role\": \"user\", \"content\": \"Hello, world!\"},\n        {\"role\": \"assistant\", \"content\": \"Hello, world!\"},\n    ]\n    assert adapter.format_input(Chat(messages=messages)) == messages\n\n    # Multi-modal (invalid)\n    with pytest.raises(\n        ValueError,\n        match=\"LlamaCpp does not support multi-modal messages.\"\n    ):\n        adapter.format_input(Chat(messages=[\n            {\"role\": \"user\", \"content\": [\"prompt\", Image(image)]},\n        ]))\n\n\ndef test_llamacpp_type_adapter_format_input_with_chat_template():\n    adapter = LlamaCppTypeAdapter(has_chat_template=True)\n    message = \"prompt\"\n    result = 
adapter.format_input(message)\n\n    assert result == [{\"role\": \"user\", \"content\": \"prompt\"}]\n\n\ndef test_llamacpp_type_adapter_format_input_without_chat_template():\n    adapter = LlamaCppTypeAdapter(has_chat_template=False)\n    message = \"prompt\"\n    result = adapter.format_input(message)\n\n    assert result == \"prompt\"\n\n\ndef test_llamacpp_type_adapter_format_output_type(adapter, logits_processor):\n    formatted = adapter.format_output_type(logits_processor)\n    assert isinstance(formatted, LogitsProcessorList)\n    assert formatted[0].index == logits_processor.index\n    assert formatted[0].tensor_library_name == logits_processor.tensor_library_name\n"
  },
  {
    "path": "tests/models/test_lmstudio.py",
    "content": "import io\nimport json\nimport os\nimport warnings\nfrom enum import Enum\nfrom typing import Annotated, AsyncGenerator, Generator\n\nimport lmstudio\nimport pytest\nfrom PIL import Image as PILImage\nfrom pydantic import BaseModel, Field\n\nimport outlines\nfrom outlines.inputs import Chat, Image, Video\nfrom outlines.models import AsyncLMStudio, LMStudio\nfrom outlines.models.lmstudio import LMStudioTypeAdapter\nfrom tests.test_utils.mock_lmstudio_client import (\n    MockLMStudioClient,\n    MockAsyncLMStudioClient,\n)\n\n\n# If the LMSTUDIO_SERVER_URL environment variable is set, use the real LMStudio server\n# Otherwise, use the mock server\nlmstudio_server_url = os.environ.get(\"LMSTUDIO_SERVER_URL\")\nlmstudio_model_name = os.environ.get(\n    \"LMSTUDIO_MODEL_NAME\", \"openai/gpt-oss-20b\"\n)\n\n# Image for testing (only create when server is available, as lms.prepare_image requires it)\nimage_input = None\nif lmstudio_server_url:\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n    image_input = Image(image)\n\nif lmstudio_server_url:\n    lmstudio_client = lmstudio.Client(lmstudio_server_url)\n    async_lmstudio_client = lmstudio.AsyncClient(lmstudio_server_url)\nelse:\n    warnings.warn(\"No LMStudio server URL provided, using mock server\")\n    lmstudio_client = MockLMStudioClient()\n    async_lmstudio_client = MockAsyncLMStudioClient()\n\n\nclass Foo(BaseModel):\n    foo: Annotated[str, Field(max_length=10)]\n\n\ntype_adapter = LMStudioTypeAdapter()\n\n# Mock responses for non-image tests (image tests require a running server\n# because lms.prepare_image() needs to connect to LM Studio)\nmock_responses = [\n    (\n        {\n            \"messages\": type_adapter.format_input(\"Respond with one word. 
Not more.\"),\n        },\n        \"foo\"\n    ),\n    (\n        {\n            \"messages\": type_adapter.format_input(\n                \"Create a character with a name in the foo field.\"\n            ),\n            \"response_format\": type_adapter.format_output_type(Foo),\n        },\n        '{\"foo\": \"bar\"}'\n    ),\n    (\n        {\n            \"messages\": type_adapter.format_input(\"Write a sentence about a cat.\"),\n        },\n        [\"The \", \"cat \", \"sat.\"]\n    ),\n    (\n        {\n            \"messages\": type_adapter.format_input(\"Create a character.\"),\n            \"response_format\": type_adapter.format_output_type(Foo),\n        },\n        ['{\"foo\":', ' \"bar\"}']\n    ),\n]\n\n\n# If the LMSTUDIO_SERVER_URL environment variable is not set, add the mock\n# responses to the mock clients\nif not lmstudio_server_url:\n    lmstudio_client.add_mock_responses(mock_responses)\n    async_lmstudio_client.add_mock_responses(mock_responses)\n\n\n# Skip condition for tests that require a running LM Studio server (image tests)\nrequires_lmstudio_server = pytest.mark.skipif(\n    not lmstudio_server_url,\n    reason=(\n        \"Image tests require a running LM Studio server (lms.prepare_image \"\n        + \"needs connection)\"\n    )\n)\n\n\n@pytest.fixture\ndef model():\n    return LMStudio(lmstudio_client, lmstudio_model_name)\n\n\n@pytest.fixture\ndef model_no_model_name():\n    return LMStudio(lmstudio_client)\n\n\n@pytest.fixture\ndef async_model():\n    if lmstudio_server_url:\n        # We need to create a new lmstudio client\n        client = lmstudio.AsyncClient(lmstudio_server_url)\n        return AsyncLMStudio(client, lmstudio_model_name)\n    return AsyncLMStudio(async_lmstudio_client, lmstudio_model_name)\n\n\n@pytest.fixture\ndef async_model_no_model_name():\n    if lmstudio_server_url:\n        # We need to create a new lmstudio client\n        client = lmstudio.AsyncClient(lmstudio_server_url)\n        return 
AsyncLMStudio(client)\n    return AsyncLMStudio(async_lmstudio_client)\n\n\ndef test_lmstudio_init_from_client():\n    if lmstudio_server_url:\n        client = lmstudio.Client(lmstudio_server_url)\n\n        # With model name\n        model = outlines.from_lmstudio(client, lmstudio_model_name)\n        assert isinstance(model, LMStudio)\n        assert model.client == client\n        assert model.model_name == lmstudio_model_name\n\n        # Without model name\n        model = outlines.from_lmstudio(client)\n        assert isinstance(model, LMStudio)\n        assert model.client == client\n        assert model.model_name is None\n    else:\n        # With mock client, test direct instantiation\n        client = MockLMStudioClient()\n        client.add_mock_responses(mock_responses)\n\n        model = LMStudio(client, lmstudio_model_name)\n        assert model.client == client\n        assert model.model_name == lmstudio_model_name\n\n        model = LMStudio(client)\n        assert model.client == client\n        assert model.model_name is None\n\n    # With invalid client\n    with pytest.raises(ValueError, match=\"Invalid client type\"):\n        outlines.from_lmstudio(object())\n\n\ndef test_lmstudio_simple(model):\n    result = model.generate(\"Respond with one word. Not more.\", None)\n    assert isinstance(result, str)\n\n\ndef test_lmstudio_direct(model_no_model_name):\n    result = model_no_model_name(\n        \"Respond with one word. 
Not more.\",\n        None,\n        model=lmstudio_model_name,\n    )\n    assert isinstance(result, str)\n\n\n@requires_lmstudio_server\ndef test_lmstudio_simple_vision(model):\n    result = model.generate(\n        [\"What does this logo represent?\", image_input],\n        model=lmstudio_model_name,\n    )\n    assert isinstance(result, str)\n\n\n@requires_lmstudio_server\ndef test_lmstudio_chat(model):\n    result = model.generate(\n        Chat(\n            [\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n                {\"role\": \"user\", \"content\": [\n                    \"What does this logo represent?\",\n                    image_input\n                ]},\n            ]\n        ),\n        model=lmstudio_model_name,\n    )\n    assert isinstance(result, str)\n\n\ndef test_lmstudio_json(model):\n    result = model(\"Create a character with a name in the foo field.\", Foo)\n    assert isinstance(result, str)\n    assert \"foo\" in json.loads(result)\n\n\ndef test_lmstudio_wrong_output_type(model):\n    class BadFoo(Enum):\n        bar = \"Bar\"\n        foo = \"Foo\"\n\n    with pytest.raises(TypeError, match=\"is not supported\"):\n        model.generate(\"foo?\", BadFoo)\n\n\ndef test_lmstudio_wrong_input_type(model):\n    with pytest.raises(TypeError, match=\"is not available\"):\n        model.generate({\"foo?\": \"bar?\"}, None)\n\n    with pytest.raises(ValueError, match=\"All assets provided must be of type Image\"):\n        model.generate([\"foo?\", image_input, Video(\"\")], None)\n\n\ndef test_lmstudio_stream(model):\n    result = model.stream(\"Write a sentence about a cat.\")\n    assert isinstance(result, Generator)\n    assert isinstance(next(result), str)\n\n\ndef test_lmstudio_stream_json(model_no_model_name):\n    generator = model_no_model_name.stream(\"Create a character.\", Foo, model=lmstudio_model_name)\n    generated_text = []\n    for text in generator:\n        
generated_text.append(text)\n    assert \"foo\" in json.loads(\"\".join(generated_text))\n\n\ndef test_lmstudio_batch(model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        model.batch([\"Respond with one word.\", \"Respond with one word.\"])\n\n\ndef test_lmstudio_async_init_from_client():\n    if lmstudio_server_url:\n        client = lmstudio.AsyncClient(lmstudio_server_url)\n\n        # With model name\n        model = outlines.from_lmstudio(client, lmstudio_model_name)\n        assert isinstance(model, AsyncLMStudio)\n        assert model.client == client\n        assert model.model_name == lmstudio_model_name\n\n        # Without model name\n        model = outlines.from_lmstudio(client)\n        assert isinstance(model, AsyncLMStudio)\n        assert model.client == client\n        assert model.model_name is None\n    else:\n        # With mock client, test direct instantiation\n        client = MockAsyncLMStudioClient()\n        client.add_mock_responses(mock_responses)\n\n        model = AsyncLMStudio(client, lmstudio_model_name)\n        assert model.client == client\n        assert model.model_name == lmstudio_model_name\n\n        model = AsyncLMStudio(client)\n        assert model.client == client\n        assert model.model_name is None\n\n\n@pytest.mark.asyncio\nasync def test_lmstudio_async_simple(async_model):\n    result = await async_model.generate(\"Respond with one word. Not more.\", None)\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_lmstudio_async_direct(async_model_no_model_name):\n    result = await async_model_no_model_name(\n        \"Respond with one word. 
Not more.\",\n        None,\n        model=lmstudio_model_name,\n    )\n    assert isinstance(result, str)\n\n\n@requires_lmstudio_server\n@pytest.mark.asyncio\nasync def test_lmstudio_async_simple_vision(async_model):\n    result = await async_model.generate(\n        [\"What does this logo represent?\", image_input],\n        model=lmstudio_model_name,\n    )\n    assert isinstance(result, str)\n\n\n@requires_lmstudio_server\n@pytest.mark.asyncio\nasync def test_lmstudio_async_chat(async_model):\n    result = await async_model.generate(\n        Chat(\n            [\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n                {\"role\": \"user\", \"content\": [\n                    \"What does this logo represent?\",\n                    image_input\n                ]},\n            ]\n        ),\n        model=lmstudio_model_name,\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_lmstudio_async_json(async_model):\n    result = await async_model(\"Create a character with a name in the foo field.\", Foo)\n    assert isinstance(result, str)\n    assert \"foo\" in json.loads(result)\n\n\n@pytest.mark.asyncio\nasync def test_lmstudio_async_wrong_output_type(async_model):\n    class BadFoo(Enum):\n        bar = \"Bar\"\n        foo = \"Foo\"\n\n    with pytest.raises(TypeError, match=\"is not supported\"):\n        await async_model.generate(\"foo?\", BadFoo)\n\n\n@pytest.mark.asyncio\nasync def test_lmstudio_async_wrong_input_type(async_model):\n    with pytest.raises(TypeError, match=\"is not available\"):\n        await async_model.generate({\"foo?\": \"bar?\"}, None)\n\n    with pytest.raises(ValueError, match=\"All assets provided must be of type Image\"):\n        await async_model.generate([\"foo?\", image_input, Video(\"\")], None)\n\n\n@pytest.mark.asyncio\nasync def test_lmstudio_async_stream(async_model):\n    result = async_model.stream(\"Write a sentence about a cat.\")\n    
assert isinstance(result, AsyncGenerator)\n    assert isinstance(await result.__anext__(), str)\n\n\n@pytest.mark.asyncio\nasync def test_lmstudio_async_stream_json(async_model_no_model_name):\n    async_generator = async_model_no_model_name.stream(\"Create a character.\", Foo, model=lmstudio_model_name)\n    generated_text = []\n    async for chunk in async_generator:\n        generated_text.append(chunk)\n    assert \"foo\" in json.loads(\"\".join(generated_text))\n\n\n@pytest.mark.asyncio\nasync def test_lmstudio_async_batch(async_model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        await async_model.batch([\"Respond with one word.\", \"Respond with one word.\"])\n"
  },
  {
    "path": "tests/models/test_lmstudio_type_adapter.py",
    "content": "import io\nimport json\nimport os\nimport sys\nfrom dataclasses import dataclass\n\nimport pytest\nfrom genson import SchemaBuilder\nfrom PIL import Image as PILImage\nfrom pydantic import BaseModel\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.lmstudio import LMStudioTypeAdapter\nfrom outlines.types import cfg, json_schema, regex\n\nif sys.version_info >= (3, 12):\n    from typing import TypedDict\nelse:\n    from typing_extensions import TypedDict\n\n\n# Skip condition for tests that require a running LM Studio server (image tests)\nrequires_lmstudio_server = pytest.mark.skipif(\n    not os.environ.get(\"LMSTUDIO_SERVER_URL\"),\n    reason=(\n        \"Image tests require a running LM Studio server (lms.prepare_image \"\n        + \"needs connection)\"\n    )\n)\n\n\n@pytest.fixture\ndef schema():\n    return {\n        \"properties\": {\n            \"user_id\": {\"title\": \"User Id\", \"type\": \"integer\"},\n            \"name\": {\"title\": \"Name\", \"type\": \"string\"},\n        },\n        \"required\": [\"user_id\", \"name\"],\n        \"title\": \"User\",\n        \"type\": \"object\",\n    }\n\n\n@pytest.fixture\ndef adapter():\n    return LMStudioTypeAdapter()\n\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\ndef test_lmstudio_type_adapter_input_text(adapter):\n    text_input = \"prompt\"\n    result = adapter.format_input(text_input)\n    assert isinstance(result, str)\n    assert result == text_input\n\n\n@requires_lmstudio_server\ndef test_lmstudio_type_adapter_input_vision(adapter, image):\n    import lmstudio as lms\n\n    image_input = Image(image)\n    text_input = \"prompt\"\n    result 
= adapter.format_input([text_input, image_input])\n    assert isinstance(result, lms.Chat)\n\n\ndef test_lmstudio_type_adapter_input_chat(adapter):\n    chat_input = Chat(messages=[\n        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n        {\"role\": \"user\", \"content\": \"Hello\"},\n        {\"role\": \"assistant\", \"content\": \"Hi there!\"},\n        {\"role\": \"user\", \"content\": \"How are you?\"},\n    ])\n    result = adapter.format_input(chat_input)\n\n    # Should return an lmstudio.Chat object\n    import lmstudio as lms\n    assert isinstance(result, lms.Chat)\n\n\ndef test_lmstudio_type_adapter_input_chat_no_system(adapter):\n    chat_input = Chat(messages=[\n        {\"role\": \"user\", \"content\": \"Hello\"},\n        {\"role\": \"assistant\", \"content\": \"Hi!\"},\n    ])\n    result = adapter.format_input(chat_input)\n\n    import lmstudio as lms\n    assert isinstance(result, lms.Chat)\n\n\n@requires_lmstudio_server\ndef test_lmstudio_type_adapter_input_chat_with_image(adapter, image):\n    import lmstudio as lms\n\n    image_input = Image(image)\n    chat_input = Chat(messages=[\n        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n        {\"role\": \"user\", \"content\": [\n            \"What is in this image?\",\n            image_input,\n        ]},\n        {\"role\": \"assistant\", \"content\": \"response\"},\n    ])\n    result = adapter.format_input(chat_input)\n    assert isinstance(result, lms.Chat)\n\n\ndef test_lmstudio_type_adapter_input_invalid(adapter):\n    prompt = {\"foo\": \"bar\"}\n    with pytest.raises(TypeError, match=\"The input type\"):\n        _ = adapter.format_input(prompt)\n\n\ndef test_lmstudio_type_adapter_input_chat_invalid_content(adapter):\n    chat_input = Chat(messages=[\n        {\"role\": \"user\", \"content\": {\"foo\": \"bar\"}},\n    ])\n    with pytest.raises(ValueError, match=\"Invalid content type\"):\n        _ = 
adapter.format_input(chat_input)\n\n\ndef test_lmstudio_type_adapter_input_chat_invalid_role(adapter):\n    chat_input = Chat(messages=[\n        {\"role\": \"unknown\", \"content\": \"hello\"},\n    ])\n    with pytest.raises(ValueError, match=\"Unsupported role\"):\n        _ = adapter.format_input(chat_input)\n\n\ndef test_lmstudio_type_adapter_output_none(adapter):\n    result = adapter.format_output_type(None)\n    assert result is None\n\n\ndef test_lmstudio_type_adapter_output_invalid(adapter):\n    with pytest.raises(TypeError, match=\"The type `str` is not supported\"):\n        adapter.format_output_type(str)\n\n    with pytest.raises(TypeError, match=\"The type `int` is not supported\"):\n        adapter.format_output_type(int)\n\n    with pytest.raises(TypeError, match=\"Regex-based structured outputs are not\"):\n        adapter.format_output_type(regex(\"[0-9]\"))\n\n    with pytest.raises(TypeError, match=\"CFG-based structured outputs are not\"):\n        adapter.format_output_type(cfg(\"\"))\n\n\ndef test_lmstudio_type_adapter_output_dataclass(adapter, schema):\n    @dataclass\n    class User:\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert result == schema\n\n\ndef test_lmstudio_type_adapter_output_typed_dict(adapter, schema):\n    class User(TypedDict):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert result == schema\n\n\ndef test_lmstudio_type_adapter_output_pydantic(adapter, schema):\n    class User(BaseModel):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert result == schema\n\n\ndef test_lmstudio_type_adapter_output_genson_schema_builder(adapter):\n    builder = SchemaBuilder()\n    builder.add_schema({\"type\": \"object\", \"properties\": {}})\n    builder.add_object({\"hi\": \"there\"})\n    builder.add_object({\"hi\": 5})\n\n    result = adapter.format_output_type(builder)\n    
assert result == {\n        \"$schema\": \"http://json-schema.org/schema#\",\n        \"type\": \"object\",\n        \"properties\": {\"hi\": {\"type\": [\"integer\", \"string\"]}},\n        \"required\": [\"hi\"]\n    }\n\n\ndef test_lmstudio_type_adapter_json_schema_str(adapter, schema):\n    schema_str = json.dumps(schema)\n    result = adapter.format_output_type(json_schema(schema_str))\n    assert result == schema\n\n\ndef test_lmstudio_type_adapter_json_schema_dict(adapter, schema):\n    result = adapter.format_output_type(json_schema(schema))\n    assert result == schema\n"
  },
  {
    "path": "tests/models/test_mistral.py",
    "content": "import io\nimport json\nimport os\nfrom typing import Annotated, Generator, AsyncGenerator\n\nimport pytest\nfrom PIL import Image as PILImage\nfrom mistralai import Mistral as MistralClient\nfrom pydantic import BaseModel, Field\n\nimport outlines\nfrom outlines.inputs import Chat, Image, Video\nfrom outlines.models.mistral import AsyncMistral, Mistral\nfrom outlines.types import JsonSchema, Regex\n\n\nMODEL_NAME = \"mistral-large-latest\"\nVISION_MODEL = \"pixtral-large-latest\"\n\n\n@pytest.fixture(scope=\"session\")\ndef api_key():\n    \"\"\"Get the Mistral API key from the environment, providing a default value if not found.\n\n    This fixture should be used for tests that do not make actual api calls,\n    but still require to initialize the Mistral client.\n\n    \"\"\"\n    api_key = os.getenv(\"MISTRAL_API_KEY\")\n    if not api_key:\n        return \"MOCK_VALUE\"\n    return api_key\n\n\n@pytest.fixture(scope=\"session\")\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\n@pytest.fixture(scope=\"session\")\ndef model(api_key):\n    return Mistral(MistralClient(api_key=api_key), MODEL_NAME)\n\n\n@pytest.fixture(scope=\"session\")\ndef vision_model(api_key):\n    return Mistral(MistralClient(api_key=api_key), VISION_MODEL)\n\n\n@pytest.fixture(scope=\"session\")\ndef async_model(api_key):\n    return AsyncMistral(MistralClient(api_key=api_key), MODEL_NAME)\n\n\n@pytest.fixture(scope=\"session\")\ndef async_vision_model(api_key):\n    return AsyncMistral(MistralClient(api_key=api_key), VISION_MODEL)\n\n\n@pytest.fixture(scope=\"session\")\ndef model_no_model_name(api_key):\n    return Mistral(MistralClient(api_key=api_key))\n\n\n@pytest.fixture(scope=\"session\")\ndef 
async_model_no_model_name(api_key):\n    return AsyncMistral(MistralClient(api_key=api_key))\n\n\ndef test_mistral_init_from_client(api_key):\n    client = MistralClient(api_key=api_key)\n\n    # With model name\n    model = outlines.from_mistral(client, MODEL_NAME)\n    assert isinstance(model, Mistral)\n    assert model.client == client\n    assert model.model_name == MODEL_NAME\n\n    # Without model name\n    model = outlines.from_mistral(client)\n    assert isinstance(model, Mistral)\n    assert model.client == client\n    assert model.model_name is None\n\n\ndef test_mistral_wrong_inference_parameters(model):\n    with pytest.raises(RuntimeError, match=\"got an unexpected\"):\n        model(\"prompt\", foo=10)\n\n\ndef test_mistral_wrong_input_type(model):\n    with pytest.raises(TypeError, match=\"is not available\"):\n        model(123)\n\n\ndef test_mistral_wrong_output_type(model):\n    with pytest.raises(\n        TypeError,\n        match=\"Regex-based structured outputs are not available with Mistral.\",\n    ):\n        model(\"prompt\", Regex(\"^.*$\"))\n\n\n@pytest.mark.api_call\ndef test_mistral_call(model):\n    result = model(\"Respond with one word. Not more.\")\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_mistral_call_model_name(model_no_model_name):\n    result = model_no_model_name(\n        \"Respond with one word. Not more.\",\n        model=MODEL_NAME\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_mistral_multiple_samples(model):\n    result = model(\"Respond with one word. 
Not more.\", n=2)\n    assert isinstance(result, list)\n    assert len(result) == 2\n    assert isinstance(result[0], str)\n    assert isinstance(result[1], str)\n\n\n@pytest.mark.api_call\ndef test_mistral_vision(image, vision_model):\n    result = vision_model([\"What does this logo represent?\", Image(image)])\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_mistral_chat(image, vision_model):\n    result = vision_model(Chat(messages=[\n        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n        {\n            \"role\": \"user\",\n            \"content\": [\"What does this logo represent?\", Image(image)]\n        },\n    ]), max_tokens=10)\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_mistral_pydantic(model):\n    class Foo(BaseModel):\n        bar: int\n\n    result = model(\"foo?\", Foo)\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_mistral_pydantic_refusal(model):\n    class Foo(BaseModel):\n        bar: Annotated[str, Field(int, pattern=r\"^\\d+$\")]\n\n    with pytest.raises(TypeError, match=\"Mistral does not support your schema\"):\n        _ = model(\"foo?\", Foo)\n\n\n@pytest.mark.api_call\ndef test_mistral_vision_pydantic(vision_model, image):\n    class Logo(BaseModel):\n        name: int\n\n    result = vision_model([\"What does this logo represent?\", Image(image)], Logo)\n    assert isinstance(result, str)\n    assert \"name\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_mistral_json_schema(model):\n    class Foo(BaseModel):\n        bar: int\n\n    schema = json.dumps(Foo.model_json_schema())\n\n    result = model(\"foo?\", JsonSchema(schema))\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_mistral_streaming(model):\n    result = model.stream(\"Respond with one word. 
Not more.\")\n    assert isinstance(result, Generator)\n    assert isinstance(next(result), str)\n\n\ndef test_mistral_batch(model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n\n\ndef test_mistral_async_init_from_client(api_key):\n    client = MistralClient(api_key=api_key)\n\n    # Async with model name\n    model = outlines.from_mistral(client, MODEL_NAME, async_client=True)\n    assert isinstance(model, AsyncMistral)\n    assert model.client == client\n    assert model.model_name == MODEL_NAME\n\n    # Async without model name\n    model = outlines.from_mistral(client, async_client=True)\n    assert isinstance(model, AsyncMistral)\n    assert model.client == client\n    assert model.model_name is None\n\n\n@pytest.mark.asyncio\nasync def test_mistral_async_wrong_inference_parameters(async_model):\n    with pytest.raises(RuntimeError, match=\"got an unexpected\"):\n        await async_model(\"prompt\", foo=10)\n\n\n@pytest.mark.asyncio\nasync def test_mistral_async_wrong_input_type(async_model):\n    with pytest.raises(TypeError, match=\"is not available\"):\n        await async_model(123)\n\n\n@pytest.mark.asyncio\nasync def test_mistral_async_wrong_output_type(async_model):\n    with pytest.raises(\n        TypeError,\n        match=\"Regex-based structured outputs are not available with Mistral.\",\n    ):\n        await async_model(\"prompt\", Regex(\"^.*$\"))\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_mistral_async_call(async_model):\n    result = await async_model(\"Respond with one word. Not more.\")\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_mistral_async_call_model_name(async_model_no_model_name):\n    result = await async_model_no_model_name(\n        \"Respond with one word. 
Not more.\",\n        model=MODEL_NAME,\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_mistral_async_multiple_samples(async_model):\n    result = await async_model(\"Respond with one word. Not more.\", n=2)\n    assert isinstance(result, list)\n    assert len(result) == 2\n    assert isinstance(result[0], str)\n    assert isinstance(result[1], str)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_mistral_async_vision(async_vision_model, image):\n    result = await async_vision_model([\"What does this logo represent?\", Image(image)])\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_mistral_async_chat(async_vision_model, image):\n    result = await async_vision_model(Chat(messages=[\n        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n        {\n            \"role\": \"user\",\n            \"content\": [\"What does this logo represent?\", Image(image)]\n        },\n    ]), max_tokens=10)\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_mistral_async_pydantic(async_model):\n    class Foo(BaseModel):\n        bar: int\n\n    result = await async_model(\"foo?\", Foo)\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_mistral_async_pydantic_refusal(async_model):\n    class Foo(BaseModel):\n        bar: Annotated[str, Field(int, pattern=r\"^\\d+$\")]\n\n    with pytest.raises(TypeError, match=\"Mistral does not support your schema\"):\n        _ = await async_model(\"foo?\", Foo)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_mistral_async_vision_pydantic(async_vision_model, image):\n    class Logo(BaseModel):\n        name: int\n\n    result = await async_vision_model([\"What does this logo represent?\", Image(image)], Logo)\n    assert isinstance(result, 
str)\n    assert \"name\" in json.loads(result)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_mistral_async_json_schema(async_model):\n    class Foo(BaseModel):\n        bar: int\n\n    schema = json.dumps(Foo.model_json_schema())\n\n    result = await async_model(\"foo?\", JsonSchema(schema))\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_mistral_async_streaming(async_model):\n    result = async_model.stream(\"Respond with one word. Not more.\")\n    assert isinstance(result, AsyncGenerator)\n    async for chunk in result:\n        assert isinstance(chunk, str)\n        break  # Just check the first chunk\n\n\n@pytest.mark.asyncio\nasync def test_mistral_async_batch(async_model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        _ = await async_model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n"
  },
  {
    "path": "tests/models/test_mistral_type_adapter.py",
    "content": "import io\nimport json\nimport sys\nfrom dataclasses import dataclass\nfrom typing import Literal\n\nimport pytest\nfrom PIL import Image as PILImage\nfrom genson import SchemaBuilder\nfrom mistralai import (\n    AssistantMessage,\n    SystemMessage,\n    UserMessage,\n)\nfrom pydantic import BaseModel\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.mistral import MistralTypeAdapter\nfrom outlines.types import CFG, JsonSchema, Regex\n\nif sys.version_info >= (3, 12):\n    from typing import TypedDict\nelse:\n    from typing_extensions import TypedDict\n\n\n@pytest.fixture\ndef schema():\n    return {\n        \"properties\": {\n            \"user_id\": {\"title\": \"User Id\", \"type\": \"integer\"},\n            \"name\": {\"title\": \"Name\", \"type\": \"string\"},\n        },\n        \"required\": [\"user_id\", \"name\"],\n        \"title\": \"User\",\n        \"type\": \"object\",\n        \"additionalProperties\": False,\n    }\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n@pytest.fixture\ndef adapter():\n    return MistralTypeAdapter()\n\n\ndef test_mistral_type_adapter_input_text(adapter):\n    message = \"Hello world\"\n    result = adapter.format_input(message)\n    assert len(result) == 1\n    assert isinstance(result[0], UserMessage)\n    assert result[0].content == message\n\n\ndef test_mistral_type_adapter_input_list(adapter, image):\n    image_input = Image(image)\n    message_list = [\"Hello world\", image_input]\n    result = adapter.format_input(message_list)\n    assert len(result) == 1\n    assert isinstance(result[0], UserMessage)\n    message_content = result[0].content\n    assert 
dict(message_content[0]) == {\"type\": \"text\", \"text\": \"Hello world\"}\n    assert message_content[1].type == \"image_url\"\n    assert hasattr(message_content[1], \"image_url\")\n\n\ndef test_mistral_type_adapter_input_chat(adapter, image):\n    image_input = Image(image)\n    chat = Chat([\n        {\"role\": \"system\", \"content\": \"You are helpful\"},\n        {\"role\": \"user\", \"content\": [\"Hello world\", image_input]},\n        {\"role\": \"assistant\", \"content\": \"Hi there\"},\n    ])\n    result = adapter.format_input(chat)\n    assert len(result) == 3\n    assert isinstance(result[0], SystemMessage)\n    assert result[0].content == \"You are helpful\"\n    assert isinstance(result[1], UserMessage)\n    assert dict(result[1].content[0]) == {\"type\": \"text\", \"text\": \"Hello world\"}\n    assert result[1].content[1].type == \"image_url\"\n    assert hasattr(result[1].content[1], \"image_url\")\n    assert isinstance(result[2], AssistantMessage)\n    assert result[2].content == \"Hi there\"\n\n\ndef test_mistral_type_adapter_input_invalid(adapter, image):\n    @dataclass\n    class Audio:\n        file: str\n\n    with pytest.raises(TypeError, match=\"is not available\"):\n        adapter.format_input(123)\n\n    with pytest.raises(ValueError, match=\"Content list cannot be empty.\"):\n        adapter.format_input([])\n\n    with pytest.raises(\n        ValueError,\n        match=\"The first item in the list should be a string.\",\n    ):\n        adapter.format_input([Image(image)])\n\n    with pytest.raises(\n        ValueError,\n        match=\"Expected Image objects after the first string\"\n    ):\n        adapter.format_input([\"hello\", Audio(\"file\")])\n\n    with pytest.raises(\n        TypeError,\n        match=\"Invalid content type\",\n    ):\n        adapter.format_input(Chat([{\"role\": \"user\", \"content\": {}}]))\n\n    with pytest.raises(ValueError, match=\"Unsupported role\"):\n        
adapter.format_input(Chat([{\"role\": \"invalid\", \"content\": \"Hello\"}]))\n\n\ndef test_mistral_type_adapter_output_none(adapter):\n    result = adapter.format_output_type(None)\n    assert result == {}\n\n\ndef test_mistral_type_adapter_output_json_mode(adapter):\n    result = adapter.format_output_type(dict)\n    assert result == {\"type\": \"json_object\"}\n\n\ndef test_mistral_type_adapter_dataclass(adapter, schema):\n    @dataclass\n    class User:\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert isinstance(result, dict)\n    assert result[\"json_schema\"][\"strict\"] is True\n    assert result[\"json_schema\"][\"schema\"] == schema\n\n\ndef test_mistral_type_adapter_typed_dict(adapter, schema):\n    class User(TypedDict):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert isinstance(result, dict)\n    assert result[\"json_schema\"][\"strict\"] is True\n    assert result[\"json_schema\"][\"schema\"] == schema\n\n\ndef test_mistral_type_adapter_pydantic(adapter, schema):\n    class User(BaseModel):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert isinstance(result, dict)\n    assert result[\"json_schema\"][\"strict\"] is True\n    assert result[\"json_schema\"][\"schema\"] == schema\n\n\ndef test_mistral_type_adapter_genson_schema_builder(adapter, schema):\n    builder = SchemaBuilder()\n    builder.add_schema({\"type\": \"object\", \"properties\": {}})\n    builder.add_object({\"hi\": \"there\"})\n    builder.add_object({\"hi\": 5})\n\n    result = adapter.format_output_type(builder)\n    assert isinstance(result, dict)\n    assert result[\"json_schema\"][\"strict\"] is True\n    expected_schema = {\n        \"$schema\": \"http://json-schema.org/schema#\",\n        \"type\": \"object\",\n        \"properties\": {\"hi\": {\"type\": [\"integer\", \"string\"]}},\n        \"required\": [\"hi\"],\n        
\"additionalProperties\": False\n    }\n    assert result[\"json_schema\"][\"schema\"] == expected_schema\n\n\ndef test_mistral_type_adapter_json_schema_str(adapter, schema):\n    schema_str = json.dumps(schema)\n    result = adapter.format_output_type(JsonSchema(schema_str))\n    assert isinstance(result, dict)\n    assert result[\"json_schema\"][\"strict\"] is True\n    assert result[\"json_schema\"][\"schema\"] == schema\n\n\ndef test_mistral_type_adapter_output_unsupported(adapter):\n    with pytest.raises(\n        TypeError,\n        match=\"Regex-based structured outputs are not available with Mistral.\",\n    ):\n        adapter.format_output_type(Regex(\"[0-9]\"))\n\n    with pytest.raises(\n        TypeError,\n        match=\"CFG-based structured outputs are not available with Mistral.\",\n    ):\n        adapter.format_output_type(CFG(\"\"))\n\n    with pytest.raises(TypeError, match=\"is not available with Mistral.\"):\n        adapter.format_output_type(Literal[\"foo\", \"bar\"])\n"
  },
  {
    "path": "tests/models/test_mlxlm.py",
    "content": "import pytest\nimport re\nfrom enum import Enum\nfrom typing import Generator\n\nimport outlines\nfrom outlines.types import Regex\nfrom outlines.models.mlxlm import (\n    MLXLM,\n    MLXLMTypeAdapter,\n    from_mlxlm\n)\nfrom outlines.models.transformers import TransformerTokenizer\nfrom pydantic import BaseModel\n\ntry:\n    import mlx_lm\n    import mlx.core as mx\n\n    HAS_MLX = mx.metal.is_available()\nexcept ImportError:\n    HAS_MLX = False\n\n\nTEST_MODEL = \"mlx-community/SmolLM-135M-Instruct-4bit\"\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_model_initialization():\n    model = from_mlxlm(*mlx_lm.load(TEST_MODEL))\n    assert isinstance(model, MLXLM)\n    assert isinstance(model.model, mlx_lm.models.llama.Model)\n    assert isinstance(\n        model.mlx_tokenizer, mlx_lm.tokenizer_utils.TokenizerWrapper\n    )\n    assert isinstance(model.tokenizer, TransformerTokenizer)\n    assert isinstance(model.type_adapter, MLXLMTypeAdapter)\n    assert model.tensor_library_name == \"mlx\"\n\n\n@pytest.fixture(scope=\"session\")\ndef model(tmp_path_factory):\n    model, tokenizer = mlx_lm.load(TEST_MODEL)\n    return outlines.from_mlxlm(model, tokenizer)\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_tokenizer(model):\n    # Test single string encoding/decoding\n    test_text = \"Hello, world!\"\n    token_ids, _ = model.tokenizer.encode(test_text)\n    token_ids = mx.array(token_ids)\n    assert isinstance(token_ids, mx.array)\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_simple(model):\n    result = model.generate(\"Respond with one word. Not more.\", None)\n    assert isinstance(result, str)\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_call(model):\n    result = model(\"Respond with one word. 
Not more.\")\n    assert isinstance(result, str)\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_invalid_input_type(model):\n    with pytest.raises(NotImplementedError, match=\"is not available\"):\n        model([\"Respond with one word. Not more.\"])\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_invalid_inference_kwargs(model):\n    with pytest.raises(TypeError):\n        model(\"Respond with one word. Not more.\", foo=\"bar\")\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_inference_kwargs(model):\n    result = model(\"Write a short story about a cat.\", max_tokens=2)\n    assert isinstance(result, str)\n    assert len(result) < 20\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_regex(model):\n    result = model(\"Give a number between 0 and 9.\", Regex(r\"[0-9]\"))\n    assert isinstance(result, str)\n    assert re.match(r\"[0-9]\", result)\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_json_schema(model):\n    class Character(BaseModel):\n        name: str\n\n    result = model(\"Create a character with a name.\", Character)\n    assert \"name\" in result\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_choice(model):\n    class Foo(Enum):\n        cat = \"cat\"\n        dog = \"dog\"\n\n    result = model(\"Cat or dog?\", Foo)\n    assert result in [\"cat\", \"dog\"]\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_stream_text_stop(model):\n    generator = model.stream(\n        \"Respond with one word. 
Not more.\", None, max_tokens=100\n    )\n    assert isinstance(generator, Generator)\n    assert isinstance(next(generator), str)\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_batch(model):\n    result = model.batch(\n        [\"Respond with one word.\", \"Respond with one word.\"],\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n    assert isinstance(result[0], str)\n    assert isinstance(result[1], str)\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_batch_output_type(model):\n    with pytest.raises(\n        NotImplementedError,\n        match=\"mlx-lm does not support constrained generation with batching.\"\n    ):\n        model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n            Regex(r\"[0-9]\")\n        )\n"
  },
  {
    "path": "tests/models/test_mlxlm_type_adapter.py",
    "content": "import pytest\nimport io\nfrom unittest.mock import MagicMock\n\nfrom outlines_core import Index, Vocabulary\nfrom PIL import Image as PILImage\n\nfrom outlines.backends.outlines_core import OutlinesCoreLogitsProcessor\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.mlxlm import MLXLMTypeAdapter\n\ntry:\n    import mlx_lm\n    import mlx.core as mx\n\n    HAS_MLX = mx.metal.is_available()\nexcept ImportError:\n    HAS_MLX = False\n\n\nMODEL_NAME = \"mlx-community/SmolLM-135M-Instruct-4bit\"\n\n\n@pytest.fixture\ndef adapter():\n    _, tokenizer = mlx_lm.load(MODEL_NAME)\n    return MLXLMTypeAdapter(tokenizer=tokenizer)\n\n\n@pytest.fixture\ndef logits_processor():\n    vocabulary = Vocabulary.from_pretrained(MODEL_NAME)\n    index = Index(r\"[0-9]{3}\", vocabulary)\n    return OutlinesCoreLogitsProcessor(index, \"mlx\")\n\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\ndef test_mlxlm_type_adapter_format_input_with_template():\n    tokenizer = MagicMock()\n    tokenizer.chat_template = \"some_template\"\n    tokenizer.apply_chat_template.return_value = \"formatted_prompt\"\n\n    adapter = MLXLMTypeAdapter(tokenizer=tokenizer, has_chat_template=True)\n    message = \"prompt\"\n    result = adapter.format_input(message)\n\n    assert result == \"formatted_prompt\"\n    tokenizer.apply_chat_template.assert_called_once_with(\n        [{\"role\": \"user\", \"content\": \"prompt\"}],\n        tokenize=False,\n        add_generation_prompt=True,\n    )\n\n\ndef test_mlxlm_type_adapter_format_input_without_template():\n    tokenizer = MagicMock()\n    tokenizer.chat_template = None\n\n    adapter = MLXLMTypeAdapter(tokenizer=tokenizer, has_chat_template=False)\n    message = 
\"prompt\"\n    result = adapter.format_input(message)\n\n    assert result == \"prompt\"\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_type_adapter_format_input(adapter, image):\n    # Anything else than a string/Chat (invalid)\n    with pytest.raises(NotImplementedError):\n        adapter.format_input([\"Hello, world!\"])\n\n    # String\n    assert adapter.format_input(\"Hello, world!\") == \"Hello, world!\"\n\n    # Chat\n    messages = [\n        {\"role\": \"user\", \"content\": \"Hello, world!\"},\n        {\"role\": \"assistant\", \"content\": \"Hello, world!\"},\n    ]\n    expected = (\n        \"<|im_start|>user\\nHello, world!<|im_end|>\\n<|im_start|>assistant\\n\"\n        + \"Hello, world!<|im_end|>\\n<|im_start|>assistant\\n\"\n    )\n    assert adapter.format_input(Chat(messages=messages)) == expected\n\n    # Multi-modal (invalid)\n    with pytest.raises(\n        ValueError,\n        match=\"mlx-lm does not support multi-modal messages.\"\n    ):\n        adapter.format_input(Chat(messages=[\n            {\"role\": \"user\", \"content\": [\"prompt\", Image(image)]},\n        ]))\n\n\n@pytest.mark.skipif(not HAS_MLX, reason=\"MLX tests require Apple Silicon\")\ndef test_mlxlm_type_adapter_format_output_type(adapter, logits_processor):\n    formatted = adapter.format_output_type(logits_processor)\n    assert isinstance(formatted, list)\n    assert len(formatted) == 1\n    assert isinstance(formatted[0], OutlinesCoreLogitsProcessor)\n"
  },
  {
    "path": "tests/models/test_ollama.py",
    "content": "import io\nimport json\nfrom enum import Enum\nfrom typing import Annotated\n\nimport pytest\nfrom PIL import Image as PILImage\nfrom ollama import AsyncClient, Client\nfrom pydantic import BaseModel, Field\n\nimport outlines\nfrom outlines.inputs import Chat, Image, Video\nfrom outlines.models import AsyncOllama, Ollama\n\n\nMODEL_NAME = \"tinyllama\"\n\n\n@pytest.fixture\ndef model():\n    return Ollama(Client(), MODEL_NAME)\n\n\n@pytest.fixture\ndef model_no_model_name():\n    return Ollama(Client())\n\n\n@pytest.fixture\ndef async_model():\n    return AsyncOllama(AsyncClient(), MODEL_NAME)\n\n\n@pytest.fixture\ndef async_model_no_model_name():\n    return AsyncOllama(AsyncClient())\n\n\n@pytest.fixture(scope=\"session\")\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\ndef test_ollama_init_from_client():\n    client = Client()\n\n    # With model name\n    model = outlines.from_ollama(client, MODEL_NAME)\n    assert isinstance(model, Ollama)\n    assert model.client == client\n    assert model.model_name == MODEL_NAME\n\n    # Without model name\n    model = outlines.from_ollama(client)\n    assert isinstance(model, Ollama)\n    assert model.client == client\n    assert model.model_name is None\n\n    # With invalid client\n    with pytest.raises(ValueError, match=\"Invalid client type\"):\n        outlines.from_ollama(object())\n\n\ndef test_ollama_wrong_inference_parameters(model):\n    with pytest.raises(TypeError, match=\"got an unexpected\"):\n        model.generate(\n            \"Respond with one word. Not more.\", None, foo=10\n        )\n\n\ndef test_ollama_simple(model):\n    result = model.generate(\n        \"Respond with one word. 
Not more.\", None\n    )\n    assert isinstance(result, str)\n\n\ndef test_ollama_direct(model_no_model_name):\n    result = model_no_model_name(\n        \"Respond with one word. Not more.\",\n        None,\n        model=MODEL_NAME,\n    )\n    assert isinstance(result, str)\n\n\ndef test_ollama_simple_vision(image, model):\n    # This is not using a vision model, so it's not able to describe\n    # the image, but we're still checking the model input syntax\n    result = model.generate(\n        [\"What does this logo represent?\", Image(image)],\n        model=MODEL_NAME,\n    )\n    assert isinstance(result, str)\n\n\ndef test_ollama_chat(image, model):\n    result = model.generate(\n        Chat(\n            [\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n                {\"role\": \"user\", \"content\": [\n                    \"What does this logo represent?\",\n                    Image(image)\n                ]},\n            ]\n        ),\n        model=MODEL_NAME,\n    )\n    assert isinstance(result, str)\n\n\ndef test_ollama_json(model):\n    class Foo(BaseModel):\n        foo: Annotated[str, Field(max_length=1)]\n\n    result = model(\"Respond with one word. 
Not more.\", Foo)\n    assert isinstance(result, str)\n    assert \"foo\" in json.loads(result)\n\n\ndef test_ollama_wrong_output_type(model):\n    class Foo(Enum):\n        bar = \"Bar\"\n        foor = \"Foo\"\n\n    with pytest.raises(TypeError, match=\"is not supported\"):\n        model.generate(\"foo?\", Foo)\n\n\ndef test_ollama_wrong_input_type(model, image):\n    with pytest.raises(TypeError, match=\"is not available\"):\n        model.generate({\"foo?\": \"bar?\"}, None)\n\n    with pytest.raises(ValueError, match=\"All assets provided must be of type Image\"):\n        model.generate([\"foo?\", Image(image), Video(\"\")], None)\n\n\ndef test_ollama_stream(model):\n    generator = model.stream(\"Write a sentence about a cat.\")\n    assert isinstance(next(generator), str)\n\n\ndef test_ollama_stream_json(model_no_model_name):\n    class Foo(BaseModel):\n        foo: Annotated[str, Field(max_length=2)]\n\n    generator = model_no_model_name.stream(\"Create a character.\", Foo, model=MODEL_NAME)\n    generated_text = []\n    for text in generator:\n        generated_text.append(text)\n    assert \"foo\" in json.loads(\"\".join(generated_text))\n\n\ndef test_ollama_batch(model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n\n\ndef test_ollama_async_init_from_client():\n    client = AsyncClient()\n\n    # With model name\n    model = outlines.from_ollama(client, MODEL_NAME)\n    assert isinstance(model, AsyncOllama)\n    assert model.client == client\n    assert model.model_name == MODEL_NAME\n\n    # Without model name\n    model = outlines.from_ollama(client)\n    assert isinstance(model, AsyncOllama)\n    assert model.client == client\n    assert model.model_name is None\n\n\n@pytest.mark.asyncio\nasync def test_ollama_async_wrong_inference_parameters(async_model):\n    with pytest.raises(TypeError, match=\"got an 
unexpected\"):\n        await async_model.generate(\n            \"Respond with one word. Not more.\", None, foo=10\n        )\n\n\n@pytest.mark.asyncio\nasync def test_ollama_async_simple(async_model):\n    result = await async_model.generate(\n        \"Respond with one word. Not more.\", None\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_ollama_async_direct(async_model_no_model_name):\n    result = await async_model_no_model_name(\n        \"Respond with one word. Not more.\",\n        None,\n        model=MODEL_NAME,\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_ollama_async_simple_vision(image, async_model):\n    # This is not using a vision model, so it's not able to describe\n    # the image, but we're still checking the model input syntax\n    result = await async_model.generate(\n        [\"What does this logo represent?\", Image(image)],\n        model=MODEL_NAME,\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_ollama_async_chat(image, async_model):\n    result = await async_model.generate(\n        Chat(\n            [\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n                {\"role\": \"user\", \"content\": [\n                    \"What does this logo represent?\",\n                    Image(image)\n                ]},\n            ]\n        ),\n        model=MODEL_NAME,\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_ollama_async_json(async_model):\n    class Foo(BaseModel):\n        foo: Annotated[str, Field(max_length=1)]\n\n    result = await async_model(\"Respond with one word. 
Not more.\", Foo)\n    assert isinstance(result, str)\n    assert \"foo\" in json.loads(result)\n\n\n@pytest.mark.asyncio\nasync def test_ollama_async_wrong_output_type(async_model):\n    class Foo(Enum):\n        bar = \"Bar\"\n        foor = \"Foo\"\n\n    with pytest.raises(TypeError, match=\"is not supported\"):\n        await async_model.generate(\"foo?\", Foo)\n\n\n@pytest.mark.asyncio\nasync def test_ollama_async_wrong_input_type(async_model):\n    with pytest.raises(TypeError, match=\"is not available\"):\n        await async_model.generate({\"foo?\": \"bar?\"}, None)\n\n\n@pytest.mark.asyncio\nasync def test_ollama_async_stream(async_model):\n    async_generator = async_model.stream(\"Write a sentence about a cat.\")\n    assert isinstance(await async_generator.__anext__(), str)\n\n\n@pytest.mark.asyncio\nasync def test_ollama_async_stream_json(async_model_no_model_name):\n    class Foo(BaseModel):\n        foo: Annotated[str, Field(max_length=2)]\n\n    async_generator = async_model_no_model_name.stream(\"Create a character.\", Foo, model=MODEL_NAME)\n    generated_text = []\n    async for chunk in async_generator:\n        generated_text.append(chunk)\n    assert \"foo\" in json.loads(\"\".join(generated_text))\n\n\n@pytest.mark.asyncio\nasync def test_ollama_async_batch(async_model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        await async_model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n"
  },
  {
    "path": "tests/models/test_ollama_type_adapter.py",
    "content": "import io\nimport json\nimport pytest\nimport sys\nfrom dataclasses import dataclass\n\nfrom genson import SchemaBuilder\nfrom PIL import Image as PILImage\nfrom pydantic import BaseModel\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.ollama import OllamaTypeAdapter\nfrom outlines.types import cfg, json_schema, regex\n\nif sys.version_info >= (3, 12):\n    from typing import TypedDict\nelse:\n    from typing_extensions import TypedDict\n\n\n@pytest.fixture\ndef schema():\n    return {\n        \"properties\": {\n            \"user_id\": {\"title\": \"User Id\", \"type\": \"integer\"},\n            \"name\": {\"title\": \"Name\", \"type\": \"string\"},\n        },\n        \"required\": [\"user_id\", \"name\"],\n        \"title\": \"User\",\n        \"type\": \"object\",\n    }\n\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\n@pytest.fixture\ndef adapter():\n    return OllamaTypeAdapter()\n\n\ndef test_ollama_type_adapter_input_text(adapter):\n    text_input = \"prompt\"\n    result = adapter.format_input(text_input)\n    assert isinstance(result, list)\n    assert len(result) == 1\n    assert result[0] == {\"role\": \"user\", \"content\": text_input}\n\n\ndef test_ollama_type_adapter_input_vision(adapter, image):\n    image_input = Image(image)\n    text_input = \"prompt\"\n    result = adapter.format_input([text_input, image_input])\n    assert isinstance(result, list)\n    assert len(result) == 1\n    assert result[0] == {\n        \"role\": \"user\",\n        \"content\": text_input,\n        \"images\": [image_input.image_str],\n    }\n\n\ndef test_ollama_type_adapter_input_chat(adapter, image):\n    
image_input = Image(image)\n    chat_input = Chat(messages=[\n        {\"role\": \"system\", \"content\": \"prompt\"},\n        {\"role\": \"user\", \"content\": [\n            \"hello\",\n            image_input,\n        ]},\n        {\"role\": \"assistant\", \"content\": \"response\"},\n    ])\n    result = adapter.format_input(chat_input)\n    assert isinstance(result, list)\n    assert len(result) == 3\n    assert result[0] == {\"role\": \"system\", \"content\": \"prompt\"}\n    assert result[1] == {\"role\": \"user\", \"content\": \"hello\", \"images\": [image_input.image_str]}\n    assert result[2] == {\"role\": \"assistant\", \"content\": \"response\"}\n\n\ndef test_ollama_type_adapter_input_invalid(adapter):\n    prompt = {\"foo\": \"bar\"}\n    with pytest.raises(TypeError, match=\"The input type\"):\n        _ = adapter.format_input(prompt)\n\n    prompt = Chat(messages=[\n        {\"role\": \"user\", \"content\": {\"foo\": \"bar\"}},\n    ])\n    with pytest.raises(ValueError, match=\"Invalid content type\"):\n        _ = adapter.format_input(prompt)\n\n\ndef test_ollama_type_adapter_output_invalid(adapter):\n    with pytest.raises(TypeError, match=\"The type `str` is not supported\"):\n        adapter.format_output_type(str)\n\n    with pytest.raises(TypeError, match=\"The type `int` is not supported\"):\n        adapter.format_output_type(int)\n\n    with pytest.raises(TypeError, match=\"Regex-based structured outputs are not\"):\n        adapter.format_output_type(regex(\"[0-9]\"))\n\n    with pytest.raises(TypeError, match=\"CFG-based structured outputs are not\"):\n        adapter.format_output_type(cfg(\"\"))\n\n\ndef test_ollama_type_adapter_output_dataclass(adapter, schema):\n    @dataclass\n    class User:\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert result == schema\n\n\ndef test_ollama_type_adapter_output_typed_dict(adapter, schema):\n    class User(TypedDict):\n        user_id: int\n   
     name: str\n\n    result = adapter.format_output_type(User)\n    assert result == schema\n\n\ndef test_ollama_type_adapter_output_pydantic(adapter, schema):\n    class User(BaseModel):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert result == schema\n\n\ndef test_ollama_type_adapter_output_genson_schema_builder(adapter):\n    builder = SchemaBuilder()\n    builder.add_schema({\"type\": \"object\", \"properties\": {}})\n    builder.add_object({\"hi\": \"there\"})\n    builder.add_object({\"hi\": 5})\n\n    result = adapter.format_output_type(builder)\n    assert result == {\n        \"$schema\": \"http://json-schema.org/schema#\",\n        \"type\": \"object\",\n        \"properties\": {\"hi\": {\"type\": [\"integer\", \"string\"]}},\n        \"required\": [\"hi\"]\n    }\n\n\ndef test_ollama_type_adapter_json_schema_str(adapter, schema):\n    schema_str = json.dumps(schema)\n    result = adapter.format_output_type(json_schema(schema_str))\n    assert result == schema\n\n\ndef test_ollama_type_adapter_json_schema_dict(adapter, schema):\n    result = adapter.format_output_type(json_schema(schema))\n    assert result == schema\n"
  },
  {
    "path": "tests/models/test_openai.py",
    "content": "import io\nimport json\nimport os\nfrom typing import Annotated, Generator, AsyncGenerator\n\nimport pytest\nfrom PIL import Image as PILImage\nfrom openai import AsyncOpenAI as AsyncOpenAIClient, OpenAI as OpenAIClient\nfrom pydantic import BaseModel, Field\n\nimport outlines\nfrom outlines.inputs import Chat, Image, Video\nfrom outlines.models.openai import AsyncOpenAI, OpenAI\nfrom outlines.types import json_schema\n\nMODEL_NAME = \"gpt-4o-mini-2024-07-18\"\n\n\n@pytest.fixture(scope=\"session\")\ndef api_key():\n    \"\"\"Get the OpenAI API key from the environment, providing a default value if not found.\n\n    This fixture should be used for tests that do not make actual api calls,\n    but still require to initialize the OpenAI client.\n\n    \"\"\"\n    api_key = os.getenv(\"OPENAI_API_KEY\")\n    if not api_key:\n        return \"MOCK_VALUE\"\n    return api_key\n\n\n@pytest.fixture(scope=\"session\")\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\n@pytest.fixture(scope=\"session\")\ndef model(api_key):\n    return OpenAI(OpenAIClient(api_key=api_key), MODEL_NAME)\n\n\n@pytest.fixture(scope=\"session\")\ndef async_model(api_key):\n    return AsyncOpenAI(AsyncOpenAIClient(api_key=api_key), MODEL_NAME)\n\n\n@pytest.fixture(scope=\"session\")\ndef model_no_model_name(api_key):\n    return OpenAI(OpenAIClient(api_key=api_key))\n\n\n@pytest.fixture(scope=\"session\")\ndef async_model_no_model_name(api_key):\n    return AsyncOpenAI(AsyncOpenAIClient(api_key=api_key))\n\n\ndef test_openai_init_from_client(api_key):\n    client = OpenAIClient(api_key=api_key)\n\n    # With model name\n    model = outlines.from_openai(client, \"gpt-4o\")\n    
assert isinstance(model, OpenAI)\n    assert model.client == client\n    assert model.model_name == \"gpt-4o\"\n\n    # Without model name\n    model = outlines.from_openai(client)\n    assert isinstance(model, OpenAI)\n    assert model.client == client\n    assert model.model_name is None\n\n\ndef test_openai_wrong_inference_parameters(model):\n    with pytest.raises(TypeError, match=\"got an unexpected\"):\n        model.generate(\"prompt\", foo=10)\n\n\ndef test_openai_wrong_input_type(model, image):\n    class Foo:\n        def __init__(self, foo):\n            self.foo = foo\n\n    with pytest.raises(TypeError, match=\"is not available\"):\n        model.generate(Foo(\"prompt\"))\n\n    with pytest.raises(ValueError, match=\"All assets provided must be of type Image\"):\n        model.generate([\"foo?\", Image(image), Video(\"\")])\n\n\ndef test_openai_wrong_output_type(model):\n    class Foo:\n        def __init__(self, foo):\n            self.foo = foo\n\n    with pytest.raises(TypeError, match=\"is not available\"):\n        model.generate(\"prompt\", Foo(1))\n\n\n@pytest.mark.api_call\ndef test_openai_simple_call(model):\n    result = model.generate(\"Respond with one word. Not more.\")\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_openai_simple_call_multiple_samples(model):\n    result = model.generate(\"Respond with one word. Not more.\", n=2)\n    assert isinstance(result, list)\n    assert len(result) == 2\n    assert isinstance(result[0], str)\n    assert isinstance(result[1], str)\n\n\n@pytest.mark.api_call\ndef test_openai_direct_call(model_no_model_name):\n    result = model_no_model_name(\n        \"Respond with one word. 
Not more.\",\n        model=MODEL_NAME,\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_openai_simple_vision(image, model):\n    result = model.generate([\"What does this logo represent?\", Image(image)])\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_openai_chat(image, model):\n    result = model.generate(Chat(messages=[\n        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n        {\n            \"role\": \"user\",\n            \"content\": [\"What does this logo represent?\", Image(image)]\n        },\n    ]), max_tokens=10)\n    assert isinstance(result, str)\n\n\n@pytest.mark.api_call\ndef test_openai_simple_pydantic(model):\n    class Foo(BaseModel):\n        bar: int\n\n    result = model.generate(\"foo?\", Foo)\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_openai_simple_pydantic_refusal(model):\n    class Foo(BaseModel):\n        bar: Annotated[str, Field(int, pattern=r\"^\\d+$\")]\n\n    with pytest.raises(TypeError, match=\"OpenAI does not support your schema\"):\n        _ = model.generate(\"foo?\", Foo)\n\n\n@pytest.mark.api_call\ndef test_openai_simple_vision_pydantic(image, model):\n    class Logo(BaseModel):\n        name: int\n\n    result = model.generate([\"What does this logo represent?\", Image(image)], Logo)\n    assert isinstance(result, str)\n    assert \"name\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_openai_simple_json_schema(model):\n    class Foo(BaseModel):\n        bar: int\n\n    schema = json.dumps(Foo.model_json_schema())\n\n    result = model.generate(\"foo?\", json_schema(schema))\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.api_call\ndef test_openai_streaming(model):\n    result = model.stream(\"Respond with one word. 
Not more.\")\n    assert isinstance(result, Generator)\n    assert isinstance(next(result), str)\n\n\ndef test_openai_batch(model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n\n\ndef test_openai_async_init_from_client(api_key):\n    client = AsyncOpenAIClient(api_key=api_key)\n\n    # With model name\n    model = outlines.from_openai(client, \"gpt-4o\")\n    assert isinstance(model, AsyncOpenAI)\n    assert model.client == client\n    assert model.model_name == \"gpt-4o\"\n\n    # Without model name\n    model = outlines.from_openai(client)\n    assert isinstance(model, AsyncOpenAI)\n    assert model.client == client\n    assert model.model_name is None\n\n\n@pytest.mark.asyncio\nasync def test_openai_async_wrong_inference_parameters(async_model):\n    with pytest.raises(TypeError, match=\"got an unexpected\"):\n        await async_model.generate(\"prompt\", foo=10)\n\n\n@pytest.mark.asyncio\nasync def test_openai_async_wrong_input_type(async_model, image):\n    class Foo:\n        def __init__(self, foo):\n            self.foo = foo\n\n    with pytest.raises(TypeError, match=\"is not available\"):\n        await async_model.generate(Foo(\"prompt\"))\n\n    with pytest.raises(ValueError, match=\"All assets provided must be of type Image\"):\n        await async_model.generate([\"foo?\", Image(image), Video(\"\")])\n\n\n@pytest.mark.asyncio\nasync def test_openai_async_wrong_output_type(async_model):\n    class Foo:\n        def __init__(self, foo):\n            self.foo = foo\n\n    with pytest.raises(TypeError, match=\"is not available\"):\n        await async_model.generate(\"prompt\", Foo(1))\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_openai_async_simple_call(async_model):\n    result = await async_model.generate(\"Respond with one word. 
Not more.\")\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_openai_async_simple_call_multiple_samples(async_model):\n    result = await async_model.generate(\"Respond with one word. Not more.\", n=2)\n    assert isinstance(result, list)\n    assert len(result) == 2\n    assert isinstance(result[0], str)\n    assert isinstance(result[1], str)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_openai_async_direct_call(async_model_no_model_name):\n    result = await async_model_no_model_name(\n        \"Respond with one word. Not more.\",\n        model=MODEL_NAME,\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_openai_async_simple_vision(image, async_model):\n    result = await async_model.generate([\"What does this logo represent?\", Image(image)])\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_openai_async_chat(image, async_model):\n    result = await async_model.generate(Chat(messages=[\n        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n        {\n            \"role\": \"user\",\n            \"content\": [\"What does this logo represent?\", Image(image)]\n        },\n    ]), max_tokens=10)\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_openai_async_simple_pydantic(async_model):\n    class Foo(BaseModel):\n        bar: int\n\n    result = await async_model.generate(\"foo?\", Foo)\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_openai_async_simple_pydantic_refusal(async_model):\n    class Foo(BaseModel):\n        bar: Annotated[str, Field(int, pattern=r\"^\\d+$\")]\n\n    with pytest.raises(TypeError, match=\"OpenAI does not support your schema\"):\n        _ = await async_model.generate(\"foo?\", 
Foo)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_openai_async_simple_vision_pydantic(image, async_model):\n    class Logo(BaseModel):\n        name: int\n\n    result = await async_model.generate([\"What does this logo represent?\", Image(image)], Logo)\n    assert isinstance(result, str)\n    assert \"name\" in json.loads(result)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_openai_async_simple_json_schema(async_model):\n    class Foo(BaseModel):\n        bar: int\n\n    schema = json.dumps(Foo.model_json_schema())\n\n    result = await async_model.generate(\"foo?\", json_schema(schema))\n    assert isinstance(result, str)\n    assert \"bar\" in json.loads(result)\n\n\n@pytest.mark.asyncio\n@pytest.mark.api_call\nasync def test_openai_async_streaming(async_model):\n    result = async_model.stream(\"Respond with a single word.\")\n    assert isinstance(result, AsyncGenerator)\n    async for chunk in result:\n        assert isinstance(chunk, str)\n        break  # Just check the first chunk\n\n\n@pytest.mark.asyncio\nasync def test_openai_async_batch(async_model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        await async_model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n"
  },
  {
    "path": "tests/models/test_openai_type_adapter.py",
    "content": "import io\nimport json\nimport pytest\nimport sys\nfrom dataclasses import dataclass\nfrom typing import Literal\n\nfrom genson import SchemaBuilder\nfrom PIL import Image as PILImage\nfrom pydantic import BaseModel\n\nfrom outlines import cfg, json_schema, regex\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.openai import OpenAITypeAdapter\n\nif sys.version_info >= (3, 12):\n    from typing import TypedDict\nelse:\n    from typing_extensions import TypedDict\n\n\n@pytest.fixture\ndef schema():\n    return {\n        \"properties\": {\n            \"user_id\": {\"title\": \"User Id\", \"type\": \"integer\"},\n            \"name\": {\"title\": \"Name\", \"type\": \"string\"},\n        },\n        \"required\": [\"user_id\", \"name\"],\n        \"title\": \"User\",\n        \"type\": \"object\",\n        \"additionalProperties\": False,\n    }\n\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\n@pytest.fixture\ndef adapter():\n    return OpenAITypeAdapter()\n\n\ndef test_openai_type_adapter_input_text(adapter):\n    message = \"prompt\"\n    result = adapter.format_input(message)\n    assert result == [{\"role\": \"user\", \"content\": message}]\n\n\ndef test_openai_type_adapter_input_vision(adapter, image):\n    image_input = Image(image)\n    text_input = \"hello\"\n    result = adapter.format_input([text_input, image_input])\n    assert result == [\n        {\n            \"role\": \"user\",\n            \"content\": [\n                {\"type\": \"text\", \"text\": text_input},\n                {\n                    \"type\": \"image_url\",\n                    \"image_url\": {\n                        \"url\": 
f\"data:image/png;base64,{image_input.image_str}\"\n                    },\n                },\n            ],\n        },\n    ]\n\n\ndef test_openai_type_adapter_input_chat(adapter, image):\n    image_input = Image(image)\n    model_input = Chat(messages=[\n        {\"role\": \"system\", \"content\": \"prompt\"},\n        {\"role\": \"user\", \"content\": [\n            \"hello\",\n            image_input,\n        ]},\n        {\"role\": \"assistant\", \"content\": \"response\"},\n    ])\n    result = adapter.format_input(model_input)\n    assert result == [\n        {\"role\": \"system\", \"content\": \"prompt\"},\n        {\n            \"role\": \"user\",\n            \"content\": [\n                {\"type\": \"text\", \"text\": \"hello\"},\n                {\n                    \"type\": \"image_url\",\n                    \"image_url\": {\n                        \"url\": f\"data:image/png;base64,{image_input.image_str}\"\n                    },\n                },\n            ]\n        },\n        {\"role\": \"assistant\", \"content\": \"response\"},\n    ]\n\n\ndef test_openai_type_adapter_input_invalid(adapter):\n    @dataclass\n    class Audio:\n        file: str\n\n    with pytest.raises(TypeError, match=\"is not available\"):\n        _ = adapter.format_input(Audio(\"file\"))\n\n    with pytest.raises(\n        ValueError,\n        match=\"All assets provided must be of type Image\",\n    ):\n        _ = adapter.format_input([\"prompt\", Audio(\"file\")])\n\n    with pytest.raises(\n        ValueError,\n        match=\"The content must be a string or a list\",\n    ):\n        _ = adapter.format_input(\n            Chat(messages=[{\"role\": \"user\", \"content\": {\"foo\": \"bar\"}}])\n        )\n\n\ndef test_openai_type_adapter_output_invalid(adapter):\n    with pytest.raises(TypeError, match=\"The type `str` is not available\"):\n        adapter.format_output_type(str)\n\n    with pytest.raises(TypeError, match=\"The type `int` is not 
available\"):\n        adapter.format_output_type(int)\n\n    with pytest.raises(TypeError, match=\"The type `Literal` is not available\"):\n        adapter.format_output_type(Literal[1, 2])\n\n    with pytest.raises(TypeError, match=\"Neither regex-based\"):\n        adapter.format_output_type(regex(\"[0-9]\"))\n\n    with pytest.raises(TypeError, match=\"CFG-based structured outputs\"):\n        adapter.format_output_type(cfg(\"\"))\n\n    class Foo(BaseModel):\n        bar: str\n\n    with pytest.raises(TypeError, match=\"The type `list` is not available\"):\n        adapter.format_output_type(list[Foo])\n\n\ndef test_openai_type_adapter_output_none(adapter):\n    result = adapter.format_output_type(None)\n    assert result == {}\n\n\ndef test_openai_type_adapter_json_mode(adapter):\n    result = adapter.format_output_type(dict)\n    assert result == {\"response_format\": {\"type\": \"json_object\"}}\n\n\ndef test_openai_type_adapter_dataclass(adapter, schema):\n    @dataclass\n    class User:\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert isinstance(result, dict)\n    assert result[\"response_format\"][\"json_schema\"][\"strict\"] is True\n    assert result[\"response_format\"][\"json_schema\"][\"schema\"] == schema\n\n\ndef test_openai_type_adapter_typed_dict(adapter, schema):\n    class User(TypedDict):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert isinstance(result, dict)\n    assert result[\"response_format\"][\"json_schema\"][\"strict\"] is True\n    assert result[\"response_format\"][\"json_schema\"][\"schema\"] == schema\n\n\ndef test_openai_type_adapter_pydantic(adapter, schema):\n    class User(BaseModel):\n        user_id: int\n        name: str\n\n    result = adapter.format_output_type(User)\n    assert isinstance(result, dict)\n    assert result[\"response_format\"][\"json_schema\"][\"strict\"] is True\n    assert 
result[\"response_format\"][\"json_schema\"][\"schema\"] == schema\n\n\ndef test_openai_type_adapter_genson_schema_builder(adapter, schema):\n    builder = SchemaBuilder()\n    builder.add_schema({\"type\": \"object\", \"properties\": {}})\n    builder.add_object({\"hi\": \"there\"})\n    builder.add_object({\"hi\": 5})\n\n    result = adapter.format_output_type(builder)\n    assert isinstance(result, dict)\n    assert result[\"response_format\"][\"json_schema\"][\"strict\"] is True\n    expected_schema = {\n        \"$schema\": \"http://json-schema.org/schema#\",\n        \"type\": \"object\",\n        \"properties\": {\"hi\": {\"type\": [\"integer\", \"string\"]}},\n        \"required\": [\"hi\"],\n        \"additionalProperties\": False  # OpenAI adds this\n    }\n    assert result[\"response_format\"][\"json_schema\"][\"schema\"] == expected_schema\n\n\ndef test_openai_type_adapter_json_schema_str(adapter, schema):\n    schema_str = json.dumps(schema)\n    result = adapter.format_output_type(json_schema(schema_str))\n    assert isinstance(result, dict)\n    assert result[\"response_format\"][\"json_schema\"][\"strict\"] is True\n    assert result[\"response_format\"][\"json_schema\"][\"schema\"] == schema\n\n\ndef test_openai_type_adapter_json_schema_dict(adapter, schema):\n    result = adapter.format_output_type(json_schema(schema))\n    assert isinstance(result, dict)\n    assert result[\"response_format\"][\"json_schema\"][\"strict\"] is True\n    assert result[\"response_format\"][\"json_schema\"][\"schema\"] == schema\n"
  },
  {
    "path": "tests/models/test_sglang.py",
    "content": "# ATTENTION: When running this test with an actual SGLang server, use the\n# llguidance backend (--grammar-backend llguidance)\n# The outlines backend does not support the EBNF grammar. The xgrammar\n# backend is slow and buggy.\n\nimport io\nimport os\nimport re\nimport warnings\nfrom typing import AsyncGenerator, Generator\n\nimport pytest\nfrom PIL import Image as PILImage\nfrom openai import AsyncOpenAI, OpenAI\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.sglang import SGLang, AsyncSGLang, from_sglang\nfrom outlines.types.dsl import CFG, Regex, JsonSchema\nfrom tests.test_utils.mock_openai_client import MockOpenAIClient, MockAsyncOpenAIClient\n\n\nEBNF_YES_NO_GRAMMAR = \"\"\"\nroot ::= answer\nanswer ::= \"yes\" | \"no\"\n\"\"\"\n\n# Image for testing\nwidth, height = 1, 1\nwhite_background = (255, 255, 255)\nimage = PILImage.new(\"RGB\", (width, height), white_background)\nbuffer = io.BytesIO()\nimage.save(buffer, format=\"PNG\")\nbuffer.seek(0)\nimage = PILImage.open(buffer)\nimage_input = Image(image)\n\n\n# If the SGLANG_SERVER_URL environment variable is set, use the real SGLang server\n# Otherwise, use the mock server\nsglang_server_url = os.environ.get(\"SGLANG_SERVER_URL\")\nsglang_model_name = os.environ.get(\n    \"SGLANG_MODEL_NAME\", \"qwen/qwen2.5-0.5b-instruct\"\n)\nif sglang_server_url:\n    openai_client = OpenAI(base_url=sglang_server_url, api_key=\"foo\")\n    async_openai_client = AsyncOpenAI(base_url=sglang_server_url, api_key=\"foo\")\nelse:\n    warnings.warn(\"No SGLang server URL provided, using mock server\")\n    openai_client = MockOpenAIClient()\n    async_openai_client = MockAsyncOpenAIClient()\n\nmock_responses = [\n    (\n        {\n            'messages': [\n                {'role': \"user\", 'content': 'Respond with a single word.'}\n            ],\n            'model': sglang_model_name,\n        },\n        \"foo\"\n    ),\n    (\n        {\n            'messages': [\n                
{'role': \"user\", 'content': 'Respond with a single word.'}\n            ],\n            'model': sglang_model_name,\n            'stream': True\n        },\n        [\"foo\", \"bar\"]\n    ),\n    (\n        {\n            'messages': [\n                {'role': \"user\", 'content': 'Respond with a single word.'}\n            ],\n            'n': 2,\n            'model': sglang_model_name,\n        },\n        [\"foo\", \"bar\"]\n    ),\n    (\n        {\n            'messages': [{'role': \"user\", 'content': 'foo?'}],\n            'model': sglang_model_name,\n            'max_tokens': 10,\n            'response_format': {\n                'type': 'json_schema',\n                'json_schema': {\n                    'name': 'default',\n                    'strict': True,\n                    'schema': {\n                        'type': 'object',\n                        'properties': {'bar': {'type': 'string'}},\n                        'additionalProperties': False\n                    }\n                }\n            }\n        },\n        '{\"foo\": \"bar\"}'\n    ),\n    (\n        {\n            'messages': [{'role': \"user\", 'content': 'foo?'}],\n            'model': sglang_model_name,\n            'max_tokens': 10,\n            'extra_body': {\n                'regex': '([0-9]{3})',\n            },\n        },\n        \"123\"\n    ),\n    (\n        {\n            'messages': [{'role': \"user\", 'content': 'foo?'}],\n            'model': sglang_model_name,\n            'max_tokens': 10,\n            'extra_body': {\n                'ebnf': EBNF_YES_NO_GRAMMAR,\n            },\n        },\n        \"yes\"\n    ),\n    (\n        {\n            'messages': [\n                {\n                    \"role\": \"user\",\n                    \"content\": [\n                        {\"type\": \"text\", \"text\": \"hello\"},\n                        {\n                            \"type\": \"image_url\",\n                            \"image_url\": {\n           
                     \"url\": f\"data:image/png;base64,{image_input.image_str}\"\n                            },\n                        },\n                    ]\n                }\n            ],\n            'model': sglang_model_name,\n            'max_tokens': 10,\n        },\n        \"foo\"\n    ),\n    (\n        {\n            'messages': [\n                {\"role\": \"system\", \"content\": \"prompt\"},\n                {\n                    \"role\": \"user\",\n                    \"content\": [\n                        {\"type\": \"text\", \"text\": \"hello\"},\n                        {\n                            \"type\": \"image_url\",\n                            \"image_url\": {\n                                \"url\": f\"data:image/png;base64,{image_input.image_str}\"\n                            },\n                        },\n                    ],\n                },\n                {\"role\": \"assistant\", \"content\": \"response\"},\n            ],\n            'model': sglang_model_name,\n            'max_tokens': 10,\n        },\n        \"foo\"\n    )\n]\n\n\n# If the SGLANG_SERVER_URL environment variable is not set, add the mock\n# responses to the mock clients\nif not sglang_server_url:\n    async_openai_client.add_mock_responses(mock_responses)\n    openai_client.add_mock_responses(mock_responses)\n\n\n@pytest.fixture\ndef sync_model():\n    return SGLang(openai_client, model_name=sglang_model_name)\n\n\n@pytest.fixture\ndef sync_model_no_model_name():\n    return SGLang(openai_client)\n\n\n@pytest.fixture\ndef async_model():\n    return AsyncSGLang(async_openai_client, model_name=sglang_model_name)\n\n\n@pytest.fixture\ndef async_model_no_model_name():\n    return AsyncSGLang(async_openai_client)\n\n\ndef test_sglang_init():\n    # We do not rely on the mock server here because we need an object\n    # of type OpenAI and AsyncOpenAI to test the init function.\n    openai_client = OpenAI(base_url=\"http://localhost:11434\", 
api_key=\"foo\")\n    async_openai_client = AsyncOpenAI(base_url=\"http://localhost:11434\", api_key=\"foo\")\n\n    # Sync with model name\n    model = from_sglang(openai_client, sglang_model_name)\n    assert isinstance(model, SGLang)\n    assert model.client == openai_client\n    assert model.model_name == sglang_model_name\n\n    # Sync without model name\n    model = from_sglang(openai_client)\n    assert isinstance(model, SGLang)\n    assert model.client == openai_client\n    assert model.model_name is None\n\n    # Async with model name\n    model = from_sglang(async_openai_client, sglang_model_name)\n    assert isinstance(model, AsyncSGLang)\n    assert model.client == async_openai_client\n    assert model.model_name == sglang_model_name\n\n    # Async without model name\n    model = from_sglang(async_openai_client)\n    assert isinstance(model, AsyncSGLang)\n    assert model.client == async_openai_client\n    assert model.model_name is None\n\n    with pytest.raises(ValueError, match=\"Unsupported client type\"):\n        from_sglang(\"foo\")\n\n\ndef test_sglang_sync_simple_call(sync_model):\n    result = sync_model(\"Respond with a single word.\",)\n    assert isinstance(result, str)\n\n\ndef test_sglang_sync_streaming(sync_model_no_model_name):\n    result = sync_model_no_model_name.stream(\n        \"Respond with a single word.\",\n        model=sglang_model_name,\n    )\n    assert isinstance(result, Generator)\n    assert isinstance(next(result), str)\n\n\ndef test_sglang_sync_batch(sync_model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        sync_model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n\n\ndef test_sglang_sync_vision(sync_model):\n    result = sync_model([\"hello\", image_input], max_tokens=10)\n    assert isinstance(result, str)\n\n\ndef test_sglang_sync_vision_chat(sync_model):\n    result = sync_model(\n        Chat(messages=[\n            {\"role\": 
\"system\", \"content\": \"prompt\"},\n            {\"role\": \"user\", \"content\": [\n                \"hello\",\n                image_input,\n            ]},\n            {\"role\": \"assistant\", \"content\": \"response\"},\n        ]),\n        max_tokens=10,\n    )\n    assert isinstance(result, str)\n\n\ndef test_sglang_sync_multiple_samples(sync_model):\n    result = sync_model(\"Respond with a single word.\", n=2)\n    assert isinstance(result, list)\n    assert len(result) == 2\n    assert isinstance(result[0], str)\n    assert isinstance(result[1], str)\n\n\ndef test_sglang_sync_json(sync_model):\n    json_string = (\n        '{\"type\": \"object\", \"properties\":'\n        + ' {\"bar\": {\"type\": \"string\"}}}'\n    )\n    result = sync_model(\"foo?\", JsonSchema(json_string), max_tokens=10)\n    assert isinstance(result, str)\n    assert \"bar\" in result\n\n\ndef test_sglang_sync_regex(sync_model):\n    result = sync_model(\"foo?\", Regex(r\"[0-9]{3}\"), max_tokens=10)\n    assert isinstance(result, str)\n    assert re.match(r\"[0-9]{3}\", result)\n\n\ndef test_sglang_sync_cfg(sync_model):\n    with pytest.warns(\n        UserWarning,\n        match=\"SGLang grammar-based structured outputs expects an EBNF\"\n    ):\n        result = sync_model(\"foo?\", CFG(EBNF_YES_NO_GRAMMAR), max_tokens=10)\n        assert isinstance(result, str)\n        assert result in [\"yes\", \"no\"]\n\n\n@pytest.mark.asyncio\nasync def test_sglang_async_simple_call(async_model):\n    result = await async_model(\"Respond with a single word.\",)\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_sglang_async_streaming(async_model_no_model_name):\n    result = async_model_no_model_name.stream(\n        \"Respond with a single word.\",\n        model=sglang_model_name,\n    )\n    assert isinstance(result, AsyncGenerator)\n    async for chunk in result:\n        assert isinstance(chunk, str)\n        break  # Just check the first 
chunk\n\n\n@pytest.mark.asyncio\nasync def test_sglang_async_batch(async_model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        await async_model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n\n\n@pytest.mark.asyncio\nasync def test_sglang_async_vision(async_model):\n    result = await async_model([\"hello\", image_input], max_tokens=10)\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_sglang_async_vision_chat(async_model):\n    result = await async_model(\n        Chat(messages=[\n            {\"role\": \"system\", \"content\": \"prompt\"},\n            {\"role\": \"user\", \"content\": [\n                \"hello\",\n                image_input,\n            ]},\n            {\"role\": \"assistant\", \"content\": \"response\"},\n        ]),\n        max_tokens=10,\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_sglang_async_multiple_samples(async_model):\n    result = await async_model(\"Respond with a single word.\", n=2)\n    assert isinstance(result, list)\n    assert len(result) == 2\n    assert isinstance(result[0], str)\n    assert isinstance(result[1], str)\n\n\n@pytest.mark.asyncio\nasync def test_sglang_async_json(async_model):\n    json_string = (\n        '{\"type\": \"object\", \"properties\":'\n        + ' {\"bar\": {\"type\": \"string\"}}}'\n    )\n    result = await async_model(\"foo?\", JsonSchema(json_string), max_tokens=10)\n    assert isinstance(result, str)\n    assert \"bar\" in result\n\n\n@pytest.mark.asyncio\nasync def test_sglang_async_regex(async_model):\n    result = await async_model(\"foo?\", Regex(r\"[0-9]{3}\"), max_tokens=10)\n    assert isinstance(result, str)\n    assert re.match(r\"[0-9]{3}\", result)\n\n\n@pytest.mark.asyncio\nasync def test_sglang_async_cfg(async_model):\n    result = await async_model(\"foo?\", CFG(EBNF_YES_NO_GRAMMAR), max_tokens=10)\n    assert isinstance(result, 
str)\n    assert result in [\"yes\", \"no\"]\n"
  },
  {
    "path": "tests/models/test_sglang_type_adapter.py",
    "content": "import io\nimport json\nimport pytest\nfrom dataclasses import dataclass\n\nfrom PIL import Image as PILImage\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.sglang import SGLangTypeAdapter\nfrom outlines.types import CFG, JsonSchema\n\n\nCFG_STRING = \"\"\"\n?start: expr\n?expr: NUMBER\n\"\"\"\n\nJSON_SCHEMA_STRING = \"\"\"\n{\n    \"type\": \"object\",\n    \"properties\": {\n        \"answer\": {\"type\": \"number\"}\n    }\n}\n\"\"\"\n\n\n@pytest.fixture\ndef type_adapter():\n    return SGLangTypeAdapter()\n\n@pytest.fixture\ndef cfg_instance():\n    return CFG(CFG_STRING)\n\n@pytest.fixture\ndef json_schema_instance():\n    return JsonSchema(JSON_SCHEMA_STRING)\n\n@pytest.fixture\ndef json_schema_whitespace_instance():\n    return JsonSchema(JSON_SCHEMA_STRING, whitespace_pattern=\"\\n\")\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\ndef test_sglang_type_adapter_input_text(type_adapter):\n    message = \"prompt\"\n    result = type_adapter.format_input(message)\n    assert result == [{\"role\": \"user\", \"content\": message}]\n\n\ndef test_sglang_type_adapter_input_vision(type_adapter, image):\n    image_input = Image(image)\n    result = type_adapter.format_input([\"hello\", image_input])\n    assert result == [\n        {\n            \"role\": \"user\",\n            \"content\": [\n                {\"type\": \"text\", \"text\": \"hello\"},\n                {\n                    \"type\": \"image_url\",\n                    \"image_url\": {\n                        \"url\": f\"data:image/png;base64,{image_input.image_str}\"\n                    },\n                },\n            ]\n        }\n    
]\n\n\ndef test_sglang_type_adapter_input_chat(type_adapter, image):\n    image_input = Image(image)\n    model_input = Chat(messages=[\n        {\"role\": \"system\", \"content\": \"prompt\"},\n        {\"role\": \"user\", \"content\": [\n            \"hello\",\n            image_input,\n        ]},\n        {\"role\": \"assistant\", \"content\": \"response\"},\n    ])\n    result = type_adapter.format_input(model_input)\n    assert result == [\n        {\"role\": \"system\", \"content\": \"prompt\"},\n        {\n            \"role\": \"user\",\n            \"content\": [\n                {\"type\": \"text\", \"text\": \"hello\"},\n                {\n                    \"type\": \"image_url\",\n                    \"image_url\": {\n                        \"url\": f\"data:image/png;base64,{image_input.image_str}\"\n                    },\n                },\n            ],\n        },\n        {\"role\": \"assistant\", \"content\": \"response\"},\n    ]\n\n\ndef test_sglang_type_adapter_input_invalid(type_adapter):\n    @dataclass\n    class Audio:\n        file: str\n\n    prompt = Audio(\n        \"file\",\n    )\n    with pytest.raises(TypeError, match=\"The input type\"):\n        _ = type_adapter.format_input(prompt)\n\n\ndef test_sglang_type_adapter_output_type(\n    type_adapter,\n    cfg_instance,\n    json_schema_instance,\n    json_schema_whitespace_instance,\n):\n    assert type_adapter.format_output_type(None) == {}\n    with pytest.warns(\n        UserWarning,\n        match=\"SGLang grammar-based structured outputs expects an EBNF\"\n    ):\n        assert type_adapter.format_output_type(cfg_instance) == {\n            \"extra_body\": {\"ebnf\": CFG_STRING}\n        }\n    assert type_adapter.format_output_type(json_schema_instance) == {\n        \"response_format\": {\n            \"type\": \"json_schema\",\n            \"json_schema\": {\n                \"name\": \"default\",\n                \"strict\": True,\n                \"schema\": {\n     
               **json.loads(JSON_SCHEMA_STRING),\n                    \"additionalProperties\": False,\n                },\n            },\n        }\n    }\n    # whitespace pattern is ignored\n    assert type_adapter.format_output_type(json_schema_whitespace_instance) == {\n        \"response_format\": {\n            \"type\": \"json_schema\",\n            \"json_schema\": {\n                \"name\": \"default\",\n                \"strict\": True,\n                \"schema\": {\n                    **json.loads(JSON_SCHEMA_STRING),\n                    \"additionalProperties\": False,\n                },\n            },\n        }\n    }\n    assert type_adapter.format_output_type(int) == {\n        \"extra_body\": {\"regex\": \"([+-]?(0|[1-9][0-9]*))\"}\n    }\n"
  },
  {
    "path": "tests/models/test_tgi.py",
    "content": "import os\nimport re\nimport warnings\nfrom typing import AsyncGenerator, Generator\n\nimport pytest\nfrom huggingface_hub import InferenceClient, AsyncInferenceClient\n\nfrom outlines.models.tgi import TGI, AsyncTGI, from_tgi\nfrom outlines.types.dsl import CFG, Regex, JsonSchema\nfrom tests.test_utils.mock_tgi_client import MockTGIInferenceClient, MockAsyncTGIInferenceClient\n\n\nYES_NO_GRAMMAR = \"\"\"\n?start: answer\n\nanswer: \"yes\" | \"no\"\n\"\"\"\n\n# If the TGI_SERVER_URL environment variable is set, use the real TGI server\n# Otherwise, use the mock server\ntgi_server_url = os.environ.get(\"TGI_SERVER_URL\")\nif tgi_server_url:\n    tgi_client = InferenceClient(tgi_server_url)\n    async_tgi_client = AsyncInferenceClient(tgi_server_url)\nelse:\n    warnings.warn(\"No TGI server URL provided, using mock server\")\n    tgi_client = MockTGIInferenceClient()\n    async_tgi_client = MockAsyncTGIInferenceClient()\n\nmock_responses = [\n    (\n        {\n            'prompt': 'Respond with a single word.',\n            'max_new_tokens': 10,\n        },\n        \"foo\"\n    ),\n    (\n        {\n            'prompt': 'Respond with a single word.',\n            'max_new_tokens': 10,\n            'stream': True\n        },\n        [\"foo\", \"bar\"]\n    ),\n    (\n        {\n            'prompt': 'foo?',\n            'max_new_tokens': 10,\n            'grammar': {\n                'type': 'json',\n                'value': {\n                    'type': 'object',\n                    'properties': {\n                        'bar': {'type': 'string'}\n                    },\n                    'required': ['bar']\n                }\n            }\n        },\n        '{\"foo\": \"bar\"}'\n    ),\n    (\n        {\n            'prompt': 'foo?',\n            'max_new_tokens': 10,\n            'grammar': {\n                'type': 'regex',\n                'value': '([0-9]{3})',\n            },\n        },\n        \"123\"\n    ),\n]\n\n# If the 
TGI_SERVER_URL environment variable is not set, add the mock\n# responses to the mock clients\nif not tgi_server_url:\n    async_tgi_client.add_mock_responses(mock_responses)\n    tgi_client.add_mock_responses(mock_responses)\n\n\n@pytest.fixture\ndef sync_model():\n    return TGI(tgi_client)\n\n\n@pytest.fixture\ndef async_model():\n    return AsyncTGI(async_tgi_client)\n\n\ndef test_tgi_init():\n    model = from_tgi(\n        InferenceClient(\"http://localhost:11434\"),\n    )\n    assert isinstance(model, TGI)\n\n    model = from_tgi(\n        AsyncInferenceClient(\"http://localhost:11434\"),\n    )\n    assert isinstance(model, AsyncTGI)\n\n    with pytest.raises(ValueError, match=\"Unsupported client type\"):\n        from_tgi(\"foo\")\n\n\ndef test_tgi_sync_simple_call(sync_model):\n    result = sync_model(\"Respond with a single word.\", max_new_tokens=10)\n    assert isinstance(result, str)\n\n\ndef test_tgi_sync_streaming(sync_model):\n    result = sync_model.stream(\n        \"Respond with a single word.\",\n        max_new_tokens=10,\n    )\n    assert isinstance(result, Generator)\n    assert isinstance(next(result), str)\n\n\ndef test_tgi_sync_batch(sync_model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        sync_model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n\n\ndef test_tgi_sync_json(sync_model):\n    json_string = '{\"type\": \"object\", \"properties\": {\"bar\": {\"type\": \"string\"}}, \"required\": [\"bar\"]}'\n    result = sync_model(\"foo?\", JsonSchema(json_string), max_new_tokens=10)\n    assert isinstance(result, str)\n    assert \"bar\" in result\n\n\ndef test_tgi_sync_regex(sync_model):\n    result = sync_model(\"foo?\", Regex(r\"[0-9]{3}\"), max_new_tokens=10)\n    assert isinstance(result, str)\n    assert re.match(r\"[0-9]{3}\", result)\n\n\ndef test_tgi_sync_cfg(sync_model):\n    with pytest.raises(\n        NotImplementedError,\n        match=\"TGI 
does not support CFG-based structured outputs\",\n    ):\n        sync_model(\"foo?\", CFG(YES_NO_GRAMMAR), max_new_tokens=10)\n\n\n@pytest.mark.asyncio\nasync def test_tgi_async_simple_call(async_model):\n    result = await async_model(\"Respond with a single word.\", max_new_tokens=10)\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_tgi_async_streaming(async_model):\n    result = async_model.stream(\"Respond with a single word.\", max_new_tokens=10)\n    assert isinstance(result, AsyncGenerator)\n    async for chunk in result:\n        assert isinstance(chunk, str)\n        break  # Just check the first chunk\n\n\n@pytest.mark.asyncio\nasync def test_tgi_async_batch(async_model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        await async_model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n\n\n@pytest.mark.asyncio\nasync def test_tgi_async_json(async_model):\n    json_string = '{\"type\": \"object\", \"properties\": {\"bar\": {\"type\": \"string\"}}, \"required\": [\"bar\"]}'\n    result = await async_model(\"foo?\", JsonSchema(json_string), max_new_tokens=10)\n    assert isinstance(result, str)\n    assert \"bar\" in result\n\n\n@pytest.mark.asyncio\nasync def test_tgi_async_regex(async_model):\n    result = await async_model(\"foo?\", Regex(r\"[0-9]{3}\"), max_new_tokens=10)\n    assert isinstance(result, str)\n    assert re.match(r\"[0-9]{3}\", result)\n\n\n@pytest.mark.asyncio\nasync def test_tgi_async_cfg(async_model):\n    with pytest.raises(\n        NotImplementedError,\n        match=\"TGI does not support CFG-based structured outputs\",\n    ):\n        await async_model(\"foo?\", CFG(YES_NO_GRAMMAR), max_new_tokens=10)\n"
  },
  {
    "path": "tests/models/test_tgi_model_adapter.py",
    "content": "import json\nimport pytest\n\nfrom outlines.models.tgi import TGITypeAdapter\nfrom outlines.types import CFG, JsonSchema\n\n\nCFG_STRING = \"\"\"\n?start: expr\n?expr: NUMBER\n\"\"\"\n\nJSON_SCHEMA_STRING = \"\"\"\n{\n    \"type\": \"object\",\n    \"properties\": {\n        \"answer\": {\"type\": \"number\"}\n    }\n}\n\"\"\"\n\n\n@pytest.fixture\ndef type_adapter():\n    return TGITypeAdapter()\n\n@pytest.fixture\ndef cfg_instance():\n    return CFG(CFG_STRING)\n\n@pytest.fixture\ndef json_schema_instance():\n    return JsonSchema(JSON_SCHEMA_STRING)\n\n@pytest.fixture\ndef json_schema_whitespace_instance():\n    return JsonSchema(JSON_SCHEMA_STRING, whitespace_pattern=\"\\n\")\n\n\ndef test_tgi_type_adapter_input_text(type_adapter):\n    message = \"prompt\"\n    assert message == type_adapter.format_input(message)\n\n\ndef test_tgi_type_adapter_input_invalid(type_adapter):\n    with pytest.raises(\n        NotImplementedError,\n        match=\"is not available with TGI\",\n    ):\n        type_adapter.format_input({\"foo\": \"bar\"})\n\n\ndef test_tgi_type_adapter_output_type(\n    type_adapter,\n    json_schema_instance,\n    json_schema_whitespace_instance,\n):\n    assert type_adapter.format_output_type(None) == {}\n    assert type_adapter.format_output_type(json_schema_instance) == {\n        \"grammar\": {\n            \"type\": \"json\",\n            \"value\": json.loads(JSON_SCHEMA_STRING),\n        }\n    }\n    # whitespace_pattern is ignored\n    assert type_adapter.format_output_type(json_schema_whitespace_instance) == {\n        \"grammar\": {\n            \"type\": \"json\",\n            \"value\": json.loads(JSON_SCHEMA_STRING),\n        }\n    }\n    assert type_adapter.format_output_type(int) == {\n        \"grammar\": {\n            \"type\": \"regex\",\n            \"value\": \"([+-]?(0|[1-9][0-9]*))\",\n        }\n    }\n\n\ndef test_tgi_type_adapter_output_type_invalid(\n    type_adapter,\n    cfg_instance,\n):\n    with 
pytest.raises(\n        NotImplementedError,\n        match=\"TGI does not support CFG-based structured outputs.\",\n    ):\n        type_adapter.format_output_type(cfg_instance)\n"
  },
  {
    "path": "tests/models/test_tokenizer.py",
    "content": "import pytest\n\nfrom outlines.models.tokenizer import Tokenizer, _check_hf_chat_template\n\n\ndef test_tokenizer():\n    with pytest.raises(TypeError, match=\"instantiate abstract\"):\n        Tokenizer()\n\ndef test_check_hf_chat_template():\n    from transformers import AutoTokenizer\n\n    assert _check_hf_chat_template(AutoTokenizer.from_pretrained(\"openai-community/gpt2\")) is False\n    assert _check_hf_chat_template(AutoTokenizer.from_pretrained(\"Qwen/Qwen3-0.6B\")) is True\n"
  },
  {
    "path": "tests/models/test_transformers.py",
    "content": "import re\nfrom enum import Enum\n\nfrom pydantic import BaseModel\nimport pytest\nimport torch\nimport transformers\n\nimport outlines\nfrom outlines.inputs import Chat\nfrom outlines.models.transformers import (\n    Transformers,\n    TransformerTokenizer,\n    TransformersTypeAdapter,\n)\nfrom outlines.types import Regex\n\n\nTEST_MODEL = \"erwanf/gpt2-mini\"\nTEST_MODEL_MAMBA = \"hf-internal-testing/tiny-random-MambaForCausalLM\"\nTEST_MODEL_BART = \"trl-internal-testing/tiny-BartModel\"\n\n\ndef test_transformers_instantiate_invalid():\n    with pytest.raises(ValueError):\n        outlines.from_transformers(\n            transformers.AutoModelForCausalLM.from_pretrained(TEST_MODEL),\n            int,\n        )\n\n\ndef test_transformers_instantiate_simple():\n    model = outlines.from_transformers(\n        transformers.AutoModelForCausalLM.from_pretrained(TEST_MODEL),\n        transformers.AutoTokenizer.from_pretrained(TEST_MODEL),\n    )\n    assert isinstance(model, Transformers)\n    assert isinstance(model.tokenizer, TransformerTokenizer)\n    assert isinstance(model.type_adapter, TransformersTypeAdapter)\n    assert model.tensor_library_name == \"torch\"\n\n\ndef test_transformers_instantiate_mamba():\n    model = outlines.from_transformers(\n        transformers.MambaForCausalLM.from_pretrained(TEST_MODEL_MAMBA),\n        transformers.AutoTokenizer.from_pretrained(TEST_MODEL),\n    )\n    assert isinstance(model, Transformers)\n\n\ndef test_transformers_instantiate_tokenizer_kwargs_dtype():\n    model = outlines.from_transformers(\n        transformers.AutoModelForCausalLM.from_pretrained(TEST_MODEL),\n        transformers.AutoTokenizer.from_pretrained(\n            TEST_MODEL, additional_special_tokens=[\"<t1>\", \"<t2>\"]\n        ),\n        device_dtype=torch.bfloat16,\n    )\n    assert \"<t1>\" in model.tokenizer.special_tokens\n    assert \"<t2>\" in model.tokenizer.special_tokens\n    assert model.device_dtype == 
torch.bfloat16\n\n\n@pytest.fixture\ndef model():\n    model = outlines.from_transformers(\n        transformers.AutoModelForCausalLM.from_pretrained(TEST_MODEL),\n        transformers.AutoTokenizer.from_pretrained(TEST_MODEL),\n    )\n    chat_template = '{% for message in messages %}{{ message.role }}: {{ message.content }}{% endfor %}'\n    model.type_adapter.tokenizer.chat_template = chat_template\n\n    return model\n\n\n@pytest.fixture\ndef model_bart():\n    model = outlines.from_transformers(\n        transformers.BartForConditionalGeneration.from_pretrained(TEST_MODEL_BART),\n        transformers.BartTokenizer.from_pretrained(TEST_MODEL_BART),\n    )\n    return model\n\n\ndef test_transformers_simple(model):\n    result = model.generate(\"Respond with one word. Not more.\", None)\n    assert isinstance(result, str)\n\n\ndef test_transformers_call(model, model_bart):\n    result = model(\"Respond with one word. Not more.\")\n    assert isinstance(result, str)\n\n    model.device_dtype = torch.bfloat16\n    result = model(\"Respond with one word. Not more.\")\n    assert isinstance(result, str)\n\n    result = model_bart(\"Respond with one word. Not more.\")\n    assert isinstance(result, str)\n\n\ndef test_transformers_chat(model):\n    result = model(\n        Chat(messages=[\n            {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n            {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n        ])\n    )\n    assert isinstance(result, str)\n\n\ndef test_transformers_inference_kwargs(model):\n    result = model(\"Respond with one word. Not more.\", max_new_tokens=100)\n    assert isinstance(result, str)\n\n\ndef test_transformers_invalid_inference_kwargs(model):\n    with pytest.raises(ValueError):\n        model(\"Respond with one word. 
Not more.\", foo=\"bar\")\n\n\ndef test_transformers_regex(model):\n    result = model(\"Give a number between 0 and 9.\", Regex(r\"[0-9]\"))\n    assert isinstance(result, str)\n    assert re.match(r\"[0-9]\", result)\n\n\ndef test_transformers_json(model):\n    class Character(BaseModel):\n        name: str\n\n    result = model(\"Create a character with a name.\", Character)\n    assert \"name\" in result\n\n\ndef test_transformers_choice(model):\n    class Foo(Enum):\n        cat = \"cat\"\n        dog = \"dog\"\n\n    result = model(\"Cat or dog?\", Foo)\n    assert result in [\"cat\", \"dog\"]\n\n\ndef test_transformers_multiple_samples(model):\n    result = model(\"Respond with one word. Not more.\")\n    assert isinstance(result, str)\n    result = model(\n        \"Respond with one word. Not more.\", num_return_sequences=2, do_sample=True\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n\n\ndef test_transformers_batch(model):\n    result = model.batch(\n        [\"Respond with one word. Not more.\", \"Respond with one word. Not more.\"]\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n\n    result = model.batch(\n        [\"Respond with one word. Not more.\", \"Respond with one word. 
Not more.\"],\n        num_return_sequences=2,\n        do_sample=True,\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n    for item in result:\n        assert isinstance(item, list)\n        assert len(item) == 2\n\n    result = model.batch(\n        [\n            Chat(messages=[\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n                {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n            ]),\n            Chat(messages=[\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n                {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n            ]),\n        ],\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n\n\ndef test_transformers_multiple_samples_constrained(model):\n    class Foo(Enum):\n        cat = \"cat\"\n        dog = \"dog\"\n\n    result = model(\"Cat or dog?\", Foo, num_return_sequences=2, do_sample=True)\n    assert isinstance(result, list)\n    assert len(result) == 2\n    assert result[0] in [\"cat\", \"dog\"]\n    assert result[1] in [\"cat\", \"dog\"]\n\n\ndef test_transformers_batch_constrained(model):\n    class Foo(Enum):\n        cat = \"cat\"\n        dog = \"dog\"\n\n    result = model.batch(\n        [\"Cat or dog?\", \"Cat or dog?\"],\n        Foo,\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n    assert result[0] in [\"cat\", \"dog\"]\n    assert result[1] in [\"cat\", \"dog\"]\n\n    result = model.batch(\n        [\"Cat or dog?\", \"Cat or dog?\"],\n        Foo,\n        num_return_sequences=2,\n        do_sample=True,\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n    for item in result:\n        assert isinstance(item, list)\n        assert len(item) == 2\n        assert item[0] in [\"cat\", \"dog\"]\n        assert item[1] in [\"cat\", \"dog\"]\n\n\ndef 
test_transformers_streaming(model):\n    with pytest.raises(NotImplementedError, match=\"Streaming is not implemented\"):\n        model.stream(\"Respond with one word. Not more.\")\n\n\n@pytest.mark.parametrize(\n    \"model_name\",\n    [\n        TEST_MODEL,\n        \"HuggingFaceTB/SmolLM2-135M\"\n    ],\n)\ndef test_transformers_parametrized_smoke(model_name):\n    \"\"\"\n    Smoke test to ensure basic constrained generation works across\n    different tokenizers.\n    \"\"\"\n    hf_model = transformers.AutoModelForCausalLM.from_pretrained(model_name)\n    hf_model.eval()\n\n    hf_tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)\n\n    model = outlines.from_transformers(hf_model, hf_tokenizer)\n\n    prompt = \"Is 1+1=2? Answer Yes or No:\"\n    constraint = Regex(r\"\\s*(Yes|No)\")\n\n    out = model(\n        prompt,\n        constraint,\n        max_new_tokens=5,\n        do_sample=False,\n    )\n\n    assert out.strip() in {\"Yes\", \"No\"}\n"
  },
  {
    "path": "tests/models/test_transformers_multimodal.py",
    "content": "# we only test vision models here as audio models are too heavy to run on CI\n\nimport io\nimport re\nimport torch\nfrom enum import Enum\n\nimport pytest\nfrom PIL import Image as PILImage\nfrom pydantic import BaseModel\nfrom transformers import (\n    LlavaForConditionalGeneration,\n    AutoProcessor,\n)\n\nimport outlines\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.transformers import (\n    TransformersMultiModal,\n    TransformerTokenizer,\n    TransformersMultiModalTypeAdapter,\n)\nfrom outlines.types import Regex\n\nTEST_MODEL = \"trl-internal-testing/tiny-LlavaForConditionalGeneration\"\n\n\n@pytest.fixture\ndef image():\n    width, height = 256, 256\n    blue_background = (0, 0, 255)\n    image = PILImage.new(\"RGB\", (width, height), blue_background)\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\n@pytest.fixture\ndef model():\n    model = outlines.from_transformers(\n        LlavaForConditionalGeneration.from_pretrained(TEST_MODEL),\n        AutoProcessor.from_pretrained(TEST_MODEL),\n    )\n\n    return model\n\n\ndef test_transformers_multimodal_instantiate():\n    model = outlines.from_transformers(\n        LlavaForConditionalGeneration.from_pretrained(TEST_MODEL),\n        AutoProcessor.from_pretrained(TEST_MODEL),\n        device_dtype=torch.bfloat16,\n    )\n    assert isinstance(model, TransformersMultiModal)\n    assert isinstance(model.tokenizer, TransformerTokenizer)\n    assert isinstance(model.type_adapter, TransformersMultiModalTypeAdapter)\n    assert model.tensor_library_name == \"torch\"\n    assert model.device_dtype == torch.bfloat16\n\n\ndef test_transformers_multimodal_simple(model, image):\n    result = model.generate(\n        [\"<image>Describe this image in one sentence:\", Image(image)],\n        None,\n        max_new_tokens=2,\n    )\n    assert isinstance(result, str)\n\n\ndef 
test_transformers_multimodal_call(model, image):\n    result = model(\n        [\"<image>Describe this image in one sentence:\", Image(image)],\n        max_new_tokens=2,\n    )\n    assert isinstance(result, str)\n\n    model.device_dtype = torch.bfloat16\n    result = model(\n        [\"<image>Describe this image in one sentence:\", Image(image)],\n        max_new_tokens=2,\n    )\n    assert isinstance(result, str)\n\n\ndef test_transformers_multimodal_wrong_number_image(model, image):\n    with pytest.raises(ValueError):\n        model(\n            [\n                \"<image>Describe this image in one sentence:\",\n                Image(image),\n                Image(image),\n            ],\n        )\n\n\ndef test_transformers_multimodal_wrong_input_type(model):\n    with pytest.raises(TypeError):\n        model.generate(\"invalid input\", None)\n\n\ndef test_transformers_multimodal_chat(model, image):\n    result = model(\n        Chat(messages=[\n            {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n            {\n                \"role\": \"user\",\n                \"content\": [\n                    \"Describe this image in one sentence:\",\n                    Image(image),\n                ],\n            },\n        ]),\n        max_new_tokens=2,\n    )\n    assert isinstance(result, str)\n\n    result = model(\n        Chat(messages=[\n            {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n            {\n                \"role\": \"user\",\n                \"content\": [\n                    {\"type\": \"text\", \"text\": \"Describe this image in one sentence:\"},\n                    {\"type\": \"image\", \"image\": Image(image)},\n                ],\n            },\n        ]),\n        max_new_tokens=2,\n    )\n    assert isinstance(result, str)\n\n\ndef test_transformers_inference_kwargs(model, image):\n    result = model(\n        [\"<image>Describe this image in one sentence:\", 
Image(image)],\n        max_new_tokens=2,\n    )\n    assert isinstance(result, str)\n\n\ndef test_transformers_invalid_inference_kwargs(model, image):\n    with pytest.raises(ValueError):\n        model(\n            [\n                \"<image>Describe this image in one sentence:\",\n                Image(image),\n            ],\n            foo=\"bar\",\n        )\n\n\ndef test_transformers_several_image(model, image):\n    result = model(\n        [\n            \"<image><image>Describe this image in one sentence:\",\n            Image(image),\n            Image(image),\n        ],\n        max_new_tokens=2,\n    )\n    assert isinstance(result, str)\n\n\ndef test_transformers_multimodal_json(model, image):\n    class Foo(BaseModel):\n        name: str\n\n    result = model(\n        [\"<image>Give the name of the color.\", Image(image)],\n        Foo,\n        max_new_tokens=10,\n    )\n    assert \"name\" in result\n\n\ndef test_transformers_multimodal_regex(model, image):\n    result = model(\n        [\"<image>How warn is the color from 0 to 9?\", Image(image)],\n        Regex(r\"[0-9]\")\n    )\n\n    assert isinstance(result, str)\n    assert re.match(r\"[0-9]\", result)\n\n\ndef test_transformers_multimodal_choice(model, image):\n    class Foo(Enum):\n        white = \"white\"\n        blue = \"blue\"\n\n    result = model(\n        [\"<image>Is it a white or a blue?\", Image(image)],\n        Foo,\n    )\n\n    assert isinstance(result, str)\n    assert result in [\"white\", \"blue\"]\n\n\ndef test_transformers_multimodal_multiple_samples(model, image):\n    result = model(\n        [\"<image>Describe this image in one sentence.\", Image(image)],\n        num_return_sequences=2,\n        num_beams=2,\n        max_new_tokens=2,\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n\n\ndef test_transformers_multimodal_batch(model, image):\n    result = model.batch(\n        [\n            [\"<image>Describe this image in one 
sentence.\", Image(image)],\n            [\"<image>Describe this image in one sentence.\", Image(image)],\n        ],\n        max_new_tokens=2,\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n\n    result = model.batch(\n        [\n            [\"<image>Describe this image in one sentence.<image>\", Image(image), Image(image)],\n            [\"<image>Describe this image in one sentence.<image>\", Image(image), Image(image)],\n        ],\n        num_return_sequences=2,\n        num_beams=2,\n        max_new_tokens=2,\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n    for item in result:\n        assert isinstance(item, list)\n        assert len(item) == 2\n\n    result = model.batch(\n        [\n            Chat(messages=[\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n                {\n                    \"role\": \"user\",\n                    \"content\": [\n                        \"Describe this image in one sentence:\",\n                        Image(image),\n                    ],\n                },\n            ]),\n            Chat(messages=[\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n                {\n                    \"role\": \"user\",\n                    \"content\": [\n                        \"Describe this image in one sentence:\",\n                        Image(image),\n                    ],\n                },\n            ]),\n        ],\n        max_new_tokens=2,\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n\n    result = model.batch(\n        [\n            Chat(messages=[\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n                {\n                    \"role\": \"user\",\n                    \"content\": [\n                        {\"type\": \"text\", \"text\": \"Describe this image in one sentence:\"},\n                 
       {\"type\": \"image\", \"image\": Image(image)},\n                    ],\n                },\n            ]),\n            Chat(messages=[\n                {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n                {\n                    \"role\": \"user\",\n                    \"content\": [\n                        {\"type\": \"text\", \"text\": \"Describe this image in one sentence:\"},\n                        {\"type\": \"image\", \"image\": Image(image)},\n                    ],\n                },\n            ]),\n        ],\n        max_new_tokens=2,\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n"
  },
  {
    "path": "tests/models/test_transformers_multimodal_type_adapter.py",
    "content": "import pytest\n\nfrom PIL import Image as PILImage\nfrom outlines_core import Index, Vocabulary\nfrom transformers import (\n    AutoProcessor,\n    LogitsProcessorList,\n)\n\nfrom outlines.inputs import Audio, Chat, Image, Video\nfrom outlines.models.transformers import TransformersMultiModalTypeAdapter\nfrom outlines.backends.outlines_core import OutlinesCoreLogitsProcessor\n\n\nMODEL_NAME = \"trl-internal-testing/tiny-LlavaForConditionalGeneration\"\n\n\n@pytest.fixture\ndef adapter():\n    processor = AutoProcessor.from_pretrained(MODEL_NAME)\n    tokenizer = processor.tokenizer\n    type_adapter = TransformersMultiModalTypeAdapter(tokenizer=tokenizer)\n\n    return type_adapter\n\n\n@pytest.fixture\ndef logits_processor():\n    vocabulary = Vocabulary.from_pretrained(\"openai-community/gpt2\")\n    index = Index(r\"[0-9]{3}\", vocabulary)\n    return OutlinesCoreLogitsProcessor(index, \"torch\")\n\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n    image.format = \"PNG\"\n\n    return image\n\n\n@pytest.fixture\ndef video():\n    # Simple mock video data\n    return \"mock_video_data\"\n\n\n@pytest.fixture\ndef audio():\n    # Simple mock audio data\n    return \"mock_audio_data\"\n\n\ndef test_transformers_multimodal_type_adapter_format_input(adapter, image):\n    with pytest.raises(TypeError):\n        adapter.format_input(\"hello\")\n\n    with pytest.raises(TypeError):\n        adapter.format_input({\"foo\": \"bar\"})\n\n    with pytest.raises(ValueError, match=\"All assets must be of the same type\"):\n        adapter.format_input([\"foo\", Image(image), Video(\"\")])\n\n    class MockAsset:\n        pass\n\n    with pytest.raises(ValueError, match=\"Unsupported asset type\"):\n        adapter.format_input([\"foo\", MockAsset()])\n\n    image_asset = Image(image)\n    assert adapter.format_input([\"foo\", image_asset]) 
== {\n        \"text\": \"foo\",\n        \"images\": [image_asset.image],\n    }\n\n    chat_prompt = Chat(messages=[\n        {\"role\": \"system\", \"content\": \"foo\"},\n        {\"role\": \"user\", \"content\": [\"bar\", image_asset]},\n    ])\n    result = adapter.format_input(chat_prompt)\n    assert isinstance(result, dict)\n    assert isinstance(result[\"text\"], str)\n    assert isinstance(result[\"images\"], list)\n    assert len(result[\"images\"]) == 1\n    assert result[\"images\"][0] == image_asset.image\n\n    chat_prompt = Chat(messages=[\n        {\"role\": \"system\", \"content\": \"foo\"},\n        {\"role\": \"user\", \"content\": [{\"type\": \"text\", \"text\": \"bar\"}, {\"type\": \"image\", \"image\": image_asset}]},\n    ])\n    result = adapter.format_input(chat_prompt)\n    assert isinstance(result, dict)\n    assert isinstance(result[\"text\"], str)\n    assert isinstance(result[\"images\"], list)\n    assert len(result[\"images\"]) == 1\n    assert result[\"images\"][0] == image_asset.image\n\n\n\n\ndef test_transformers_multimodal_type_adapter_format_input_empty_assets(adapter):\n    result = adapter.format_input([\"Just text prompt\"])\n    assert result == {\"text\": \"Just text prompt\"}\n\n\ndef test_transformers_multimodal_type_adapter_format_input_chat_invalid_asset_type(adapter, image):\n    class MockAsset:\n        pass\n\n    chat_prompt = Chat(messages=[\n        {\"role\": \"user\", \"content\": [\n            {\"type\": \"text\", \"text\": \"Hello\"},\n            {\"type\": \"image\", \"image\": MockAsset()}  # Wrong type\n        ]}\n    ])\n\n    with pytest.raises(ValueError, match=\"Assets must be of type\"):\n        adapter.format_input(chat_prompt)\n\n\ndef test_transformers_multimodal_type_adapter_format_input_chat_unsupported_content_type(adapter):\n    chat_prompt = Chat(messages=[\n        {\"role\": \"user\", \"content\": [\n            {\"type\": \"text\", \"text\": \"Hello\"},\n            {\"type\": 
\"unsupported\", \"data\": \"some_data\"}  # Unsupported type\n        ]}\n    ])\n\n    with pytest.raises(ValueError, match=\"Content must be 'text'\"):\n        adapter.format_input(chat_prompt)\n\n\ndef test_transformers_multimodal_type_adapter_format_output_type(\n    adapter, logits_processor\n):\n    formatted = adapter.format_output_type(logits_processor)\n    assert isinstance(formatted, LogitsProcessorList)\n    assert len(formatted) == 1\n    assert formatted[0].index == logits_processor.index\n    assert formatted[0].tensor_library_name == logits_processor.tensor_library_name\n\n    formatted = adapter.format_output_type(None)\n    assert formatted is None\n\n\ndef test_transformers_multimodal_type_adapter_format_input_chat_missing_asset_key(adapter, image):\n    image_asset = Image(image)\n\n    # Test missing 'image' key when type is 'image'\n    chat_prompt = Chat(messages=[\n        {\"role\": \"user\", \"content\": [\n            {\"type\": \"text\", \"text\": \"What's in this image?\"},\n            {\"type\": \"image\", \"txt\": image_asset}  # Wrong key: 'txt' instead of 'image'\n        ]}\n    ])\n\n    with pytest.raises(ValueError, match=\"Item with type 'image' must contain a 'image' key\"):\n        adapter.format_input(chat_prompt)\n\n    # Test missing 'video' key when type is 'video'\n    video_asset = Video(\"dummy_video\")\n    chat_prompt = Chat(messages=[\n        {\"role\": \"user\", \"content\": [\n            {\"type\": \"text\", \"text\": \"What's in this video?\"},\n            {\"type\": \"video\", \"vid\": video_asset}  # Wrong key: 'vid' instead of 'video'\n        ]}\n    ])\n\n    with pytest.raises(ValueError, match=\"Item with type 'video' must contain a 'video' key\"):\n        adapter.format_input(chat_prompt)\n\n\ndef test_transformers_multimodal_type_adapter_format_input_chat_missing_type_key(adapter, image):\n    image_asset = Image(image)\n\n    chat_prompt = Chat(messages=[\n        {\"role\": \"user\", 
\"content\": [\n            {\"text\": \"What's in this image?\"},  # Missing 'type' key\n            {\"type\": \"image\", \"image\": image_asset}\n        ]}\n    ])\n\n    with pytest.raises(ValueError, match=\"Each item in the content list must be a dictionary with a 'type' key\"):\n        adapter.format_input(chat_prompt)\n\n\ndef test_transformers_multimodal_type_adapter_format_input_invalid_content_type(adapter):\n    chat_prompt = Chat(messages=[\n        {\"role\": \"user\", \"content\": 42}  # Invalid content type (integer)\n    ])\n\n    with pytest.raises(ValueError, match=\"Invalid content type\"):\n        adapter.format_input(chat_prompt)\n\n    # Test with another invalid type\n    chat_prompt = Chat(messages=[\n        {\"role\": \"user\", \"content\": {\"invalid\": \"dict\"}}  # Invalid content type (dict not in list)\n    ])\n\n    with pytest.raises(ValueError, match=\"Invalid content type\"):\n        adapter.format_input(chat_prompt)\n\n\ndef test_transformers_multimodal_type_adapter_format_asset_for_template(adapter, image, video, audio):\n    # Test Image asset\n    image_asset = Image(image)\n    formatted_image = adapter._format_asset_for_template(image_asset)\n    assert formatted_image == {\"type\": \"image\", \"image\": image_asset}\n\n    # Test Video asset\n    video_asset = Video(video)\n    formatted_video = adapter._format_asset_for_template(video_asset)\n    assert formatted_video == {\"type\": \"video\", \"video\": video_asset}\n\n    # Test Audio asset\n    audio_asset = Audio(audio)\n    formatted_audio = adapter._format_asset_for_template(audio_asset)\n    assert formatted_audio == {\"type\": \"audio\", \"audio\": audio_asset}\n\n\ndef test_transformers_multimodal_type_adapter_format_asset_for_template_invalid_type(adapter):\n    class MockUnsupportedAsset:\n        pass\n\n    # This test requires accessing the private method directly since the error\n    # would normally be caught earlier in the validation chain\n    
unsupported_asset = MockUnsupportedAsset()\n\n    with pytest.raises(ValueError, match=\"Assets must be of type `Image`, `Video` or `Audio`\"):\n        adapter._format_asset_for_template(unsupported_asset)\n\n\ndef test_transformers_multimodal_type_adapter_multiple_assets_in_single_item(adapter, image):\n    image_asset = Image(image)\n    video_asset = Video(\"dummy_video\")\n\n    chat_prompt = Chat(messages=[\n        {\"role\": \"user\", \"content\": [\n            {\"type\": \"text\", \"text\": \"What's in this?\"},\n            {\"type\": \"image\", \"image\": image_asset, \"video\": video_asset}  # Multiple asset types\n        ]}\n    ])\n\n    with pytest.raises(ValueError, match=\"Found item with multiple keys:\"):\n        adapter.format_input(chat_prompt)\n\n\n\ndef test_transformers_multimodal_type_adapter_correct_multiple_assets_usage(adapter, image):\n    image_asset1 = Image(image)\n    image_asset2 = Image(image)\n\n    # Correct way: separate dictionary items for each asset\n    chat_prompt = Chat(messages=[\n        {\"role\": \"user\", \"content\": [\n            {\"type\": \"text\", \"text\": \"What's in these images?\"},\n            {\"type\": \"image\", \"image\": image_asset1},\n            {\"type\": \"image\", \"image\": image_asset2}\n        ]}\n    ])\n\n    result = adapter.format_input(chat_prompt)\n    assert isinstance(result, dict)\n    assert \"text\" in result\n    assert \"images\" in result\n    assert len(result[\"images\"]) == 2\n"
  },
  {
    "path": "tests/models/test_transformers_tokenizer.py",
    "content": "import pytest\n\nimport transformers\n\nfrom outlines.models.transformers import (\n    get_llama_tokenizer_types,\n    TransformerTokenizer,\n)\n\n\nTEST_MODEL = \"erwanf/gpt2-mini\"\nTEST_MODEL_SEQ2SEQ = \"hf-internal-testing/tiny-random-t5\"\n\n\n@pytest.fixture\ndef tokenizer():\n    return transformers.AutoTokenizer.from_pretrained(TEST_MODEL)\n\n\n@pytest.fixture\ndef tokenizer_no_pad_token_id(tokenizer):\n    tokenizer.pad_token_id = None\n    return tokenizer\n\n\n@pytest.fixture\ndef tokenizer_seq2seq():\n    return transformers.AutoTokenizer.from_pretrained(TEST_MODEL_SEQ2SEQ)\n\n\n@pytest.fixture\ndef transformer_tokenizer(tokenizer):\n    return TransformerTokenizer(tokenizer)\n\n\n@pytest.fixture\ndef another_transformer_tokenizer(tokenizer):\n    return TransformerTokenizer(tokenizer)\n\n\n@pytest.fixture\ndef transformer_tokenizer_seq2seq(tokenizer_seq2seq):\n    return TransformerTokenizer(tokenizer_seq2seq)\n\n\ndef test_get_llama_tokenizer_types():\n    tokenizer_types = get_llama_tokenizer_types()\n    assert tokenizer_types[0] is transformers.models.llama.LlamaTokenizer\n    assert tokenizer_types[1] is transformers.models.llama.LlamaTokenizerFast\n    assert tokenizer_types[2] is transformers.models.code_llama.CodeLlamaTokenizer\n    assert tokenizer_types[3] is transformers.models.code_llama.CodeLlamaTokenizerFast\n\n\ndef test_transformer_tokenizer_init(\n    tokenizer,\n    tokenizer_no_pad_token_id\n):\n    # tokenizer with a pad_token_id\n    transformer_tokenizer = TransformerTokenizer(tokenizer)\n    assert transformer_tokenizer.tokenizer == tokenizer\n    assert transformer_tokenizer.eos_token_id == tokenizer.eos_token_id\n    assert transformer_tokenizer.pad_token_id == tokenizer.pad_token_id\n    assert transformer_tokenizer.special_tokens == set(tokenizer.all_special_tokens)\n    assert transformer_tokenizer.vocabulary == tokenizer.get_vocab()\n\n    # tokenizer with no pad_token_id\n    
transformer_tokenizer_no_pad_token_id = TransformerTokenizer(tokenizer_no_pad_token_id)\n    assert transformer_tokenizer_no_pad_token_id.tokenizer == tokenizer_no_pad_token_id\n    assert transformer_tokenizer_no_pad_token_id.eos_token_id == tokenizer_no_pad_token_id.eos_token_id\n    assert transformer_tokenizer_no_pad_token_id.pad_token_id == tokenizer_no_pad_token_id.eos_token_id\n    assert transformer_tokenizer_no_pad_token_id.special_tokens == set(tokenizer_no_pad_token_id.all_special_tokens)\n    assert transformer_tokenizer_no_pad_token_id.vocabulary == tokenizer_no_pad_token_id.get_vocab()\n\n\ndef test_transformer_tokenizer_encode(transformer_tokenizer):\n    input_ids, attention_mask = transformer_tokenizer.encode(\"Hello, world!\")\n    assert input_ids is not None\n    assert attention_mask is not None\n    assert input_ids.shape == attention_mask.shape\n\n\ndef test_transformer_tokenizer_decode(transformer_tokenizer):\n    input_ids, _ = transformer_tokenizer.encode(\"Hello, world!\")\n    decoded_text = transformer_tokenizer.decode(input_ids)\n    assert isinstance(decoded_text, list)\n    assert \"Hello, world!\" in decoded_text[0]\n\n\ndef test_transformer_tokenizer_convert_token_to_string(transformer_tokenizer):\n    # regular\n    transformer_tokenizer.is_llama = False\n    token = transformer_tokenizer.tokenizer.tokenize(\"Hello\")[0]\n    string = transformer_tokenizer.convert_token_to_string(token)\n    assert isinstance(string, str)\n    assert \"Hello\" in string\n\n    # is_llama + <0x20>\n    transformer_tokenizer.is_llama = True\n    string = transformer_tokenizer.convert_token_to_string(\"<0x20>\")\n    assert isinstance(string, str)\n    assert \" \" in string\n\n\ndef test_transformer_tokenizer_eq(\n    transformer_tokenizer,\n    another_transformer_tokenizer,\n    transformer_tokenizer_seq2seq,\n):\n    # different types of object\n    assert transformer_tokenizer.__eq__(1) == NotImplemented\n\n    # regular case\n    assert 
transformer_tokenizer == another_transformer_tokenizer\n    assert transformer_tokenizer != transformer_tokenizer_seq2seq\n\n    # with model name and kwargs attributes\n    transformer_tokenizer.model_name = \"foo\"\n    transformer_tokenizer.kwargs = {\"foo\": \"bar\"}\n    another_transformer_tokenizer.model_name = \"foo\"\n    another_transformer_tokenizer.kwargs = {\"foo\": \"bar\"}\n    assert transformer_tokenizer == another_transformer_tokenizer\n\n\ndef test_transformer_tokenizer_hash(\n    transformer_tokenizer,\n    another_transformer_tokenizer,\n    transformer_tokenizer_seq2seq,\n):\n    assert isinstance(hash(transformer_tokenizer), int)\n    assert hash(transformer_tokenizer) == hash(another_transformer_tokenizer)\n    assert hash(transformer_tokenizer) != hash(transformer_tokenizer_seq2seq)\n\n\ndef test_transformer_tokenizer_getstate_setstate(\n    transformer_tokenizer,\n    another_transformer_tokenizer,\n):\n    state = transformer_tokenizer.__getstate__()\n    assert \"tokenizer\" in state\n\n    another_transformer_tokenizer.__setstate__(state)\n    assert another_transformer_tokenizer == transformer_tokenizer\n"
  },
  {
    "path": "tests/models/test_transformers_type_adapter.py",
    "content": "import io\nimport pytest\n\nimport transformers\nfrom transformers import LogitsProcessorList\nfrom outlines_core import Index, Vocabulary\nfrom PIL import Image as PILImage\n\nfrom outlines.backends.outlines_core import OutlinesCoreLogitsProcessor\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.transformers import TransformersTypeAdapter\n\n\nMODEL_NAME = \"erwanf/gpt2-mini\"\n\n\n@pytest.fixture\ndef adapter():\n    tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)\n    type_adapter = TransformersTypeAdapter(tokenizer=tokenizer)\n    chat_template = '{% for message in messages %}{{ message.role }}: {{ message.content }}{% endfor %}'\n    type_adapter.tokenizer.chat_template = chat_template\n\n    return type_adapter\n\n@pytest.fixture\ndef logits_processor():\n    vocabulary = Vocabulary.from_pretrained(\"openai-community/gpt2\")\n    index = Index(r\"[0-9]{3}\", vocabulary)\n    return OutlinesCoreLogitsProcessor(index, \"torch\")\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\n\ndef test_transformers_type_adapter_format_input(adapter, image):\n    # invalid input\n    with pytest.raises(TypeError, match=\"is not available.\"):\n        adapter.format_input([\"prompt\", Image(image)])\n\n    # string with chat template\n    # The fixture sets a chat template, so it should be formatted\n    adapter.has_chat_template = True\n    assert adapter.format_input(\"Hello, world!\") == \"user: Hello, world!\"\n\n    # string without chat template\n    adapter.has_chat_template = False\n    assert adapter.format_input(\"Hello, world!\") == \"Hello, world!\"\n\n    # chat\n    # Restore chat template for 
chat test\n    adapter.has_chat_template = True\n    assert isinstance(adapter.format_input(Chat(messages=[\n        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n        {\"role\": \"user\", \"content\": \"Hello, world!\"},\n        {\"role\": \"assistant\", \"content\": \"Hello, world!\"},\n    ])), str)\n\n\ndef test_transformers_type_adapter_format_output_type(\n    adapter, logits_processor\n):\n    formatted = adapter.format_output_type(logits_processor)\n    assert isinstance(formatted, LogitsProcessorList)\n    assert formatted[0].index == logits_processor.index\n    assert formatted[0].tensor_library_name == logits_processor.tensor_library_name\n\n    formatted = adapter.format_output_type(None)\n    assert formatted is None\n"
  },
  {
    "path": "tests/models/test_utils.py",
    "content": "from outlines.models.utils import set_additional_properties_false_json_schema\n\n\ndef test_set_additional_properties_false_json_schema():\n    # additionalProperties is not set\n    schema = {\n        \"type\": \"object\",\n        \"properties\": {\n            \"name\": {\"type\": \"string\"},\n            \"age\": {\"type\": \"integer\"},\n        },\n        \"required\": [\"name\"],\n    }\n    modified_schema = set_additional_properties_false_json_schema(schema)\n    target_schema = {\n        \"type\": \"object\",\n        \"properties\": {\n            \"name\": {\"type\": \"string\"},\n            \"age\": {\"type\": \"integer\"},\n        },\n        \"required\": [\"name\"],\n        \"additionalProperties\": False,\n    }\n    assert modified_schema == target_schema\n\n    # additionalProperties is set to False\n    schema = {\n        \"type\": \"object\",\n        \"properties\": {\n            \"name\": {\"type\": \"string\"},\n            \"age\": {\"type\": \"integer\"},\n        },\n        \"required\": [\"name\"],\n        \"additionalProperties\": False,\n    }\n    modified_schema = set_additional_properties_false_json_schema(schema)\n    assert modified_schema == schema\n\n    # additionalProperties is set to True\n    schema = {\n        \"type\": \"object\",\n        \"properties\": {\n            \"name\": {\"type\": \"string\"},\n            \"age\": {\"type\": \"integer\"},\n        },\n        \"required\": [\"name\"],\n        \"additionalProperties\": True,\n    }\n    modified_schema = set_additional_properties_false_json_schema(schema)\n    assert modified_schema == schema\n"
  },
  {
    "path": "tests/models/test_vllm.py",
    "content": "import io\nimport os\nimport re\nimport warnings\nimport base64\nfrom typing import AsyncGenerator, Generator\n\nimport pytest\nfrom PIL import Image as PILImage\nfrom openai import AsyncOpenAI, OpenAI\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.vllm import VLLM, AsyncVLLM, from_vllm\nfrom outlines.types.dsl import CFG, Regex, JsonSchema\nfrom tests.test_utils.mock_openai_client import MockOpenAIClient, MockAsyncOpenAIClient\n\n\nYES_NO_GRAMMAR = \"\"\"\n?start: answer\n\nanswer: \"yes\" | \"no\"\n\"\"\"\n\n# Image for testing\nwidth, height = 1, 1\nwhite_background = (255, 255, 255)\nimage = PILImage.new(\"RGB\", (width, height), white_background)\nbuffer = io.BytesIO()\nimage.save(buffer, format=\"PNG\")\nbuffer.seek(0)\nimage = PILImage.open(buffer)\nimage_input = Image(image)\n\n\n# If the VLLM_SERVER_URL environment variable is set, use the real vLLM server\n# Otherwise, use the mock server\nvllm_server_url = os.environ.get(\"VLLM_SERVER_URL\")\nvllm_model_name = os.environ.get(\n    \"VLLM_MODEL_NAME\", \"Qwen/Qwen2.5-VL-3B-Instruct\"\n)\nif vllm_server_url:\n    openai_client = OpenAI(base_url=vllm_server_url, api_key=\"foo\")\n    async_openai_client = AsyncOpenAI(base_url=vllm_server_url, api_key=\"foo\")\nelse:\n    warnings.warn(\"No VLLM server URL provided, using mock server\")\n    openai_client = MockOpenAIClient()\n    async_openai_client = MockAsyncOpenAIClient()\n\n\nmock_responses = [\n    (\n        {\n            'messages': [\n                {'role': \"user\", 'content': 'Respond with a single word.'}\n            ],\n            'model': vllm_model_name,\n        },\n        \"foo\"\n    ),\n    (\n        {\n            'messages': [\n                {'role': \"user\", 'content': 'Respond with a single word.'}\n            ],\n            'model': vllm_model_name,\n            'stream': True\n        },\n        [\"foo\", \"bar\"]\n    ),\n    (\n        {\n            'messages': [\n                
{'role': \"user\", 'content': 'Respond with a single word.'}\n            ],\n            'n': 2,\n            'model': vllm_model_name,\n        },\n        [\"foo\", \"bar\"]\n    ),\n    (\n        {\n            'messages': [{'role': \"user\", 'content': 'foo?'}],\n            'model': vllm_model_name,\n            'max_tokens': 10,\n            'extra_body': {\n            'guided_json': {\n                'type': 'object',\n                'properties': {\n                    'bar': {'type': 'string'}\n                }\n            },\n            }\n        },\n        '{\"foo\": \"bar\"}'\n    ),\n    (\n        {\n            'messages': [{'role': \"user\", 'content': 'foo?'}],\n            'model': vllm_model_name,\n            'max_tokens': 10,\n            'extra_body': {\n                'guided_regex': '([0-9]{3})',\n            },\n        },\n        \"123\"\n    ),\n    (\n        {\n            'messages': [{'role': \"user\", 'content': 'foo?'}],\n            'model': vllm_model_name,\n            'max_tokens': 10,\n            'extra_body': {\n                'guided_grammar': YES_NO_GRAMMAR,\n            },\n        },\n        \"yes\"\n    ),\n    (\n        {\n            'messages': [\n                {\n                    \"role\": \"user\",\n                    \"content\": [\n                        {\"type\": \"text\", \"text\": \"hello\"},\n                        {\n                            \"type\": \"image_url\",\n                            \"image_url\": {\n                                \"url\": f\"data:image/png;base64,{image_input.image_str}\"\n                            },\n                        },\n                    ]\n                }\n            ],\n            'model': vllm_model_name,\n            'max_tokens': 10,\n        },\n        \"foo\"\n    ),\n    (\n        {\n            'messages': [\n                {\"role\": \"system\", \"content\": \"prompt\"},\n                {\n                    \"role\": 
\"user\",\n                    \"content\": [\n                        {\"type\": \"text\", \"text\": \"hello\"},\n                        {\n                            \"type\": \"image_url\",\n                            \"image_url\": {\n                                \"url\": f\"data:image/png;base64,{image_input.image_str}\"\n                            },\n                        },\n                    ],\n                },\n                {\"role\": \"assistant\", \"content\": \"response\"},\n            ],\n            'model': vllm_model_name,\n            'max_tokens': 10,\n        },\n        \"foo\"\n    )\n]\n\n\n# If the VLLM_SERVER_URL environment variable is not set, add the mock\n# responses to the mock clients\nif not vllm_server_url:\n    async_openai_client.add_mock_responses(mock_responses)\n    openai_client.add_mock_responses(mock_responses)\n\n\n@pytest.fixture\ndef sync_model():\n    return VLLM(openai_client, vllm_model_name)\n\n\n@pytest.fixture\ndef sync_model_no_model_name():\n    return VLLM(openai_client)\n\n\n@pytest.fixture\ndef async_model():\n    return AsyncVLLM(async_openai_client, vllm_model_name)\n\n\n@pytest.fixture\ndef async_model_no_model_name():\n    return AsyncVLLM(async_openai_client)\n\n\ndef test_vllm_init():\n    # We do not rely on the mock server here because we need an object\n    # of type OpenAI and AsyncOpenAI to test the init function.\n    openai_client = OpenAI(base_url=\"http://localhost:11434\", api_key=\"foo\")\n    async_openai_client = AsyncOpenAI(base_url=\"http://localhost:11434\", api_key=\"foo\")\n\n    # Sync with model name\n    model = from_vllm(openai_client, vllm_model_name)\n    assert isinstance(model, VLLM)\n    assert model.client == openai_client\n    assert model.model_name == vllm_model_name\n\n    # Sync without model name\n    model = from_vllm(openai_client)\n    assert isinstance(model, VLLM)\n    assert model.client == openai_client\n    assert model.model_name is None\n\n    
# Async with model name\n    model = from_vllm(async_openai_client, vllm_model_name)\n    assert isinstance(model, AsyncVLLM)\n    assert model.client == async_openai_client\n    assert model.model_name == vllm_model_name\n\n    # Async without model name\n    model = from_vllm(async_openai_client)\n    assert isinstance(model, AsyncVLLM)\n    assert model.client == async_openai_client\n    assert model.model_name is None\n\n    with pytest.raises(ValueError, match=\"Unsupported client type\"):\n        from_vllm(\"foo\")\n\n\ndef test_vllm_sync_simple_call(sync_model):\n    result = sync_model(\"Respond with a single word.\",)\n    assert isinstance(result, str)\n\n\ndef test_vllm_sync_streaming(sync_model_no_model_name):\n    result = sync_model_no_model_name.stream(\n        \"Respond with a single word.\",\n        model=vllm_model_name,\n    )\n    assert isinstance(result, Generator)\n    assert isinstance(next(result), str)\n\n\ndef test_vllm_sync_batch(sync_model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        sync_model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n\n\ndef test_vllm_sync_vision(sync_model):\n    result = sync_model([\"hello\", image_input], max_tokens=10)\n    assert isinstance(result, str)\n\n\ndef test_vllm_sync_vision_chat(sync_model):\n    result = sync_model(\n        Chat(messages=[\n            {\"role\": \"system\", \"content\": \"prompt\"},\n            {\"role\": \"user\", \"content\": [\n                \"hello\",\n                image_input,\n            ]},\n            {\"role\": \"assistant\", \"content\": \"response\"},\n        ]),\n        max_tokens=10,\n    )\n    assert isinstance(result, str)\n\n\ndef test_vllm_sync_multiple_samples(sync_model):\n    result = sync_model(\"Respond with a single word.\", n=2)\n    assert isinstance(result, list)\n    assert len(result) == 2\n    assert isinstance(result[0], str)\n    assert 
isinstance(result[1], str)\n\n\ndef test_vllm_sync_json(sync_model):\n    json_string = '{\"type\": \"object\", \"properties\": {\"bar\": {\"type\": \"string\"}}}'\n    result = sync_model(\"foo?\", JsonSchema(json_string), max_tokens=10)\n    assert isinstance(result, str)\n    assert \"bar\" in result\n\n\ndef test_vllm_sync_regex(sync_model):\n    result = sync_model(\"foo?\", Regex(r\"[0-9]{3}\"), max_tokens=10)\n    assert isinstance(result, str)\n    assert re.match(r\"[0-9]{3}\", result)\n\n\ndef test_vllm_sync_cfg(sync_model):\n    result = sync_model(\"foo?\", CFG(YES_NO_GRAMMAR), max_tokens=10)\n    assert isinstance(result, str)\n    assert result in [\"yes\", \"no\"]\n\n\n@pytest.mark.asyncio\nasync def test_vllm_async_simple_call(async_model):\n    result = await async_model(\"Respond with a single word.\",)\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_vllm_async_streaming(async_model_no_model_name):\n    result = async_model_no_model_name.stream(\n        \"Respond with a single word.\",\n        model=vllm_model_name,\n    )\n    assert isinstance(result, AsyncGenerator)\n    async for chunk in result:\n        assert isinstance(chunk, str)\n        break  # Just check the first chunk\n\n\n@pytest.mark.asyncio\nasync def test_vllm_async_batch(async_model):\n    with pytest.raises(NotImplementedError, match=\"does not support\"):\n        await async_model.batch(\n            [\"Respond with one word.\", \"Respond with one word.\"],\n        )\n\n\n@pytest.mark.asyncio\nasync def test_vllm_async_vision(async_model):\n    result = await async_model([\"hello\", image_input], max_tokens=10)\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_vllm_async_vision_chat(async_model):\n    result = await async_model(\n        Chat(messages=[\n            {\"role\": \"system\", \"content\": \"prompt\"},\n            {\"role\": \"user\", \"content\": [\n                \"hello\",\n                
image_input,\n            ]},\n            {\"role\": \"assistant\", \"content\": \"response\"},\n        ]),\n        max_tokens=10,\n    )\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_vllm_async_multiple_samples(async_model):\n    result = await async_model(\"Respond with a single word.\", n=2)\n    assert isinstance(result, list)\n    assert len(result) == 2\n    assert isinstance(result[0], str)\n    assert isinstance(result[1], str)\n\n\n@pytest.mark.asyncio\nasync def test_vllm_async_json(async_model):\n    json_string = '{\"type\": \"object\", \"properties\": {\"bar\": {\"type\": \"string\"}}}'\n    result = await async_model(\"foo?\", JsonSchema(json_string), max_tokens=10)\n    assert isinstance(result, str)\n    assert \"bar\" in result\n\n\n@pytest.mark.asyncio\nasync def test_vllm_async_regex(async_model):\n    result = await async_model(\"foo?\", Regex(r\"[0-9]{3}\"), max_tokens=10)\n    assert isinstance(result, str)\n    assert re.match(r\"[0-9]{3}\", result)\n\n\n@pytest.mark.asyncio\nasync def test_vllm_async_cfg(async_model):\n    result = await async_model(\"foo?\", CFG(YES_NO_GRAMMAR), max_tokens=10)\n    assert isinstance(result, str)\n    assert result in [\"yes\", \"no\"]\n"
  },
  {
    "path": "tests/models/test_vllm_offline.py",
    "content": "import io\nimport re\nfrom enum import Enum\n\nimport pytest\nfrom PIL import Image as PILImage\nfrom pydantic import BaseModel\n\ntry:\n    from vllm import LLM, SamplingParams\n    HAS_VLLM = True\nexcept ImportError:\n    HAS_VLLM = False\n\nimport outlines\nfrom outlines.inputs import Chat\nfrom outlines.models.vllm_offline import (\n    VLLMOffline,\n    VLLMOfflineTypeAdapter,\n    from_vllm_offline\n)\nfrom outlines.types import Regex\n\n\nTEST_MODEL = \"microsoft/Phi-3-mini-4k-instruct\"\n\npytestmark = pytest.mark.skipif(\n    not HAS_VLLM,\n    reason=\"vLLM models can only be run on GPU.\"\n)\n\n@pytest.fixture(scope=\"session\")\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\ndef test_vllm_model_initialization():\n    model = from_vllm_offline(LLM(TEST_MODEL))\n    assert isinstance(model, VLLMOffline)\n    assert isinstance(model.model, LLM)\n    assert isinstance(model.type_adapter, VLLMOfflineTypeAdapter)\n\n\n@pytest.fixture(scope=\"session\")\ndef model(tmp_path_factory):\n    model = outlines.from_vllm_offline(LLM(TEST_MODEL))\n    return model\n\n\ndef test_vllm_simple(model):\n    result = model.generate(\"Respond with one word. Not more.\", None)\n    assert isinstance(result, str)\n\n\ndef test_vllm_call(model):\n    result = model(\"Respond with one word. 
Not more.\")\n    assert isinstance(result, str)\n\n\ndef test_vllm_inference_kwargs(model):\n    result = model(\n        \"Write a short story about a cat.\",\n        sampling_params=SamplingParams(max_tokens=2),\n        use_tqdm=True\n    )\n    assert isinstance(result, str)\n    assert len(result) <= 20\n\n\ndef test_vllm_chat(model):\n    result = model(\n        Chat(messages=[\n            {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n            {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n            {\"role\": \"assistant\", \"content\": \"Response: \"},\n        ]),\n        sampling_params=SamplingParams(max_tokens=2),\n    )\n    assert isinstance(result, str)\n\n\ndef test_vllm_invalid_inference_kwargs(model):\n    with pytest.raises(TypeError):\n        model(\"Respond with one word. Not more.\", foo=\"bar\")\n\n\ndef test_vllm_regex(model):\n    result = model(\"Give a number between 0 and 9.\", Regex(r\"[0-9]\"))\n    assert isinstance(result, str)\n    assert re.match(r\"[0-9]\", result)\n\n\ndef test_vllm_json(model):\n    class Character(BaseModel):\n        name: str\n\n    result = model(\"Create a character with a name.\", Character)\n    assert \"name\" in result\n\n\ndef test_vllm_choice(model):\n    class Foo(Enum):\n        cat = \"cat\"\n        dog = \"dog\"\n\n    result = model(\"Cat or dog?\", Foo)\n    assert result in [\"cat\", \"dog\"]\n\n\ndef test_vllm_multiple_samples(model):\n    result = model(\n        \"Respond with one word. Not more.\",\n        sampling_params=SamplingParams(n=2)\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n\n\ndef test_vllm_batch(model):\n    result = model.batch(\n        [\"Respond with one word. Not more.\", \"Respond with one word. Not more.\"]\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n\n    result = model.batch(\n        [\"Respond with one word. Not more.\", \"Respond with one word. 
Not more.\"],\n        sampling_params=SamplingParams(n=2)\n    )\n    assert isinstance(result, list)\n    assert len(result) == 2\n    for item in result:\n        assert isinstance(item, list)\n        assert len(item) == 2\n\n    with pytest.raises(TypeError, match=\"Batch generation is not available\"):\n        model.batch(\n            [\n                Chat(messages=[\n                    {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n                ]),\n            ]\n        )\n\ndef test_vllm_streaming(model):\n    with pytest.raises(\n        NotImplementedError,\n        match=\"Streaming is not available\"\n    ):\n        model.stream(\"Respond with one word. Not more.\")\n"
  },
  {
    "path": "tests/models/test_vllm_offline_type_adapter.py",
    "content": "import io\nimport json\n\nimport pytest\nfrom PIL import Image as PILImage\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.vllm_offline import VLLMOfflineTypeAdapter\nfrom outlines.types import CFG, JsonSchema, Regex\n\n\nCFG_STRING = \"\"\"\n?start: expr\n?expr: NUMBER\n\"\"\"\n\nJSON_SCHEMA_STRING = \"\"\"\n{\n    \"type\": \"object\",\n    \"properties\": {\n        \"answer\": {\"type\": \"number\"}\n    }\n}\n\"\"\"\n\n\n@pytest.fixture\ndef type_adapter():\n    return VLLMOfflineTypeAdapter()\n\n@pytest.fixture\ndef cfg_instance():\n    return CFG(CFG_STRING)\n\n@pytest.fixture\ndef json_schema_instance():\n    return JsonSchema(JSON_SCHEMA_STRING)\n\n@pytest.fixture\ndef json_schema_whitespace_instance():\n    return JsonSchema(JSON_SCHEMA_STRING, whitespace_pattern=\"\\n\")\n\n@pytest.fixture\ndef regex_instance():\n    return Regex(r\"[0-9]+\")\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\ndef test_vllm_offline_type_adapter_input_text(type_adapter):\n    message = \"prompt\"\n    result = type_adapter.format_input(message)\n    assert result == message\n\n\ndef test_vllm_offline_type_adapter_input_text_with_template():\n    adapter = VLLMOfflineTypeAdapter(has_chat_template=True)\n    message = \"prompt\"\n    result = adapter.format_input(message)\n\n    assert result == [{\"role\": \"user\", \"content\": \"prompt\"}]\n\n\ndef test_vllm_offline_type_adapter_input_text_without_template():\n    adapter = VLLMOfflineTypeAdapter(has_chat_template=False)\n    message = \"prompt\"\n    result = adapter.format_input(message)\n\n    assert result == \"prompt\"\n\n\ndef 
test_vllm_offline_type_adapter_input_chat(type_adapter):\n    model_input = Chat(messages=[\n        {\"role\": \"system\", \"content\": \"prompt\"},\n        {\"role\": \"user\", \"content\": \"hello\"},\n        {\"role\": \"assistant\", \"content\": \"response\"},\n    ])\n    result = type_adapter.format_input(model_input)\n    assert result == [\n        {\"role\": \"system\", \"content\": \"prompt\"},\n        {\"role\": \"user\", \"content\": \"hello\"},\n        {\"role\": \"assistant\", \"content\": \"response\"},\n    ]\n\n\ndef test_vllm_offline_type_adapter_input_invalid(type_adapter, image):\n    with pytest.raises(TypeError, match=\"is not available\"):\n        _ = type_adapter.format_input([\"Hello\", Image(image)])\n\n    with pytest.raises(ValueError, match=\"Assets are not supported\"):\n        _ = type_adapter.format_input(Chat(messages=[\n            {\"role\": \"user\", \"content\": [\n                \"Hello\",\n                Image(image),\n            ]},\n        ]))\n\n\ndef test_vllm_offline_type_adapter_output_type(\n    type_adapter,\n    cfg_instance,\n    json_schema_instance,\n    json_schema_whitespace_instance,\n    regex_instance,\n):\n    assert type_adapter.format_output_type(None) == {}\n    assert type_adapter.format_output_type(cfg_instance) == {\n        \"grammar\": CFG_STRING\n    }\n    assert type_adapter.format_output_type(json_schema_instance) == {\n        \"json\": json.loads(JSON_SCHEMA_STRING)\n    }\n    assert type_adapter.format_output_type(json_schema_whitespace_instance) == {\n        \"json\": json.loads(JSON_SCHEMA_STRING),\n        \"whitespace_pattern\": \"\\n\"\n    }\n    assert type_adapter.format_output_type(regex_instance) == {\n        \"regex\": \"([0-9]+)\"\n    }\n"
  },
  {
    "path": "tests/models/test_vllm_type_adapter.py",
    "content": "import io\nimport json\nimport pytest\nfrom dataclasses import dataclass\n\nfrom PIL import Image as PILImage\n\nfrom outlines.inputs import Chat, Image\nfrom outlines.models.vllm import VLLMTypeAdapter\nfrom outlines.types import CFG, JsonSchema\n\n\n\nCFG_STRING = \"\"\"\n?start: expr\n?expr: NUMBER\n\"\"\"\n\nJSON_SCHEMA_STRING = \"\"\"\n{\n    \"type\": \"object\",\n    \"properties\": {\n        \"answer\": {\"type\": \"number\"}\n    }\n}\n\"\"\"\n\n\n@pytest.fixture\ndef type_adapter():\n    return VLLMTypeAdapter()\n\n@pytest.fixture\ndef cfg_instance():\n    return CFG(CFG_STRING)\n\n@pytest.fixture\ndef json_schema_instance():\n    return JsonSchema(JSON_SCHEMA_STRING)\n\n@pytest.fixture\ndef json_schema_whitespace_instance():\n    return JsonSchema(JSON_SCHEMA_STRING, whitespace_pattern=\"\\n\")\n\n@pytest.fixture\ndef image():\n    width, height = 1, 1\n    white_background = (255, 255, 255)\n    image = PILImage.new(\"RGB\", (width, height), white_background)\n\n    # Save to an in-memory bytes buffer and read as png\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n    buffer.seek(0)\n    image = PILImage.open(buffer)\n\n    return image\n\n\ndef test_vllm_type_adapter_input_text(type_adapter):\n    message = \"prompt\"\n    result = type_adapter.format_input(message)\n    assert result == [{\"role\": \"user\", \"content\": message}]\n\n\ndef test_vllm_type_adapter_input_vision(type_adapter, image):\n    image_input = Image(image)\n    result = type_adapter.format_input([\"hello\", image_input])\n    assert result == [\n        {\n            \"role\": \"user\",\n            \"content\": [\n                {\"type\": \"text\", \"text\": \"hello\"},\n                {\n                    \"type\": \"image_url\",\n                    \"image_url\": {\n                        \"url\": f\"data:image/png;base64,{image_input.image_str}\"\n                    },\n                },\n            ],\n        }\n    ]\n\n\ndef 
test_vllm_type_adapter_input_chat(type_adapter, image):\n    image_input = Image(image)\n    model_input = Chat(messages=[\n        {\"role\": \"system\", \"content\": \"prompt\"},\n        {\"role\": \"user\", \"content\": [\n            \"hello\",\n            image_input,\n        ]},\n        {\"role\": \"assistant\", \"content\": \"response\"},\n    ])\n    result = type_adapter.format_input(model_input)\n    assert result == [\n        {\"role\": \"system\", \"content\": \"prompt\"},\n        {\n            \"role\": \"user\",\n            \"content\": [\n                {\"type\": \"text\", \"text\": \"hello\"},\n                {\n                    \"type\": \"image_url\",\n                    \"image_url\": {\n                        \"url\": f\"data:image/png;base64,{image_input.image_str}\"\n                    },\n                },\n            ],\n        },\n        {\"role\": \"assistant\", \"content\": \"response\"},\n    ]\n\n\ndef test_vllm_type_adapter_input_invalid(type_adapter):\n    @dataclass\n    class Audio:\n        file: str\n\n    prompt = Audio(\n        \"file\",\n    )\n    with pytest.raises(TypeError, match=\"The input type\"):\n        _ = type_adapter.format_input(prompt)\n\n\ndef test_vllm_type_adapter_output_type(\n    type_adapter,\n    cfg_instance,\n    json_schema_instance,\n    json_schema_whitespace_instance,\n):\n    assert type_adapter.format_output_type(None) == {}\n    assert type_adapter.format_output_type(cfg_instance) == {\n        \"guided_grammar\": CFG_STRING\n    }\n    assert type_adapter.format_output_type(json_schema_instance) == {\n        \"guided_json\": json.loads(JSON_SCHEMA_STRING)\n    }\n    assert type_adapter.format_output_type(json_schema_whitespace_instance) == {\n        \"guided_json\": json.loads(JSON_SCHEMA_STRING),\n        \"whitespace_pattern\": \"\\n\"\n    }\n    assert type_adapter.format_output_type(int) == {\n        \"guided_regex\": \"([+-]?(0|[1-9][0-9]*))\"\n    }\n"
  },
  {
    "path": "tests/processors/test_base_processor.py",
    "content": "from typing import List\n\nimport numpy as np\nimport pytest\nimport torch\n\nfrom outlines.processors.base_logits_processor import OutlinesLogitsProcessor\n\ntry:\n    import mlx.core as mx\n    HAS_MLX = True\nexcept ImportError:\n    HAS_MLX = False\n\n\nlibraries = [\"numpy\", \"torch\"]\nif HAS_MLX:\n    libraries.append(\"mlx\")\n\n# we check the accepted shapes:\n# - both 1D\n# - both 2D\n# - input_ids 1D and logits 2D with a single sequence\n# we raise an error if the shapes are not accepted:\n# - input_ids 2D and logits 1D\n# - input_ids 1D and logits 2D, but with multiple sequences\n# - both 3D\narrays = {\n    \"numpy\": [\n        (np.array([1, 2], dtype=np.float32), np.array([1, 2], dtype=np.int32), None),\n        (np.array([[1, 2], [3, 4]], dtype=np.float32), np.array([[1, 2], [3, 4]], dtype=np.int32), None),\n        (np.array([1, 2], dtype=np.float32), np.array([[1, 2]], dtype=np.int32), None),\n        (np.array([[1, 2]], dtype=np.float32), np.array([1, 2], dtype=np.int32), AssertionError),\n        (np.array([1, 2], dtype=np.float32), np.array([[1, 2], [3, 4]], dtype=np.int32), AssertionError),\n        (np.array([[[1, 2]]], dtype=np.float32), np.array([[[1, 2]]], dtype=np.int32), ValueError),\n    ],\n    \"torch\": [\n        (torch.tensor([1, 2], dtype=torch.float32), torch.tensor([1, 2], dtype=torch.int32), None),\n        (torch.tensor([[1, 2], [3, 4]], dtype=torch.float32), torch.tensor([[1, 2], [3, 4]], dtype=torch.int32), None),\n        (torch.tensor([1, 2], dtype=torch.float32), torch.tensor([[1, 2]], dtype=torch.int32), None),\n        (torch.tensor([[1, 2]], dtype=torch.float32), torch.tensor([1, 2], dtype=torch.int32), AssertionError),\n        (torch.tensor([1, 2], dtype=torch.float32), torch.tensor([[1, 2], [3, 4]], dtype=torch.int32), AssertionError),\n        (torch.tensor([[[1, 2]]], dtype=torch.float32), torch.tensor([[[1, 2]]], dtype=torch.int32), ValueError),\n    ],\n}\nif HAS_MLX:\n    arrays[\"mlx\"] = [\n  
      (mx.array([1, 2], dtype=mx.float32), mx.array([1, 2], dtype=mx.int32), None),\n        (mx.array([[1, 2], [3, 4]], dtype=mx.float32), mx.array([[1, 2], [3, 4]], dtype=mx.int32), None),\n        (mx.array([1, 2], dtype=mx.float32), mx.array([[1, 2]], dtype=mx.int32), None),\n        (mx.array([[1, 2]], dtype=mx.float32), mx.array([1, 2], dtype=mx.int32), AssertionError),\n        (mx.array([1, 2], dtype=mx.float32), mx.array([[1, 2], [3, 4]], dtype=mx.int32), AssertionError),\n        (mx.array([[[1, 2]]], dtype=mx.float32), mx.array([[[1, 2]]], dtype=mx.int32), ValueError),\n    ]\n\nclass MockLogitsProcessor(OutlinesLogitsProcessor):\n    def process_logits(self, input_ids, logits):\n        # check that input_ids and logits received are 2D tensors\n        assert len(self.tensor_adapter.shape(input_ids)) == 2\n        assert len(self.tensor_adapter.shape(logits)) == 2\n        return logits\n\n\n@pytest.mark.parametrize(\"library\", libraries)\ndef test_base_logits_processor_init(library):\n    processor = MockLogitsProcessor(library)\n    assert processor.tensor_adapter is not None\n    with pytest.raises(NotImplementedError):\n        processor = MockLogitsProcessor(\"foo\")\n        processor.reset()\n\n\n@pytest.mark.parametrize(\"library\", libraries)\ndef test_base_logits_processor_call(library):\n    processor = MockLogitsProcessor(library)\n    input_values = arrays[library]\n    for input_value in input_values:\n        input_ids, logits, expected_error = input_value\n        if expected_error is not None:\n            with pytest.raises(expected_error):\n                processor(input_ids, logits)\n        else:\n            original_shape = processor.tensor_adapter.shape(logits)\n            processed_logits = processor(input_ids, logits)\n            # we check that the shape of logits is preserved\n            assert processor.tensor_adapter.shape(processed_logits) == original_shape\n\n\n@pytest.mark.parametrize(\"library\", libraries)\ndef 
test_base_logits_processor_init_library_name(library):\n    processor = MockLogitsProcessor(library)\n    assert processor.tensor_adapter is not None\n    with pytest.raises(NotImplementedError):\n        processor = MockLogitsProcessor(\"foo\")\n"
  },
  {
    "path": "tests/processors/test_tensor_adapters.py",
    "content": "import pytest\nfrom pytest import mark\n\nimport numpy as np\nimport torch\n\nfrom outlines.processors.tensor_adapters import (\n    NumpyTensorAdapter,\n    TorchTensorAdapter,\n    MLXTensorAdapter,\n)\n\ntry:\n    import mlx_lm\n    import mlx.core as mx\n\n    HAS_MLX = mx.metal.is_available()\nexcept ImportError:\n    HAS_MLX = False\n\n\nadapters = {\n    \"numpy\": NumpyTensorAdapter(),\n    \"torch\": TorchTensorAdapter(),\n}\nif HAS_MLX:\n    adapters[\"mlx\"] = MLXTensorAdapter()\n\nframeworks = [\"numpy\", \"torch\", \"mlx\"]\n\ndef create_tensor(framework, shape, dtype=None):\n    if framework == \"torch\":\n        return torch.randn(*shape)\n    elif framework == \"numpy\":\n        return np.random.randn(*shape)\n    elif framework == \"mlx\":\n        if not HAS_MLX:\n            pytest.skip(\"MLX not available\")\n        return mx.random.normal(shape)\n\ndef compare_tensors(framework, tensor1, tensor2):\n    if framework == \"torch\":\n        return torch.allclose(tensor1, tensor2)\n    elif framework == \"numpy\":\n        return np.array_equal(tensor1, tensor2)\n    elif framework == \"mlx\":\n        if not HAS_MLX:\n            pytest.skip(\"MLX not available\")\n        return mx.array_equal(tensor1, tensor2)\n\n\n@pytest.mark.parametrize(\"framework\", frameworks)\ndef test_tensor_adapter_shape(framework):\n    # 1d tensor\n    tensor_1d = create_tensor(framework, (2,))\n    result_1d = adapters[framework].shape(tensor_1d)\n    assert len(result_1d) == 1\n    assert result_1d[0] == 2\n\n    # 2d tensor\n    tensor_2d = create_tensor(framework, (2, 3))\n    result_2d = adapters[framework].shape(tensor_2d)\n    assert len(result_2d) == 2\n    assert result_2d[0] == 2\n    assert result_2d[1] == 3\n\n    # 3d tensor\n    tensor_3d = create_tensor(framework, (2, 2, 3))\n    result_3d = adapters[framework].shape(tensor_3d)\n    assert len(result_3d) == 3\n    assert result_3d[0] == 2\n    assert result_3d[1] == 2\n    assert 
result_3d[2] == 3\n\n\n@pytest.mark.parametrize(\"framework\", frameworks)\ndef test_tensor_adapter_unsqueeze(framework):\n    # 1d tensor\n    tensor_1d = create_tensor(framework, (2,))\n    result_1d = adapters[framework].unsqueeze(tensor_1d)\n    assert result_1d.shape == (1, 2)\n\n    # 2d tensor\n    tensor_2d = create_tensor(framework, (2, 3))\n    result_2d = adapters[framework].unsqueeze(tensor_2d)\n    assert result_2d.shape == (1, 2, 3)\n\n\n@pytest.mark.parametrize(\"framework\", frameworks)\ndef test_tensor_adapter_squeeze(framework):\n    # 1d tensor\n    tensor_1d = create_tensor(framework, (1,))\n    result_1d = adapters[framework].squeeze(tensor_1d)\n    with pytest.raises(TypeError):\n        len(result_1d)\n\n    # 2d tensor\n    tensor_2d = create_tensor(framework, (1, 2))\n    result_2d = adapters[framework].squeeze(tensor_2d)\n    assert result_2d.shape == (2,)\n\n    # 3d tensor\n    tensor_3d = create_tensor(framework, (1, 2, 3))\n    result_3d = adapters[framework].squeeze(tensor_3d)\n    assert result_3d.shape == (2, 3)\n\n\n@pytest.mark.parametrize(\"framework\", frameworks)\ndef test_tensor_adapter_to_list(framework):\n    # 1d tensor\n    tensor_1d = create_tensor(framework, (2,))\n    result_1d = adapters[framework].to_list(tensor_1d)\n    assert isinstance(result_1d, list)\n    assert len(result_1d) == 2\n\n    # 2d tensor\n    tensor_2d = create_tensor(framework, (2, 3))\n    result_2d = adapters[framework].to_list(tensor_2d)\n    assert isinstance(result_2d, list)\n    assert len(result_2d) == 2\n    assert len(result_2d[0]) == 3\n    assert len(result_2d[1]) == 3\n\n    # 3d tensor\n    tensor_3d = create_tensor(framework, (2, 2, 3))\n    result_3d = adapters[framework].to_list(tensor_3d)\n    assert isinstance(result_3d, list)\n    assert len(result_3d) == 2\n    assert len(result_3d[0]) == 2\n    assert len(result_3d[1]) == 2\n    assert len(result_3d[0][0]) == 3\n    assert len(result_3d[0][1]) == 3\n    assert 
len(result_3d[1][0]) == 3\n    assert len(result_3d[1][1]) == 3\n\n\n@pytest.mark.parametrize(\"framework\", frameworks)\ndef test_tensor_adapter_to_scalar(framework):\n    # multi-elements tensor, should raise an error\n    tensor_multi = create_tensor(framework, (2, 3))\n    if framework == \"torch\":\n        with pytest.raises(RuntimeError):\n            adapters[framework].to_scalar(tensor_multi)\n    else:\n        with pytest.raises(ValueError):\n            adapters[framework].to_scalar(tensor_multi)\n\n    # single-element tensor\n    tensor_single = create_tensor(framework, (1, 1))\n    scalar = adapters[framework].to_scalar(tensor_single)\n    assert isinstance(scalar, float)\n\n\n@pytest.mark.parametrize(\"framework\", frameworks)\ndef test_tensor_adapter_full_like(framework):\n    tensor = create_tensor(framework, (2, 3))\n    result = adapters[framework].full_like(tensor, 0)\n    assert result.shape == (2, 3)\n    for i in range(2):\n        for j in range(3):\n            assert result[i, j] == 0\n\n\n@pytest.mark.parametrize(\"framework\", frameworks)\ndef test_tensor_adapter_concatenate(framework):\n    # 1d tensors\n    tensor1 = create_tensor(framework, (2,))\n    tensor2 = create_tensor(framework, (2,))\n    result = adapters[framework].concatenate([tensor1, tensor2])\n    assert result.shape == (4,)\n    assert result[0] == tensor1[0]\n    assert result[1] == tensor1[1]\n    assert result[2] == tensor2[0]\n    assert result[3] == tensor2[1]\n\n    # 2d tensors\n    tensor1 = create_tensor(framework, (2, 3))\n    tensor2 = create_tensor(framework, (2, 3))\n    result = adapters[framework].concatenate([tensor1, tensor2])\n    assert result.shape == (4, 3)\n    for i in range(2):\n        for j in range(3):\n            assert result[i, j] == tensor1[i, j]\n            assert result[i + 2, j] == tensor2[i, j]\n\n    # 3d tensors\n    tensor1 = create_tensor(framework, (2, 2, 3))\n    tensor2 = create_tensor(framework, (2, 2, 3))\n    result = 
adapters[framework].concatenate([tensor1, tensor2])\n    assert result.shape == (4, 2, 3)\n    for i in range(2):\n        for j in range(2):\n            for k in range(3):\n                assert result[i, j, k] == tensor1[i, j, k]\n                assert result[i + 2, j, k] == tensor2[i, j, k]\n\n\n@pytest.mark.parametrize(\"framework\", frameworks)\ndef test_tensor_adapter_get_to_device(framework):\n    tensor = create_tensor(framework, (2, 3))\n    device = adapters[framework].get_device(tensor)\n    device_tensor = adapters[framework].to_device(tensor, device)\n\n    if framework == \"torch\":\n        assert isinstance(device_tensor.device.type, str)\n        assert compare_tensors(framework, device_tensor, tensor)\n    else:\n        assert compare_tensors(framework, device_tensor, tensor)\n\n\n@pytest.mark.parametrize(\"framework\", frameworks)\ndef test_tensor_adapter_boolean_ones_like(framework):\n    tensor = create_tensor(framework, (2, 3))\n    ones = adapters[framework].boolean_ones_like(tensor)\n\n    assert ones.shape == (2, 3)\n    for i in range(2):\n        for j in range(3):\n            assert ones[i, j]\n\n\n@pytest.mark.parametrize(\"framework\", frameworks)\ndef test_tensor_adapter_apply_mask(framework):\n    tensor = create_tensor(framework, (2, 3))\n\n    if framework == \"torch\":\n        mask = torch.randn(2, 3) > 0\n    elif framework == \"numpy\":\n        mask = np.random.randn(2, 3) > 0\n    elif framework == \"mlx\":\n        if not HAS_MLX:\n            pytest.skip(\"MLX not available\")\n        mask = mx.random.normal((2, 3)) > 0\n\n    masked = adapters[framework].apply_mask(tensor, mask, float(\"-inf\"))\n\n    assert masked.shape == (2, 3)\n    for i in range(2):\n        for j in range(3):\n            if mask[i, j]:\n                assert masked[i, j] == float(\"-inf\")\n            else:\n                assert masked[i, j] == tensor[i, j]\n\n\n@pytest.mark.parametrize(\"framework\", frameworks)\ndef 
test_tensor_adapter_argsort_descending(framework):\n    tensor = create_tensor(framework, (2, 3))\n    indices = adapters[framework].argsort_descending(tensor)\n\n    assert indices.shape == (2, 3)\n    for i in range(2):\n        sorted_values = [tensor[i][idx] for idx in indices[i]]\n        for j in range(len(sorted_values) - 1):\n            assert sorted_values[j] >= sorted_values[j + 1]\n"
  },
  {
    "path": "tests/test_applications.py",
    "content": "from typing import Any\n\nimport jinja2\nimport pytest\nimport transformers\n\nfrom outlines import from_transformers\nfrom outlines.applications import Application\nfrom outlines.templates import Template\n\n\n@pytest.fixture(scope=\"session\")\ndef model():\n    return from_transformers(\n        transformers.AutoModelForCausalLM.from_pretrained(\"gpt2\"),\n        transformers.AutoTokenizer.from_pretrained(\"gpt2\"),\n    )\n\n\n@pytest.fixture(scope=\"session\")\ndef another_model():\n    return from_transformers(\n        transformers.AutoModelForCausalLM.from_pretrained(\"gpt2\"),\n        transformers.AutoTokenizer.from_pretrained(\"gpt2\"),\n    )\n\n\ndef test_application_initialization():\n    template = Template.from_string(\"Test {{ value }}\")\n    output_type = None\n    application = Application(template, output_type)\n\n    assert application.template == template\n    assert application.output_type == output_type\n    assert application.model is None\n    assert application.generator is None\n\n\ndef test_application_generator_no_model():\n    template = Template.from_string(\"Test {{ value }}\")\n    output_type = None\n    application = Application(template, output_type)\n\n    with pytest.raises(ValueError):\n        application(None, {\"value\": \"example\"})\n\n\ndef test_application_template_call(model):\n    template = Template.from_string(\"Test {{ value }}\")\n    output_type = None\n    application = Application(template, output_type)\n    result = application(model, {\"value\": \"example\"}, max_new_tokens=10)\n\n    assert isinstance(result, str)\n\n\ndef test_application_callable_call(model):\n    def template(value):\n        return f\"Test {value}\"\n\n    output_type = None\n    application = Application(template, output_type)\n    result = application(model, {\"value\": \"example\"}, max_new_tokens=10)\n\n    assert isinstance(result, str)\n\n\ndef test_application_template_error(model):\n    template = 
Template.from_string(\"Test {{ value }}\")\n    output_type = None\n    application = Application(template, output_type)\n\n    with pytest.raises(jinja2.exceptions.UndefinedError):\n        application(model, {\"foo\": \"bar\"})\n\n\ndef test_application_generator_reuse(model, another_model):\n    template = Template.from_string(\"Test {{ value }}\")\n    output_type = None\n    application = Application(template, output_type)\n\n    application(model, {\"value\": \"example\"}, max_new_tokens=10)\n    first_generator = application.generator\n    first_model = application.model\n\n    application(model, {\"value\": \"example\"}, max_new_tokens=10)\n    assert application.model == first_model\n    assert application.generator == first_generator\n\n    application(another_model, {\"value\": \"example\"}, max_new_tokens=10)\n    assert application.model == another_model\n    assert application.model != first_model\n    assert application.generator != first_generator\n"
  },
  {
    "path": "tests/test_cache.py",
    "content": "import os\nimport tempfile\nimport unittest\nfrom importlib import reload\n\nimport diskcache\nimport pytest\nfrom diskcache import Cache, UNKNOWN\nfrom outlines.caching import CloudpickleDisk\n\n\n@pytest.fixture\ndef temp_dir():\n    \"\"\"Create a temporary directory for testing.\"\"\"\n    directory = tempfile.mkdtemp()\n    yield directory\n\n\n@pytest.fixture\ndef refresh_environment():\n    \"\"\"Refresh the test environment.\n\n    This deletes any reference to `outlines` in the modules dictionary and unsets the\n    `OUTLINES_CACHE_DIR` environment variable if set. This is necessary because we\n    are using a module variable to hold the cache.\n\n    \"\"\"\n    import sys\n\n    for key in list(sys.modules.keys()):\n        if \"outlines\" in key:\n            del sys.modules[key]\n\n    try:\n        del os.environ[\"OUTLINES_CACHE_DIR\"]\n    except KeyError:\n        pass\n\n\n@pytest.fixture\ndef test_cache(refresh_environment):\n    \"\"\"Initialize a temporary cache and delete it after the test has run.\"\"\"\n    with tempfile.TemporaryDirectory() as tempdir:\n        os.environ[\"OUTLINES_CACHE_DIR\"] = tempdir\n        import outlines\n\n        memory = outlines.get_cache()\n        assert memory.directory == tempdir\n\n        yield outlines.caching.cache()\n\n        memory.clear()\n\n\ndef test_get_cache(test_cache):\n    import outlines\n\n    memory = outlines.get_cache()\n    assert isinstance(memory, diskcache.Cache)\n\n    # If the cache is enabled then the size\n    # of `store` should not increase the\n    # second time `f` is called.\n    store = list()\n\n    @test_cache\n    def f(x):\n        store.append(1)\n        return x\n\n    f(1)\n    store_size = len(store)\n\n    f(1)\n    assert len(store) == store_size\n\n    f(2)\n    assert len(store) == store_size + 1\n\n\ndef test_disable_cache(test_cache):\n    \"\"\"Make sure that we can disable the cache.\"\"\"\n    import outlines\n\n    
outlines.disable_cache()\n\n    # If the cache is disabled then the size\n    # of `store` should increase every time\n    # `f` is called.\n    store = list()\n\n    @test_cache\n    def f(x):\n        store.append(1)\n        return x\n\n    f(1)\n    store_size = len(store)\n    f(1)\n    assert len(store) == store_size + 1\n\n\ndef test_clear_cache(test_cache):\n    \"\"\"Make sure that we can clear the cache.\"\"\"\n    import outlines\n\n    store = list()\n\n    @test_cache\n    def f(x):\n        store.append(1)\n        return x\n\n    # The size of `store` does not increase since\n    # `f` is cached after the first run.\n    f(1)\n    store_size = len(store)\n    f(1)\n    assert len(store) == store_size\n\n    # The size of `store` should increase if we call `f`\n    # after clearing the cache.\n    outlines.clear_cache()\n    f(1)\n    assert len(store) == store_size + 1\n\n\ndef test_version_upgrade_cache_invalidate(test_cache, mocker):\n    \"\"\"Ensure we can change the signature of a cached function if we upgrade the version\"\"\"\n\n    import outlines.caching\n\n    def simulate_restart_outlines():\n        # clearing in-memory lru_cache which returns the diskcache in\n        # order to simulate a reload, we're not clearing the diskcache itself\n        outlines.caching.get_cache.cache_clear()\n\n    mocker.patch(\"outlines._version.__version__\", new=\"0.0.0\")\n    simulate_restart_outlines()\n\n    # initialize cache with signature of Tuple-of-3\n    @test_cache\n    def foo():\n        return (1, 2, 3)\n\n    a, b, c = foo()\n\n    # \"restart\" outlines without upgrading version\n    simulate_restart_outlines()\n\n    # change signature to Tuple-of-2\n    @test_cache\n    def foo():\n        return (1, 2)\n\n    # assert without version upgrade, old, bad cache is used\n    with pytest.raises(ValueError):\n        a, b = foo()\n\n    # \"restart\" outlines WITH version upgrade\n    mocker.patch(\"outlines._version.__version__\", 
new=\"0.0.1\")\n    simulate_restart_outlines()\n\n    # change signature to Tuple-of-2\n    @test_cache\n    def foo():\n        return (1, 2)\n\n    # assert with version upgrade, old cache is invalidated and new cache is used\n    a, b = foo()\n\n\ndef test_cache_disabled_decorator(test_cache):\n    \"\"\"Ensure cache can be disabled in a local scope\"\"\"\n\n    from outlines.caching import cache_disabled\n\n    mock = unittest.mock.MagicMock()\n\n    @test_cache\n    def fn():\n        mock()\n        return 1\n\n    # first call isn't cached\n    fn()\n    assert mock.call_count == 1\n\n    # second call doesn't run fn, uses cache\n    fn()\n    assert mock.call_count == 1\n\n    # cache_disabled decorator disables cache within scope\n    with cache_disabled():\n        fn()\n    assert mock.call_count == 2  # called once in cache_disabled scope\n\n    # scope has exited, cache is enabled again\n    fn()\n    assert mock.call_count == 2\n\n\n@pytest.fixture\ndef temp_cache_dir():\n    import os\n    import tempfile\n\n    import outlines.caching\n\n    with tempfile.TemporaryDirectory() as tempdir:\n        os.environ[\"OUTLINES_CACHE_DIR\"] = tempdir\n        outlines.caching.get_cache.cache_clear()\n        reload(outlines)\n        cache_status = outlines.caching._caching_enabled\n        try:\n            outlines.caching._caching_enabled = True\n            yield\n        finally:\n            outlines.caching._caching_enabled = cache_status\n"
  },
  {
    "path": "tests/test_generator.py",
    "content": "import pytest\nfrom typing import AsyncGenerator, Generator as TypingGenerator, Literal\n\nimport transformers\nfrom outlines_core import Index, Vocabulary\n\nimport outlines\nfrom outlines.backends.outlines_core import OutlinesCoreLogitsProcessor\nfrom outlines.generator import (\n    BlackBoxGenerator,\n    SteerableGenerator,\n    Generator,\n    AsyncBlackBoxGenerator,\n)\nfrom outlines.models import AsyncVLLM, VLLM\nfrom outlines.processors import (\n    OutlinesLogitsProcessor,\n)\nfrom outlines.types import CFG\nfrom tests.test_utils.mock_openai_client import (\n    MockAsyncOpenAIClient,\n    MockOpenAIClient,\n)\n\n\nMODEL_NAME = \"microsoft/Phi-3-mini-4k-instruct\"\n\n\n# We used the mocked vllm model to test the black box generator\nasync_openai_client = MockAsyncOpenAIClient()\nopenai_client = MockOpenAIClient()\nmock_responses = [\n    (\n        {\n            'messages': [\n                {'role': \"user\", 'content': 'Write a very short sentence'}\n            ],\n            'model': MODEL_NAME,\n            'max_tokens': 10,\n            'extra_body': {'guided_regex': '(\"[^\"]*\")'},\n        },\n        \"Mock response\"\n    ),\n    (\n        {\n            'messages': [\n                {'role': \"user\", 'content': 'Write a very short sentence'}\n            ],\n            'model': MODEL_NAME,\n            'max_tokens': 10,\n            'extra_body': {'guided_regex': '(\"[^\"]*\")'},\n            'stream': True,\n        },\n        [\"Mock\", \"response\"]\n    ),\n]\nasync_openai_client.add_mock_responses(mock_responses)\nopenai_client.add_mock_responses(mock_responses)\n\n\n@pytest.fixture(scope=\"session\")\ndef steerable_model():\n    model = outlines.from_transformers(\n        transformers.AutoModelForCausalLM.from_pretrained(\"erwanf/gpt2-mini\"),\n        transformers.AutoTokenizer.from_pretrained(\"erwanf/gpt2-mini\"),\n    )\n    return model\n\n\n@pytest.fixture(scope=\"session\")\ndef sample_processor():\n    
vocabulary = Vocabulary.from_pretrained(\"openai-community/gpt2\")\n    index = Index(r\"[0-9]{3}\", vocabulary)\n    return OutlinesCoreLogitsProcessor(index, \"torch\")\n\n\n@pytest.fixture(scope=\"module\")\ndef black_box_sync_model():\n    return VLLM(openai_client, MODEL_NAME)\n\n\n@pytest.fixture(scope=\"module\")\ndef black_box_async_model():\n    return AsyncVLLM(async_openai_client, MODEL_NAME)\n\n\n# SteerableGenerator\n\n\ndef test_steerable_generator_init_valid_processor(steerable_model, sample_processor):\n    generator = SteerableGenerator.from_processor(steerable_model, sample_processor)\n    assert generator.logits_processor == sample_processor\n    assert generator.model == steerable_model\n\n\ndef test_steerable_generator_init_cfg_output_type(steerable_model):\n    generator = SteerableGenerator(steerable_model, CFG('start: \"a\"'))\n    assert generator.model == steerable_model\n    assert isinstance(generator.logits_processor, OutlinesLogitsProcessor)\n\n\ndef test_steerable_generator_init_other_output_type(steerable_model):\n    generator = SteerableGenerator(steerable_model, Literal[\"foo\", \"bar\"])\n    assert generator.model == steerable_model\n    assert isinstance(generator.logits_processor, OutlinesLogitsProcessor)\n\n\ndef test_steerable_generator_init_invalid_output_type(steerable_model, sample_processor):\n    with pytest.raises(ValueError):\n        SteerableGenerator(steerable_model, sample_processor)\n\n\ndef test_steerable_generator_call(steerable_model):\n    generator = SteerableGenerator(steerable_model, Literal[\"foo\", \"bar\"])\n    result = generator(\"foo\", max_new_tokens=10)\n    assert isinstance(result, str)\n\n\ndef test_steerable_generator_stream(steerable_model):\n    with pytest.raises(NotImplementedError):\n        generator = SteerableGenerator(steerable_model, Literal[\"foo\", \"bar\"])\n        result = generator.stream(\"foo\", max_tokens=10)\n        assert isinstance(result, TypingGenerator)\n        assert 
isinstance(next(result), str)\n\n\n# BlackBoxGenerator\n\n\ndef test_black_box_generator_init(black_box_sync_model):\n    generator = BlackBoxGenerator(black_box_sync_model, Literal[\"foo\", \"bar\"])\n    assert generator.model == black_box_sync_model\n    assert generator.output_type == Literal[\"foo\", \"bar\"]\n\ndef test_black_box_generator_call(black_box_sync_model):\n    generator = BlackBoxGenerator(black_box_sync_model, str)\n    result = generator(\"Write a very short sentence\", max_tokens=10)\n    assert isinstance(result, str)\n\n\ndef test_black_box_generator_stream(black_box_sync_model):\n    generator = BlackBoxGenerator(black_box_sync_model, str)\n    result = generator.stream(\"Write a very short sentence\", max_tokens=10)\n    assert isinstance(result, TypingGenerator)\n    assert isinstance(next(result), str)\n\n\n# AsyncBlackBoxGenerator\n\n\ndef test_async_black_box_generator_init(black_box_async_model):\n    generator = AsyncBlackBoxGenerator(black_box_async_model, Literal[\"foo\", \"bar\"])\n    assert generator.model == black_box_async_model\n    assert generator.output_type == Literal[\"foo\", \"bar\"]\n\n\n@pytest.mark.asyncio\nasync def test_async_black_box_generator_call(black_box_async_model):\n    generator = AsyncBlackBoxGenerator(black_box_async_model, str)\n    result = await generator(\"Write a very short sentence\", max_tokens=10)\n    assert isinstance(result, str)\n\n\n@pytest.mark.asyncio\nasync def test_async_black_box_generator_stream(black_box_async_model):\n    generator = AsyncBlackBoxGenerator(black_box_async_model, str)\n    result = generator.stream(\"Write a very short sentence\", max_tokens=10)\n    assert isinstance(result, AsyncGenerator)\n    async for chunk in result:\n        assert isinstance(chunk, str)\n        break  # Just check the first chunk\n\n\n# Generator\n\n\ndef test_generator_init_no_model():\n    with pytest.raises(ValueError):\n        Generator(None, Literal[\"foo\", \"bar\"])\n\n\ndef 
test_generator_init_multiple_output_type(steerable_model, sample_processor):\n    with pytest.raises(ValueError):\n        Generator(steerable_model, Literal[\"foo\", \"bar\"], processor=sample_processor)\n\n\ndef test_generator_steerable_output_type(steerable_model):\n    generator = Generator(steerable_model, Literal[\"foo\", \"bar\"])\n    assert isinstance(generator, SteerableGenerator)\n    assert generator.model == steerable_model\n    assert isinstance(generator.logits_processor, OutlinesLogitsProcessor)\n\n\ndef test_generator_steerable_processor(steerable_model, sample_processor):\n    generator = Generator(steerable_model, processor=sample_processor)\n    assert isinstance(generator, SteerableGenerator)\n    assert generator.model == steerable_model\n    assert isinstance(generator.logits_processor, OutlinesLogitsProcessor)\n\n\ndef test_generator_black_box_sync_output_type(black_box_sync_model):\n    generator = Generator(black_box_sync_model, Literal[\"foo\", \"bar\"])\n    assert isinstance(generator, BlackBoxGenerator)\n    assert generator.model == black_box_sync_model\n    assert generator.output_type == Literal[\"foo\", \"bar\"]\n\n\ndef test_generator_black_box_sync_processor(black_box_sync_model, sample_processor):\n    with pytest.raises(NotImplementedError):\n        Generator(black_box_sync_model, processor=sample_processor)\n\n\ndef test_generator_black_box_async_output_type(black_box_async_model):\n    generator = Generator(black_box_async_model, Literal[\"foo\", \"bar\"])\n    assert isinstance(generator, AsyncBlackBoxGenerator)\n    assert generator.model == black_box_async_model\n    assert generator.output_type == Literal[\"foo\", \"bar\"]\n\n\ndef test_generator_black_box_async_processor(black_box_async_model, sample_processor):\n    with pytest.raises(NotImplementedError):\n        Generator(black_box_async_model, processor=sample_processor)\n"
  },
  {
    "path": "tests/test_inputs.py",
    "content": "\"\"\"Unit tests for the inputs module.\"\"\"\n\nimport base64\nimport tempfile\nfrom io import BytesIO\nfrom typing import Dict, List, Any\n\nimport pytest\nfrom PIL import Image as PILImage\n\nfrom outlines.inputs import Image, Video, Audio, Chat\n\n\n@pytest.fixture\ndef image_input():\n    image = PILImage.new(\"RGB\", (100, 100), color=\"red\")\n    image.format = \"PNG\"\n    buffer = BytesIO()\n    image.save(buffer, format=\"PNG\")\n    return Image(image=image)\n\n\ndef test_image_initialization():\n    # png\n    image = PILImage.new(\"RGB\", (100, 100), color=\"red\")\n    image.format = \"PNG\"\n    buffer = BytesIO()\n    image.save(buffer, format=\"PNG\")\n    image_input = Image(image=image)\n\n    assert image_input.image == image\n    assert image_input.image_format == \"image/png\"\n    assert image_input.image_str == base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n\n    # jpeg\n    image = PILImage.new(\"RGB\", (100, 100), color=\"blue\")\n    image.format = \"JPEG\"\n    buffer = BytesIO()\n    image.save(buffer, format=\"JPEG\")\n    image_input = Image(image=image)\n\n    assert image_input.image == image\n    assert image_input.image_format == \"image/jpeg\"\n    assert image_input.image_str == base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n\n\ndef test_image_initialization_invalid():\n    \"\"\"Test that Image initialization fails when image has no format.\"\"\"\n    # No format\n    image = PILImage.new(\"RGB\", (100, 100), color=\"yellow\")\n    with pytest.raises(TypeError, match=\"Could not read the format of the image\"):\n        Image(image=image)\n\n    # Empty string format\n    image = PILImage.new(\"RGB\", (100, 100), color=\"orange\")\n    image.format = \"\"\n    with pytest.raises(TypeError, match=\"Could not read the format of the image\"):\n        Image(image=image)\n\n\ndef test_video_initialization():\n    video = \"foo\"\n    video_input = Video(video=video)\n    assert video_input.video == 
video\n\n\ndef test_audio_initialization():\n    audio = \"foo\"\n    audio_input = Audio(audio=audio)\n    assert audio_input.audio == audio\n\n\ndef test_chat_initialization():\n    # Empty\n    chat = Chat()\n    assert chat.messages == []\n    assert len(chat.messages) == 0\n    assert str(chat) == \"\"\n    assert repr(chat) == \"Chat(messages=[])\"\n\n    # With messages\n    messages = [\n        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n        {\"role\": \"user\", \"content\": \"Hello!\"},\n        {\"role\": \"assistant\", \"content\": \"Hi there!\"}\n    ]\n    chat = Chat(messages=messages)\n    assert chat.messages == messages\n    assert len(chat.messages) == 3\n    assert str(chat) == \"{'role': 'system', 'content': 'You are a helpful assistant.'}\\n{'role': 'user', 'content': 'Hello!'}\\n{'role': 'assistant', 'content': 'Hi there!'}\"\n    assert repr(chat) == \"Chat(messages=[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Hello!'}, {'role': 'assistant', 'content': 'Hi there!'}])\"\n\n\ndef test_chat_append():\n    chat = Chat(messages=[])\n    message = {\"role\": \"user\", \"content\": \"Hello\"}\n    chat.append(message)\n    assert len(chat.messages) == 1\n    assert chat.messages[0] == message\n\ndef test_chat_extend():\n    chat = Chat(messages=[])\n    messages = [\n        {\"role\": \"user\", \"content\": \"Hello\"},\n        {\"role\": \"assistant\", \"content\": \"Hi\"}\n    ]\n    chat.extend(messages)\n    assert len(chat.messages) == 2\n    assert chat.messages == messages\n\ndef test_chat_pop():\n    # Pop from non-empty chat\n    messages = [\n        {\"role\": \"user\", \"content\": \"Hello\"},\n        {\"role\": \"assistant\", \"content\": \"Hi\"}\n    ]\n    chat = Chat(messages=messages.copy())\n    popped_message = chat.pop()\n    assert popped_message == {\"role\": \"assistant\", \"content\": \"Hi\"}\n    assert len(chat.messages) == 1\n    assert 
chat.messages[0] == {\"role\": \"user\", \"content\": \"Hello\"}\n\n    # Pop from empty chat\n    chat = Chat(messages=[])\n    with pytest.raises(IndexError):\n        chat.pop()\n\n\ndef test_chat_add_system_message(image_input):\n    # Add a string\n    chat = Chat(messages=[])\n    chat.add_system_message(\"You are a helpful assistant.\")\n    assert len(chat.messages) == 1\n    assert chat.messages[0][\"role\"] == \"system\"\n    assert chat.messages[0][\"content\"] == \"You are a helpful assistant.\"\n\n    # Add a list\n    chat = Chat(messages=[])\n    chat.add_system_message([\"prompt\", image_input])\n    assert len(chat.messages) == 1\n    assert chat.messages[0][\"role\"] == \"system\"\n    assert chat.messages[0][\"content\"] == [\"prompt\", image_input]\n\n    # Add a list of dict items with explicit types\n    chat = Chat(messages=[])\n    chat.add_system_message([{\"type\": \"text\", \"text\": \"prompt\"}, {\"type\": \"image\", \"image\": image_input}])\n    assert len(chat.messages) == 1\n    assert chat.messages[0][\"role\"] == \"system\"\n    assert chat.messages[0][\"content\"] == [{\"type\": \"text\", \"text\": \"prompt\"}, {\"type\": \"image\", \"image\": image_input}]\n\n\ndef test_add_user_message_string(image_input):\n    # Add a string\n    chat = Chat(messages=[])\n    chat.add_user_message(\"Hello, how are you?\")\n    assert len(chat.messages) == 1\n    assert chat.messages[0][\"role\"] == \"user\"\n    assert chat.messages[0][\"content\"] == \"Hello, how are you?\"\n\n    # Add a list\n    chat = Chat(messages=[])\n    chat.add_user_message([\"prompt\", image_input])\n    assert len(chat.messages) == 1\n    assert chat.messages[0][\"role\"] == \"user\"\n    assert chat.messages[0][\"content\"] == [\"prompt\", image_input]\n\n    # Add a list of dict items with explicit types\n    chat = Chat(messages=[])\n    chat.add_user_message([{\"type\": \"text\", \"text\": \"prompt\"}, {\"type\": \"image\", \"image\": image_input}])\n    assert 
len(chat.messages) == 1\n    assert chat.messages[0][\"role\"] == \"user\"\n    assert chat.messages[0][\"content\"] == [{\"type\": \"text\", \"text\": \"prompt\"}, {\"type\": \"image\", \"image\": image_input}]\n\n\ndef test_add_assistant_message_string(image_input):\n    # Add a string\n    chat = Chat(messages=[])\n    chat.add_assistant_message(\"I'm doing well, thank you!\")\n    assert len(chat.messages) == 1\n    assert chat.messages[0][\"role\"] == \"assistant\"\n    assert chat.messages[0][\"content\"] == \"I'm doing well, thank you!\"\n\n    # Add a list\n    chat = Chat(messages=[])\n    chat.add_assistant_message([\"prompt\", image_input])\n    assert len(chat.messages) == 1\n    assert chat.messages[0][\"role\"] == \"assistant\"\n    assert chat.messages[0][\"content\"] == [\"prompt\", image_input]\n\n    # Add a list of dict items with explicit types\n    chat = Chat(messages=[])\n    chat.add_assistant_message([{\"type\": \"text\", \"text\": \"prompt\"}, {\"type\": \"image\", \"image\": image_input}])\n    assert len(chat.messages) == 1\n    assert chat.messages[0][\"role\"] == \"assistant\"\n    assert chat.messages[0][\"content\"] == [{\"type\": \"text\", \"text\": \"prompt\"}, {\"type\": \"image\", \"image\": image_input}]\n"
  },
  {
    "path": "tests/test_templates.py",
    "content": "import base64\nimport os\nimport tempfile\nfrom typing import Optional\n\nimport pytest\nfrom PIL import Image as PILImage\nfrom io import BytesIO\nfrom pydantic import BaseModel, Field\n\nfrom outlines.inputs import Image\nfrom outlines.templates import (\n    Template,\n    build_template_from_string,\n    Vision,\n    get_fn_name,\n    get_fn_args,\n    get_fn_description,\n    get_fn_source,\n    get_fn_signature,\n    get_schema,\n)\n\n\ndef sample_function(x, y=2):\n    \"\"\"This is a sample function.\"\"\"\n    return x + y\n\ndef function_with_annotations(x: int, y: str) -> str:\n    \"\"\"Function with annotations.\"\"\"\n    return f\"{x} {y}\"\n\ndef function_with_no_docstring(x, y):\n    return x * y\n\nclass CallableClass:\n    def __call__(self):\n        pass\n\nclass PydanticClass(BaseModel):\n    foo: str\n\n\ndef test_vision_initialization():\n    # Create a simple image for testing\n    image = PILImage.new(\"RGB\", (10, 10), color=\"red\")\n    image.format = \"PNG\"\n\n    # Initialize the Vision object\n    with pytest.deprecated_call():\n        vision = Vision(prompt=\"Test prompt\", image=image)\n\n    # Check that the prompt is set correctly\n    assert isinstance(vision, list)\n    assert len(vision) == 2\n    assert vision[0] == \"Test prompt\"\n    assert isinstance(vision[1], Image)\n\n    # Check that the image is encoded correctly\n    buffer = BytesIO()\n    image.save(buffer, format=image.format)\n    expected_image_str = base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n    assert vision[1].image_str == expected_image_str\n\n    # Check that the image format is set correctly\n    assert vision[1].image_format == \"image/png\"\n\n\ndef test_vision_invalid_image_format():\n    # Create an image without a format\n    image = PILImage.new(\"RGB\", (10, 10), color=\"blue\")\n\n    # Expect a TypeError when the image format is not set\n    with pytest.deprecated_call():\n        with pytest.raises(TypeError, 
match=\"Could not read the format\"):\n            Vision(prompt=\"Test prompt\", image=image)\n\n\ndef render(content: str, filters: Optional[dict] = None, **kwargs):\n    template = build_template_from_string(content, filters or {})\n    return template.render(kwargs)\n\n\ndef test_render():\n    tpl = \"\"\"\n    A test string\"\"\"\n    assert render(tpl) == \"A test string\"\n\n    tpl = \"\"\"\n    A test string\n    \"\"\"\n    assert render(tpl) == \"A test string\"\n\n    tpl = \"\"\"\n        A test\n        Another test\n    \"\"\"\n    assert render(tpl) == \"A test\\nAnother test\"\n\n    tpl = \"\"\"A test\n        Another test\n    \"\"\"\n    assert render(tpl) == \"A test\\nAnother test\"\n\n    tpl = \"\"\"\n        A test line\n            An indented line\n    \"\"\"\n    assert render(tpl) == \"A test line\\n    An indented line\"\n\n    tpl = \"\"\"\n        A test line\n            An indented line\n\n    \"\"\"\n    assert render(tpl) == \"A test line\\n    An indented line\\n\"\n\n\ndef test_render_escaped_linebreak():\n    tpl = \"\"\"\n        A long test \\\n        that we break \\\n        in several lines\n    \"\"\"\n    assert render(tpl) == \"A long test that we break in several lines\"\n\n    tpl = \"\"\"\n        Break in \\\n        several lines \\\n        But respect the indentation\n            on line breaks.\n        And after everything \\\n        Goes back to normal\n    \"\"\"\n    assert (\n        render(tpl)\n        == \"Break in several lines But respect the indentation\\n    on line breaks.\\nAnd after everything Goes back to normal\"\n    )\n\n\ndef test_render_jinja():\n    \"\"\"Make sure that we can use basic Jinja2 syntax, and give examples\n    of how we can use it for basic use cases.\n    \"\"\"\n\n    # Notice the newline after the end of the loop\n    examples = [\"one\", \"two\"]\n    prompt = render(\n        \"\"\"\n        {% for e in examples %}\n        Example: {{e}}\n        {% endfor 
-%}\"\"\",\n        examples=examples,\n    )\n    assert prompt == \"Example: one\\nExample: two\\n\"\n\n    # We can remove the newline by cloing with -%}\n    examples = [\"one\", \"two\"]\n    prompt = render(\n        \"\"\"\n        {% for e in examples %}\n        Example: {{e}}\n        {% endfor -%}\n\n        Final\"\"\",\n        examples=examples,\n    )\n    assert prompt == \"Example: one\\nExample: two\\nFinal\"\n\n    # Same for conditionals\n    tpl = \"\"\"\n        {% if is_true %}\n        true\n        {% endif -%}\n\n        final\n        \"\"\"\n    assert render(tpl, is_true=True) == \"true\\nfinal\"\n    assert render(tpl, is_true=False) == \"final\"\n\n\ndef test_render_filters():\n    def foo(bar: str) -> str:\n        \"\"\"This is a sample function.\"\"\"\n        return bar\n\n    class PydanticClass(BaseModel):\n        foo: str = Field(description=\"bar\")\n\n    def custom_filter(x: str) -> str:\n        return x.upper()\n\n    # name filter\n    tpl = \"\"\"\n    {{ func | name }}\n    \"\"\"\n    assert render(tpl, func=foo) == \"foo\"\n\n    # description filter\n    tpl = \"\"\"\n    {{ func | description }}\n    \"\"\"\n    assert render(tpl, func=foo) == \"This is a sample function.\"\n\n    # source filter\n    tpl = \"\"\"\n    {{ func | source }}\n    \"\"\"\n    assert render(tpl, func=foo) == 'def foo(bar: str) -> str:\\n    \"\"\"This is a sample function.\"\"\"\\n    return bar\\n'\n\n    # signature filter\n    tpl = \"\"\"\n    {{ func | signature }}\n    \"\"\"\n    assert render(tpl, func=foo) == \"bar: str\"\n\n    # args filter\n    tpl = \"\"\"\n    {{ func | args }}\n    \"\"\"\n    assert render(tpl, func=foo) == \"bar: str\"\n\n    # schema filter\n    tpl = \"\"\"\n    {{ schema | schema }}\n    \"\"\"\n    assert render(tpl, schema=PydanticClass) == '{\\n  \"foo\": \"bar\"\\n}'\n\n    # custom filters\n    tpl = \"\"\"\n    {{ name | custom_filter }}\n    \"\"\"\n    assert render(tpl, {\"custom_filter\": 
custom_filter}, name=\"John\") == \"JOHN\"\n\n\n@pytest.fixture\ndef temp_prompt_file():\n    test_dir = tempfile.mkdtemp()\n\n    base_template_path = os.path.join(test_dir, \"base_template.txt\")\n    with open(base_template_path, \"w\") as f:\n        f.write(\n            \"\"\"{% block content %}{% endblock %}\n\"\"\"\n        )\n\n    include_file_path = os.path.join(test_dir, \"include.txt\")\n    with open(include_file_path, \"w\") as f:\n        f.write(\n            \"\"\"{% for example in examples %}\n- Q: {{ example.question }}\n- A: {{ example.answer }}\n{% endfor %}\n\"\"\"\n        )\n\n    prompt_file_path = os.path.join(test_dir, \"prompt.txt\")\n    with open(prompt_file_path, \"w\") as f:\n        f.write(\n            \"\"\"{% extends \"base_template.txt\" %}\n\n{% block content %}\nHere is a prompt with examples:\n\n{% include \"include.txt\" %}\n\nNow please answer the following question:\n\nQ: {{ question }}\nA:\n{% endblock %}\n\"\"\"\n        )\n    yield prompt_file_path\n\n\ndef test_prompt_from_file(temp_prompt_file):\n    prompt = Template.from_file(temp_prompt_file)\n    examples = [\n        {\"question\": \"What is the capital of France?\", \"answer\": \"Paris\"},\n        {\"question\": \"What is 2 + 2?\", \"answer\": \"4\"},\n    ]\n    question = \"What is the Earth's diameter?\"\n    rendered = prompt(examples=examples, question=question)\n    expected = \"\"\"Here is a prompt with examples:\n\n- Q: What is the capital of France?\n- A: Paris\n- Q: What is 2 + 2?\n- A: 4\n\nNow please answer the following question:\n\nQ: What is the Earth's diameter?\nA:\n\"\"\"\n    assert rendered.strip() == expected.strip()\n\n\ndef test_prompt_from_str():\n    content = \"\"\"\n    Hello, {{ name }}!\n    \"\"\"\n    prompt = Template.from_string(content)\n    assert prompt(name=\"World\") == \"Hello, World!\"\n\n\ndef test_template_from_str_with_extra_linebreaks():\n    content = \"\"\"\n    Hello, {{ name }}!\n\n\n    \"\"\"\n    template = 
build_template_from_string(content)\n    assert template.render(name=\"World\") == \"Hello, World!\\n\"\n\n\ndef test_get_fn_name():\n    with pytest.raises(TypeError):\n        get_fn_name(1)\n    assert get_fn_name(sample_function) == \"sample_function\"\n    assert get_fn_name(function_with_annotations) == \"function_with_annotations\"\n    no_name_func = lambda x: x\n    assert get_fn_name(no_name_func) == \"<lambda>\"\n    assert get_fn_name(CallableClass()) == \"CallableClass\"\n\n\ndef test_get_fn_args():\n    with pytest.raises(TypeError):\n        get_fn_args(1)\n    assert get_fn_args(sample_function) == \"x, y=2\"\n    assert get_fn_args(function_with_annotations) == \"x: int, y: str\"\n\n\ndef test_get_fn_description():\n    with pytest.raises(TypeError):\n        get_fn_description(1)\n    assert get_fn_description(sample_function) == \"This is a sample function.\"\n    assert get_fn_description(function_with_annotations) == \"Function with annotations.\"\n    assert get_fn_description(function_with_no_docstring) == \"\"\n\n\ndef test_get_fn_source():\n    with pytest.raises(TypeError, match=\"The `source` filter only applies to callables.\"):\n        get_fn_source(1)\n    source = (\n        'def sample_function(x, y=2):\\n'\n        '    \"\"\"This is a sample function.\"\"\"\\n'\n        '    return x + y'\n    )\n    assert get_fn_source(sample_function).strip() == source\n\n\ndef test_get_fn_signature():\n    with pytest.raises(TypeError, match=\"The `source` filter only applies to callables.\"):\n        get_fn_signature(1)\n    sample_function_signature = \"x, y=2\"\n    assert get_fn_signature(sample_function) == sample_function_signature\n    function_with_annotations_signature = \"x: int, y: str\"\n    assert get_fn_signature(function_with_annotations) == function_with_annotations_signature\n\n\ndef test_get_schema():\n    with pytest.raises(NotImplementedError):\n        get_schema(1)\n\n    dict_schema = {\"foo\": \"bar\"}\n    
dict_schema_output = get_schema(dict_schema)\n    assert dict_schema_output == '{\\n  \"foo\": \"bar\"\\n}'\n\n    pydantic_schema_output = get_schema(PydanticClass)\n    assert pydantic_schema_output == '{\\n  \"foo\": \"<foo>\"\\n}'\n"
  },
  {
    "path": "tests/test_utils/mock_lmstudio_client.py",
    "content": "import json\nfrom typing import Any, Dict, List, Optional, Tuple\n\nfrom tests.test_utils.utils import hash_dict\n\n\ndef normalize_for_hash(obj):\n    \"\"\"Normalize objects for consistent hashing.\n\n    lms.Chat objects have unique identifiers that change between instances,\n    so we convert them to a canonical dict format for hashing.\n    \"\"\"\n    obj_str = str(obj)\n    if obj_str.startswith(\"Chat.from_history(\"):\n        # Get the json from the string representation\n        json_part = obj_str[len(\"Chat.from_history(\"):-1]\n        data = json.loads(json_part)\n        return {\n            \"type\": \"lms.Chat\",\n            \"messages\": normalize_lmstudio_messages(data.get(\"messages\", []))\n        }\n    elif isinstance(obj, dict):\n        return {k: normalize_for_hash(v) for k, v in obj.items()}\n    elif isinstance(obj, list):\n        return [normalize_for_hash(item) for item in obj]\n    else:\n        return obj\n\n\ndef normalize_lmstudio_messages(messages):\n    \"\"\"Normalize message list for hashing.\"\"\"\n    result = []\n    for msg in messages:\n        normalized_msg = {\n            \"role\": msg.get(\"role\", \"\"),\n            \"content\": normalize_lmstudio_content(msg.get(\"content\", \"\")),\n        }\n        result.append(normalized_msg)\n    return result\n\n\ndef normalize_lmstudio_content(content):\n    \"\"\"Normalize message content for hashing.\"\"\"\n    if isinstance(content, list):\n        result = []\n        for item in content:\n            if isinstance(item, dict):\n                if item.get(\"type\") == \"text\":\n                    result.append({\"type\": \"text\", \"text\": item.get(\"text\", \"\")})\n                elif item.get(\"type\") == \"file\":\n                    result.append({\"type\": \"file\", \"sizeBytes\": item.get(\"sizeBytes\", 0)})\n                else:\n                    result.append(item)\n            else:\n                result.append(str(item))\n   
     return result\n    elif isinstance(content, str):\n        return content\n    else:\n        return str(content)\n\n\ndef hash_lmstudio_request(data: dict) -> str:\n    \"\"\"Hash a request dict, normalizing lms.Chat objects.\"\"\"\n    normalized = normalize_for_hash(data)\n    return hash_dict(normalized)\n\n\nclass MockLMStudioResponse:\n    \"\"\"Mock for LMStudio response object\"\"\"\n\n    def __init__(self, content: str):\n        self.content = content\n\n\nclass MockLMStudioModel:\n    \"\"\"Mock for LMStudio model object returned by client.llm.model()\"\"\"\n\n    def __init__(self, mock_responses: Dict[str, Any]):\n        self._mock_responses = mock_responses\n\n    def respond(self, messages, **kwargs):\n        request_key = hash_lmstudio_request({\"messages\": messages, **kwargs})\n        response = self._mock_responses.get(request_key)\n        if not response:\n            raise ValueError(f\"No response found for {{'messages': {messages}, **{kwargs}}}\")\n        return MockLMStudioResponse(response)\n\n    def respond_stream(self, messages, **kwargs):\n        request_key = hash_lmstudio_request({\"messages\": messages, **kwargs})\n        response = self._mock_responses.get(request_key)\n        if not response:\n            raise ValueError(f\"No response found for {{'messages': {messages}, **{kwargs}}}\")\n        for chunk in response:\n            yield MockLMStudioResponse(chunk)\n\n\nclass MockLMStudioLLM:\n    \"\"\"Mock for the llm attribute of Client\"\"\"\n\n    def __init__(self, mock_responses: Dict[str, Any]):\n        self._mock_responses = mock_responses\n\n    def model(self, model_key=None):\n        return MockLMStudioModel(self._mock_responses)\n\n\nclass MockLMStudioClient:\n    \"\"\"Mock for LMStudio `Client` that can be used to test the LMStudio model\"\"\"\n\n    def __init__(self):\n        self._mock_responses: Dict[str, Any] = {}\n        self.llm: Optional[MockLMStudioLLM] = None\n\n    def 
add_mock_responses(self, mocks: List[Tuple[dict, Any]]):\n        for kwargs, response in mocks:\n            request_key = hash_lmstudio_request(kwargs)\n            self._mock_responses[request_key] = response\n        self.llm = MockLMStudioLLM(self._mock_responses)\n\n\nclass MockAsyncLMStudioModel:\n    \"\"\"Mock for async LMStudio model object returned by client.llm.model()\"\"\"\n\n    def __init__(self, mock_responses: Dict[str, Any]):\n        self._mock_responses = mock_responses\n\n    async def respond(self, messages, **kwargs):\n        request_key = hash_lmstudio_request({\"messages\": messages, **kwargs})\n        response = self._mock_responses.get(request_key)\n        if not response:\n            raise ValueError(f\"No response found for {{'messages': {messages}, **{kwargs}}}\")\n        return MockLMStudioResponse(response)\n\n    async def respond_stream(self, messages, **kwargs):\n        \"\"\"Return an async iterator (must be awaited first, then iterated).\"\"\"\n        request_key = hash_lmstudio_request({\"messages\": messages, **kwargs})\n        response = self._mock_responses.get(request_key)\n        if not response:\n            raise ValueError(f\"No response found for {{'messages': {messages}, **{kwargs}}}\")\n\n        async def _stream():\n            for chunk in response:\n                yield MockLMStudioResponse(chunk)\n\n        return _stream()\n\n\nclass MockAsyncLMStudioLLM:\n    \"\"\"Mock for the llm attribute of AsyncClient\"\"\"\n\n    def __init__(self, mock_responses: Dict[str, Any]):\n        self._mock_responses = mock_responses\n\n    async def model(self, model_key=None):\n        return MockAsyncLMStudioModel(self._mock_responses)\n\n\nclass MockAsyncLMStudioClient:\n    \"\"\"Mock for LMStudio `AsyncClient` that can be used to test the AsyncLMStudio model\"\"\"\n\n    def __init__(self):\n        self._mock_responses: Dict[str, Any] = {}\n        self.llm: Optional[MockAsyncLMStudioLLM] = None\n        
self._context_entered = False\n\n    def add_mock_responses(self, mocks: List[Tuple[dict, Any]]):\n        for kwargs, response in mocks:\n            request_key = hash_lmstudio_request(kwargs)\n            self._mock_responses[request_key] = response\n        self.llm = MockAsyncLMStudioLLM(self._mock_responses)\n\n    async def __aenter__(self):\n        self._context_entered = True\n        return self\n\n    async def __aexit__(self, exc_type, exc_val, exc_tb):\n        self._context_entered = False\n        return False\n"
  },
  {
    "path": "tests/test_utils/mock_openai_client.py",
    "content": "from typing import List, Dict, Any, Optional\nfrom unittest.mock import MagicMock\n\nfrom tests.test_utils.utils import hash_dict\n\n\nclass MockChoice:\n    def __init__(\n        self,\n        content: str,\n        finish_reason: str = \"stop\",\n        refusal: Optional[str] = None\n    ):\n        self.message = MagicMock()\n        self.message.content = content\n        self.message.refusal = refusal\n        self.finish_reason = finish_reason\n        self.delta = MagicMock()\n        self.delta.content = content\n\n\nclass MockCompletionResponse:\n    def __init__(self, choices: List[MockChoice]):\n        self.choices = choices\n\n\nclass MockStreamingChunk:\n    def __init__(self, content: Optional[str] = None):\n        self.choices = []\n        if content is not None:\n            choice = MagicMock()\n            delta = MagicMock()\n            delta.content = content\n            choice.delta = delta\n            self.choices = [choice]\n\n\nclass MockOpenAIClient:\n    \"\"\"Mock for OpenAI client that can be used to test vLLM integration\"\"\"\n\n    def __init__(self):\n        self.chat = MagicMock()\n        self.chat.completions = MagicMock()\n        self.chat.completions.create = MagicMock()\n\n        # The method that will be called by the model when it makes a request\n        def _create(**kwargs):\n            # Hash the arguments to create a unique key\n            request_key = hash_dict(kwargs)\n            response = self._mock_responses.get(request_key)\n            if not response:\n                raise ValueError(f\"No response found for {kwargs}\")\n            if kwargs.get(\"stream\", False):\n                return self._create_streaming_response(response)\n            else:\n                return self._create_standard_response(response)\n\n        self.chat.completions.create.side_effect = _create\n        self._mock_responses: Dict[str, Any] = {}\n\n    def add_mock_responses(self, mocks: list):\n       
 for kwargs, response in mocks:\n            request_key = hash_dict(kwargs)\n            self._mock_responses[request_key] = response\n\n    def _create_standard_response(self, response):\n        if isinstance(response, str):\n            response = [response]\n        choices = [MockChoice(content=chunk) for chunk in response]\n        return MockCompletionResponse(choices=choices)\n\n    def _create_streaming_response(self, response):\n        chunks = [MockStreamingChunk(content=chunk) for chunk in response]\n        return iter(chunks)\n\n\nclass MockAsyncOpenAIClient:\n    \"\"\"Mock for AsyncOpenAI client that can be used to test AsyncVLLM integration\"\"\"\n\n    def __init__(self):\n        self.chat = MagicMock()\n        self.chat.completions = MagicMock()\n        self.chat.completions.create = MagicMock()\n\n        # The method that will be called by the model when it makes a request\n        async def _async_create(**kwargs):\n            # Hash the arguments to create a unique key\n            request_key = hash_dict(kwargs)\n            response = self._mock_responses.get(request_key)\n            if not response:\n                raise ValueError(f\"No response found for {kwargs}\")\n            if kwargs.get(\"stream\", False):\n                return self._create_async_streaming_response(response)\n            else:\n                return await self._create_async_standard_response(response)\n\n        self.chat.completions.create.side_effect = _async_create\n        self._mock_responses: Dict[str, Any] = {}\n\n    def add_mock_responses(self, mocks: list):\n        for kwargs, response in mocks:\n            request_key = hash_dict(kwargs)\n            self._mock_responses[request_key] = response\n\n    async def _create_async_standard_response(self, response):\n        \"\"\"Create an async standard (non-streaming) response\"\"\"\n        if isinstance(response, str):\n            response = [response]\n        choices = 
[MockChoice(content=chunk) for chunk in response]\n        return MockCompletionResponse(choices=choices)\n\n    async def _create_async_streaming_response(self, response):\n        \"\"\"Create an async streaming response generator\"\"\"\n        chunks = [MockStreamingChunk(content=chunk) for chunk in response]\n\n        for chunk in chunks:\n            yield chunk\n"
  },
  {
    "path": "tests/test_utils/mock_tgi_client.py",
    "content": "from typing import Any, Dict\nfrom unittest.mock import MagicMock\n\nfrom tests.test_utils.utils import hash_dict\n\n\nclass MockTGIInferenceClient:\n    \"\"\"Mock for TGI `InferenceClient` that can be used to test the TGI model\"\"\"\n\n    def __init__(self):\n        self.text_generation = MagicMock()\n\n        # The method that will be called by the model when it makes a request\n        def _create(**kwargs):\n            # Hash the arguments to create a unique key\n            request_key = hash_dict(kwargs)\n            response = self._mock_responses.get(request_key)\n            if not response:\n                raise ValueError(f\"No response found for {kwargs}\")\n            if kwargs.get(\"stream\", False):\n                return iter(response)\n            else:\n                return response\n\n        self.text_generation.side_effect = _create\n        self._mock_responses: Dict[str, Any] = {}\n\n    def add_mock_responses(self, mocks: list):\n        for kwargs, response in mocks:\n            request_key = hash_dict(kwargs)\n            self._mock_responses[request_key] = response\n\n\nclass MockAsyncTGIInferenceClient:\n    \"\"\"Mock for TGI `InferenceClient` that can be used to test the TGI model\"\"\"\n\n    def __init__(self):\n        self.text_generation = MagicMock()\n\n        # The method that will be called by the model when it makes a request\n        async def _async_create(**kwargs):\n            # Hash the arguments to create a unique key\n            request_key = hash_dict(kwargs)\n            response = self._mock_responses.get(request_key)\n            if not response:\n                raise ValueError(f\"No response found for {kwargs}\")\n            if kwargs.get(\"stream\", False):\n                return self._create_async_streaming_response(response)\n            else:\n                return response\n\n        self.text_generation.side_effect = _async_create\n        self._mock_responses: Dict[str, 
Any] = {}\n\n    def add_mock_responses(self, mocks: list):\n        for kwargs, response in mocks:\n            request_key = hash_dict(kwargs)\n            self._mock_responses[request_key] = response\n\n    async def _create_async_streaming_response(self, response):\n        \"\"\"Create an async streaming response generator\"\"\"\n        for chunk in response:\n            yield chunk\n"
  },
  {
    "path": "tests/test_utils/utils.py",
    "content": "import hashlib\nimport pickle\nimport sys\n\n\ndef hash_dict(d) -> str:\n    def make_hashable(obj):\n        if isinstance(obj, (bool, int, float, str, type(None))):\n            if isinstance(obj, str):\n                return sys.intern(obj)\n            return obj\n        if isinstance(obj, dict):\n            return tuple(sorted(\n                (sys.intern(k) if isinstance(k, str) else k, make_hashable(v))\n                for k, v in obj.items()\n            ))\n        if isinstance(obj, (list, tuple)):\n            return tuple(make_hashable(e) for e in obj)\n        return str(obj)\n\n    hashable_obj = make_hashable(d)\n    pickled_obj = pickle.dumps(hashable_obj, protocol=4)\n    return hashlib.sha256(pickled_obj).hexdigest()\n"
  },
  {
    "path": "tests/types/test_custom_types.py",
    "content": "import re\n\nimport pytest\nfrom pydantic import BaseModel\n\nfrom outlines import types\nfrom outlines.types.dsl import to_regex\n\n\n@pytest.mark.parametrize(\n    \"custom_type,test_string,should_match\",\n    [\n        (types.locale.us.phone_number, \"12\", False),\n        (types.locale.us.phone_number, \"(123) 123-1234\", True),\n        (types.locale.us.phone_number, \"123-123-1234\", True),\n        (types.locale.us.zip_code, \"12\", False),\n        (types.locale.us.zip_code, \"12345\", True),\n        (types.locale.us.zip_code, \"12345-1234\", True),\n        (types.isbn, \"ISBN 0-1-2-3-4-5\", False),\n        (types.isbn, \"ISBN 978-0-596-52068-7\", True),\n        (types.isbn, \"ISBN-13: 978-0-596-52068-7\", True),\n        (types.isbn, \"978 0 596 52068 7\", True),\n        (types.isbn, \"9780596520687\", True),\n        (types.isbn, \"ISBN-10: 0-596-52068-9\", True),\n        (types.isbn, \"0-596-52068-9\", True),\n        (types.email, \"eitan@gmail.com\", True),\n        (types.email, \"99@yahoo.com\", True),\n        (types.email, \"eitan@.gmail.com\", False),\n        (types.email, \"myemail\", False),\n        (types.email, \"eitan@gmail\", False),\n        (types.email, \"eitan@my.custom.domain\", True),\n        (types.integer, \"-19\", True),\n        (types.integer, \"19\", True),\n        (types.integer, \"019\", False),\n        (types.integer, \"1.9\", False),\n        (types.integer, \"a\", False),\n        (types.boolean, \"True\", True),\n        (types.boolean, \"False\", True),\n        (types.boolean, \"true\", False),\n        (types.number, \"10\", True),\n        (types.number, \"10.9\", True),\n        (types.number, \"10.9e+3\", True),\n        (types.number, \"10.9e-3\", True),\n        (types.number, \"a\", False),\n        (types.date, \"2022-03-23\", True),\n        (types.date, \"2022-03-32\", False),\n        (types.date, \"2022-13-23\", False),\n        (types.date, \"32-03-2022\", False),\n        
(types.time, \"01:23:59\", True),\n        (types.time, \"01:23:61\", False),\n        (types.time, \"01:61:59\", False),\n        (types.time, \"24:23:59\", False),\n        (types.sentence, \"The temperature is 23.5 degrees !\", True),\n        (types.sentence, \"Did you earn $1,234.56 last month  ?\", True),\n        (types.sentence, \"The #1 player scored 100 points .\", True),\n        (types.sentence, \"Hello @world, this is a test!\", True),\n        (types.sentence, \"invalid sentence.\", False),\n        (types.sentence, \"Invalid sentence\", False),\n        (types.paragraph, \"This is a paragraph!\\n\", True),\n        (types.paragraph, \"Line1\\nLine2\", False),\n        (types.paragraph, \"One sentence. Two sentences.\\n\\n\", True),\n        (types.paragraph, \"One sentence. invalid sentence.\", False),\n        (types.paragraph, \"One sentence. Invalid sentence\\n\", False),\n        (types.hex_str, \"0x123\", True),\n        (types.hex_str, \"0xABC\", True),\n        (types.hex_str, \"0xabc\", True),\n        (types.hex_str, \"0x123ABC\", True),\n        (types.hex_str, \"123\", True),\n        (types.hex_str, \"ABC\", True),\n        (types.hex_str, \"abc\", True),\n        (types.hex_str, \"123ABC\", True),\n        (types.hex_str, \"0xg123\", False),\n        (types.hex_str, \"0x\", False),\n        (types.hex_str, \"0x123G\", False),\n        (types.uuid4, \"123e4567-e89b-42d3-a456-426614174000\", True),\n        (types.uuid4, \"00000000-0000-4000-8000-000000000000\", True),\n        (types.uuid4, \"123e4567-e89b-12d3-a456-426614174000\", False),\n        (types.uuid4, \"123e4567-e89b-12d3-a456-42661417400\", False),\n        (types.uuid4, \"123e4567-e89b-12d3-a456-4266141740000\", False),\n        (types.uuid4, \"123e4567-e89b-12d3-x456-426614174000\", False),\n        (types.uuid4, \"123e4567-e89b-12d3-a456-42661417400g\", False),\n        (types.ipv4, \"192.168.1.1\", True),\n        (types.ipv4, \"10.0.0.1\", True),\n        (types.ipv4, 
\"172.16.0.1\", True),\n        (types.ipv4, \"255.255.255.255\", True),\n        (types.ipv4, \"0.0.0.0\", True),\n        (types.ipv4, \"256.1.2.3\", False),\n        (types.ipv4, \"1.256.2.3\", False),\n        (types.ipv4, \"1.2.256.3\", False),\n        (types.ipv4, \"1.2.3.256\", False),\n        (types.ipv4, \"1.2.3\", False),\n        (types.ipv4, \"1.2.3.4.5\", False),\n        (types.ipv4, \"1.2.3.4.\", False),\n        (types.ipv4, \".1.2.3.4\", False),\n        (types.ipv4, \"1..2.3.4\", False),\n    ],\n)\ndef test_type_regex(custom_type, test_string, should_match):\n    class Model(BaseModel):\n        attr: custom_type\n\n    schema = Model.model_json_schema()\n    assert schema[\"properties\"][\"attr\"][\"type\"] == \"string\"\n    regex_str = schema[\"properties\"][\"attr\"][\"pattern\"]\n    does_match = re.fullmatch(regex_str, test_string) is not None\n    assert does_match is should_match\n\n    regex_str = to_regex(custom_type)\n    does_match = re.fullmatch(regex_str, test_string) is not None\n    assert does_match is should_match\n\n\n@pytest.mark.parametrize(\n    \"custom_type,test_string,should_match\",\n    [\n        (types.airports.IATA, \"CDG\", True),\n        (types.airports.IATA, \"XXX\", False),\n        (types.countries.Alpha2, \"FR\", True),\n        (types.countries.Alpha2, \"XX\", False),\n        (types.countries.Alpha3, \"UKR\", True),\n        (types.countries.Alpha3, \"XXX\", False),\n        (types.countries.Numeric, \"004\", True),\n        (types.countries.Numeric, \"900\", False),\n        (types.countries.Name, \"Ukraine\", True),\n        (types.countries.Name, \"Wonderland\", False),\n        (types.countries.Flag, \"🇿🇼\", True),\n        (types.countries.Flag, \"🤗\", False),\n    ],\n)\ndef test_type_enum(custom_type, test_string, should_match):\n    type_name = custom_type.__name__\n\n    class Model(BaseModel):\n        attr: custom_type\n\n    schema = Model.model_json_schema()\n    assert 
isinstance(schema[\"$defs\"][type_name][\"enum\"], list)\n    does_match = test_string in schema[\"$defs\"][type_name][\"enum\"]\n    assert does_match is should_match\n\n    does_match = test_string in custom_type.__members__\n    assert does_match is should_match\n"
  },
  {
    "path": "tests/types/test_dsl.py",
    "content": "import datetime\nimport json\nimport re as _re\nimport sys\nimport tempfile\nfrom dataclasses import dataclass\nfrom enum import Enum\nfrom typing import (\n    Literal,\n    Tuple,\n    Union,\n    get_args,\n    Optional as PyOptional\n)\n\nimport jsonschema\nimport pytest\nfrom genson import SchemaBuilder\nfrom pydantic import BaseModel\n\nfrom outlines import grammars, types\nfrom outlines.types.dsl import (\n    Alternatives,\n    JsonSchema,\n    KleenePlus,\n    KleeneStar,\n    Optional,\n    QuantifyBetween,\n    QuantifyExact,\n    QuantifyMaximum,\n    QuantifyMinimum,\n    Choice,\n    Regex,\n    Sequence,\n    String,\n    Term,\n    either,\n    CFG,\n    _handle_dict,\n    _handle_list,\n    _handle_literal,\n    _handle_tuple,\n    _handle_union,\n    _ensure_json_quoted,\n    json_schema,\n    one_or_more,\n    zero_or_more,\n    optional,\n    between,\n    at_most,\n    at_least,\n    exactly,\n    regex,\n    python_types_to_terms,\n    to_regex,\n)\nfrom outlines.types.utils import (\n    is_pydantic_model,\n    is_typed_dict,\n    is_dataclass,\n)\n\nif sys.version_info >= (3, 12):\n    from typing import TypedDict\nelse:\n    from typing_extensions import TypedDict\n\n\ndef test_dsl_init():\n    string = String(\"test\")\n    assert string.value == \"test\"\n    assert repr(string) == \"String(value='test')\"\n    assert string.display_ascii_tree() == \"└── String('test')\\n\"\n\n    choice = Choice([\"a\", \"b\"])\n    assert choice.items == [\"a\", \"b\"]\n    assert repr(choice) == \"Choice(items=['a', 'b'])\"\n    assert choice.display_ascii_tree() == \"└── Choice(['a', 'b'])\\n\"\n\n    regex = Regex(\"[0-9]\")\n    assert regex.pattern == \"[0-9]\"\n    assert repr(regex) == \"Regex(pattern='[0-9]')\"\n    assert regex.display_ascii_tree() == \"└── Regex('[0-9]')\\n\"\n\n    schema = JsonSchema('{ \"type\": \"string\" }')\n    assert schema.schema == '{ \"type\": \"string\" }'\n    assert repr(schema) == 
'JsonSchema(schema=\\'{ \"type\": \"string\" }\\')'\n    assert schema.display_ascii_tree() == \"└── JsonSchema('{ \\\"type\\\": \\\"string\\\" }')\\n\"\n\n    kleene_star = KleeneStar(string)\n    assert kleene_star.term == string\n    assert repr(kleene_star) == \"KleeneStar(term=String(value='test'))\"\n    assert kleene_star.display_ascii_tree() == \"└── KleeneStar(*)\\n    └── String('test')\\n\"\n\n    kleene_plus = KleenePlus(string)\n    assert kleene_plus.term == string\n    assert repr(kleene_plus) == \"KleenePlus(term=String(value='test'))\"\n    assert kleene_plus.display_ascii_tree() == \"└── KleenePlus(+)\\n    └── String('test')\\n\"\n\n    optional = Optional(string)\n    assert optional.term == string\n    assert repr(optional) == \"Optional(term=String(value='test'))\"\n    assert optional.display_ascii_tree() == \"└── Optional(?)\\n    └── String('test')\\n\"\n\n    alternatives = Alternatives([string, regex])\n    assert alternatives.terms[0] == string\n    assert alternatives.terms[1] == regex\n    assert (\n        repr(alternatives)\n        == \"Alternatives(terms=[String(value='test'), Regex(pattern='[0-9]')])\"\n    )\n    assert alternatives.display_ascii_tree() == \"└── Alternatives(|)\\n    ├── String('test')\\n    └── Regex('[0-9]')\\n\"\n\n    sequence = Sequence([string, regex])\n    assert sequence.terms[0] == string\n    assert sequence.terms[1] == regex\n    assert (\n        repr(sequence)\n        == \"Sequence(terms=[String(value='test'), Regex(pattern='[0-9]')])\"\n    )\n    assert sequence.display_ascii_tree() == \"└── Sequence\\n    ├── String('test')\\n    └── Regex('[0-9]')\\n\"\n\n    exact = QuantifyExact(string, 3)\n    assert exact.term == string\n    assert exact.count == 3\n    assert repr(exact) == \"QuantifyExact(term=String(value='test'), count=3)\"\n    assert exact.display_ascii_tree() == \"└── Quantify({3})\\n    └── String('test')\\n\"\n\n    minimum = QuantifyMinimum(string, 3)\n    assert minimum.term == 
string\n    assert minimum.min_count == 3\n    assert repr(minimum) == \"QuantifyMinimum(term=String(value='test'), min_count=3)\"\n    assert minimum.display_ascii_tree() == \"└── Quantify({3,})\\n    └── String('test')\\n\"\n\n    maximum = QuantifyMaximum(string, 3)\n    assert maximum.term == string\n    assert maximum.max_count == 3\n    assert repr(maximum) == \"QuantifyMaximum(term=String(value='test'), max_count=3)\"\n    assert maximum.display_ascii_tree() == \"└── Quantify({,3})\\n    └── String('test')\\n\"\n\n    between = QuantifyBetween(string, 1, 3)\n    assert between.term == string\n    assert between.min_count == 1\n    assert between.max_count == 3\n    assert (\n        repr(between)\n        == \"QuantifyBetween(term=String(value='test'), min_count=1, max_count=3)\"\n    )\n    assert between.display_ascii_tree() == \"└── Quantify({1,3})\\n    └── String('test')\\n\"\n\n    with pytest.raises(\n        ValueError, match=\"`max_count` must be greater than `min_count`\"\n    ):\n        QuantifyBetween(string, 3, 1)\n\n\ndef test_dsl_term_methods():\n    a = String(\"a\")\n    b = Regex(\"[0-9]\")\n    c = \"c\"\n\n    assert a + b == Sequence([a, b])\n    assert a + c == Sequence([a, String(c)])\n    assert a.__radd__(b) == Sequence([b, a])\n    assert a.__radd__(c) == Sequence([String(c), a])\n\n    assert a | b == Alternatives([a, b])\n    assert a | c == Alternatives([a, String(c)])\n    assert a.__ror__(b) == Alternatives([b, a])\n    assert a.__ror__(c) == Alternatives([String(c), a])\n\n    core_schema = a.__get_pydantic_core_schema__(\"\", \"\")\n    validator = a.__get_validator__(core_schema)\n    assert validator(\"a\") == \"a\"\n    with pytest.raises(\n        ValueError,\n        match=\"Input should be in the language of the regular expression\",\n    ):\n        validator(\"b\")\n\n    assert a.__get_pydantic_json_schema__(\"\", \"\") == {\"type\": \"string\", \"pattern\": \"a\"}\n\n    assert a.matches(\"a\")\n    assert not 
a.matches(\"b\")\n\n    assert a.display_ascii_tree() == \"└── String('a')\\n\"\n\n    with pytest.raises(NotImplementedError):\n        Term()._display_node()\n\n    assert a._display_children(\"\") == \"\"\n\n    assert a.__str__() == \"└── String('a')\\n\"\n\ndef test_dsl_sequence():\n    a = String(\"a\")\n    b = String(\"b\")\n\n    sequence = a + b\n    assert isinstance(sequence, Sequence)\n    assert sequence.terms[0] == a\n    assert sequence.terms[1] == b\n\n    sequence = \"a\" + b\n    assert isinstance(sequence, Sequence)\n    assert isinstance(sequence.terms[0], String)\n    assert sequence.terms[0].value == \"a\"\n    assert sequence.terms[1].value == \"b\"\n\n    sequence = a + \"b\"\n    assert isinstance(sequence, Sequence)\n    assert isinstance(sequence.terms[1], String)\n    assert sequence.terms[0].value == \"a\"\n    assert sequence.terms[1].value == \"b\"\n\n\ndef test_dsl_alternatives():\n    a = String(\"a\")\n    b = String(\"b\")\n\n    alt = either(a, b)\n    assert isinstance(alt, Alternatives)\n    assert isinstance(alt.terms[0], String)\n    assert isinstance(alt.terms[1], String)\n\n    alt = either(\"a\", \"b\")\n    assert isinstance(alt, Alternatives)\n    assert isinstance(alt.terms[0], String)\n    assert isinstance(alt.terms[1], String)\n\n    alt = either(\"a\", b)\n    assert isinstance(alt, Alternatives)\n    assert isinstance(alt.terms[0], String)\n    assert isinstance(alt.terms[1], String)\n\n\ndef test_dsl_optional():\n    a = String(\"a\")\n\n    opt = a.optional()\n    assert isinstance(opt, Optional)\n\n    opt = optional(\"a\")\n    assert isinstance(opt, Optional)\n    assert isinstance(opt.term, String)\n\n    opt = a.optional()\n    assert isinstance(opt, Optional)\n\n\ndef test_dsl_exactly():\n    a = String(\"a\")\n\n    rep = a.exactly(2)\n    assert isinstance(rep, QuantifyExact)\n    assert rep.count == 2\n\n    rep = exactly(2, \"a\")\n    assert isinstance(rep, QuantifyExact)\n    assert 
isinstance(rep.term, String)\n\n    rep = a.exactly(2)\n    assert isinstance(rep, QuantifyExact)\n\n\ndef test_dsl_at_least():\n    a = String(\"a\")\n\n    rep = a.at_least(2)\n    assert isinstance(rep, QuantifyMinimum)\n    assert rep.min_count == 2\n\n    rep = at_least(2, \"a\")\n    assert isinstance(rep, QuantifyMinimum)\n    assert isinstance(rep.term, String)\n\n    rep = a.at_least(2)\n    assert isinstance(rep, QuantifyMinimum)\n\n\ndef test_dsl_at_most():\n    a = String(\"a\")\n\n    rep = a.at_most(2)\n    assert isinstance(rep, QuantifyMaximum)\n    assert rep.max_count == 2\n\n    rep = at_most(2, \"a\")\n    assert isinstance(rep, QuantifyMaximum)\n    assert isinstance(rep.term, String)\n\n    rep = a.at_most(2)\n    assert isinstance(rep, QuantifyMaximum)\n\n\ndef test_between():\n    a = String(\"a\")\n\n    rep = a.between(1, 2)\n    assert isinstance(rep, QuantifyBetween)\n    assert rep.min_count == 1\n    assert rep.max_count == 2\n\n    rep = between(1, 2, \"a\")\n    assert isinstance(rep, QuantifyBetween)\n    assert isinstance(rep.term, String)\n\n    rep = a.between(1, 2)\n    assert isinstance(rep, QuantifyBetween)\n\n\ndef test_dsl_zero_or_more():\n    a = String(\"a\")\n\n    rep = a.zero_or_more()\n    assert isinstance(rep, KleeneStar)\n\n    rep = zero_or_more(\"a\")\n    assert isinstance(rep, KleeneStar)\n    assert isinstance(rep.term, String)\n\n    rep = a.zero_or_more()\n    assert isinstance(rep, KleeneStar)\n\n\ndef test_dsl_one_or_more():\n    a = String(\"a\")\n\n    rep = a.one_or_more()\n    assert isinstance(rep, KleenePlus)\n\n    rep = one_or_more(\"a\")\n    assert isinstance(rep, KleenePlus)\n    assert isinstance(rep.term, String)\n\n    rep = a.zero_or_more()\n    assert isinstance(rep, KleeneStar)\n\n\ndef test_dsl_aliases():\n    test = regex(\"[0-9]\")\n    assert isinstance(test, Regex)\n\n    test = json_schema('{\"type\": \"string\"}')\n    assert isinstance(test, JsonSchema)\n\n\ndef 
test_dsl_term_pydantic_simple():\n    a = String(\"a\")\n\n    class Model(BaseModel):\n        field: a\n\n    schema = Model.model_json_schema()\n    assert schema == {\n        \"properties\": {\"field\": {\"pattern\": \"a\", \"title\": \"Field\", \"type\": \"string\"}},\n        \"required\": [\"field\"],\n        \"title\": \"Model\",\n        \"type\": \"object\",\n    }\n\n\ndef test_dsl_term_pydantic_combination():\n    a = String(\"a\")\n    b = String(\"b\")\n    c = String(\"c\")\n\n    class Model(BaseModel):\n        field: either((a + b), c)\n\n    schema = Model.model_json_schema()\n    assert schema == {\n        \"properties\": {\n            \"field\": {\"pattern\": \"(ab|c)\", \"title\": \"Field\", \"type\": \"string\"}\n        },\n        \"required\": [\"field\"],\n        \"title\": \"Model\",\n        \"type\": \"object\",\n    }\n\n\ndef test_dsl_display():\n    a = String(\"a\")\n    b = String(\"b\")\n    c = Regex(\"[0-9]\")\n    d = Sequence([KleeneStar(Alternatives([a, b])), c])\n\n    tree = str(d)\n    assert (\n        tree\n        == \"└── Sequence\\n    ├── KleeneStar(*)\\n    │   └── Alternatives(|)\\n    │       ├── String('a')\\n    │       └── String('b')\\n    └── Regex('[0-9]')\\n\"\n    )\n\n\ndef test_cfg():\n    cfg_string = \"\"\"\n?start: expr\n?expr: NUMBER\n\"\"\"\n    cfg = types.cfg(cfg_string)\n    assert isinstance(cfg, CFG)\n    assert cfg.definition.strip() == \"?start: expr\\n?expr: NUMBER\"\n    assert cfg._display_node() == \"CFG('\\n?start: expr\\n?expr: NUMBER\\n')\"\n    assert cfg.__repr__() == \"CFG(definition='\\n?start: expr\\n?expr: NUMBER\\n')\"\n    assert cfg == types.cfg(cfg_string)\n    assert not cfg == \"a\"\n\n\ndef test_json_schema():\n    # variables to be used in the tests\n    json_schema = types.json_schema('{\"type\": \"object\", \"properties\": {\"foo\": {\"type\": \"string\"}, \"bar\": {\"type\": \"integer\"}}, \"required\": [\"foo\"]}')\n    schema_builder_instance = 
SchemaBuilder()\n    schema_builder_instance.add_schema({\"type\": \"object\", \"properties\": {\"foo\": {\"type\": \"string\"}, \"bar\": {\"type\": \"integer\"}}, \"required\": [\"foo\"]})\n    class MyPydanticModel(BaseModel):\n        foo: str\n        bar: PyOptional[int] = None\n    class MyTypedDict(TypedDict):\n        foo: str\n        bar: int\n    @dataclass\n    class MyDataClass:\n        foo: str\n        bar: PyOptional[int] = None\n\n    # init dict\n    schema = types.json_schema({\"type\": \"string\"})\n    assert schema.schema == '{\"type\": \"string\"}'\n\n    # init str\n    schema = types.json_schema('{\"type\": \"string\"}')\n    assert schema.schema == '{\"type\": \"string\"}'\n\n    # init Pydantic model\n    schema = types.json_schema(MyPydanticModel)\n    assert schema.schema == '{\"properties\": {\"foo\": {\"title\": \"Foo\", \"type\": \"string\"}, \"bar\": {\"anyOf\": [{\"type\": \"integer\"}, {\"type\": \"null\"}], \"default\": null, \"title\": \"Bar\"}}, \"required\": [\"foo\"], \"title\": \"MyPydanticModel\", \"type\": \"object\"}'\n\n    # init TypedDict\n    schema = types.json_schema(MyTypedDict)\n    assert schema.schema == '{\"properties\": {\"foo\": {\"title\": \"Foo\", \"type\": \"string\"}, \"bar\": {\"title\": \"Bar\", \"type\": \"integer\"}}, \"required\": [\"foo\", \"bar\"], \"title\": \"MyTypedDict\", \"type\": \"object\"}'\n\n    # init dataclass\n    schema = types.json_schema(MyDataClass)\n    assert schema.schema == '{\"properties\": {\"foo\": {\"title\": \"Foo\", \"type\": \"string\"}, \"bar\": {\"anyOf\": [{\"type\": \"integer\"}, {\"type\": \"null\"}], \"default\": null, \"title\": \"Bar\"}}, \"required\": [\"foo\"], \"title\": \"MyDataClass\", \"type\": \"object\"}'\n\n    # init SchemaBuilder\n    schema = types.json_schema(schema_builder_instance)\n    assert schema.schema == '{\"$schema\": \"http://json-schema.org/schema#\", \"type\": \"object\", \"properties\": {\"foo\": {\"type\": \"string\"}, \"bar\": 
{\"type\": \"integer\"}}, \"required\": [\"foo\"]}'\n\n    # init unsupported type\n    with pytest.raises(ValueError, match=\"Cannot parse schema\"):\n        types.json_schema(1)\n\n    # init invalide JSON schema\n    with pytest.raises(jsonschema.exceptions.SchemaError):\n        types.json_schema({\"type\": \"strin\"})\n\n    # is_json_schema\n    assert not JsonSchema.is_json_schema(None)\n    assert not JsonSchema.is_json_schema('{\"type\": \"string\"}')\n    assert not JsonSchema.is_json_schema({\"type\": \"string\"})\n    assert JsonSchema.is_json_schema(json_schema)\n    assert JsonSchema.is_json_schema(schema_builder_instance)\n    assert JsonSchema.is_json_schema(MyPydanticModel)\n    assert JsonSchema.is_json_schema(MyTypedDict)\n    assert JsonSchema.is_json_schema(MyDataClass)\n\n    # convert_to\n    assert JsonSchema.convert_to(json_schema, [\"str\"]) == json_schema.schema\n    assert JsonSchema.convert_to(json_schema, [\"dict\"]) == json.loads(json_schema.schema)\n    assert JsonSchema.convert_to(MyPydanticModel, [\"pydantic\"]) == MyPydanticModel\n    assert JsonSchema.convert_to(MyTypedDict, [\"typeddict\"]) == MyTypedDict\n    assert JsonSchema.convert_to(MyDataClass, [\"dataclass\"]) == MyDataClass\n    assert JsonSchema.convert_to(schema_builder_instance, [\"genson\"]) == schema_builder_instance\n    assert JsonSchema.convert_to(MyPydanticModel, [\"str\"]) == JsonSchema(MyPydanticModel).schema\n    assert JsonSchema.convert_to(MyPydanticModel, [\"dict\"]) == json.loads(JsonSchema(MyPydanticModel).schema)\n    assert is_pydantic_model(JsonSchema.convert_to(json_schema, [\"pydantic\"]))\n    assert is_typed_dict(JsonSchema.convert_to(json_schema, [\"typeddict\"]))\n    assert is_dataclass(JsonSchema.convert_to(json_schema, [\"dataclass\"]))\n    with pytest.raises(ValueError, match=\"Cannot convert schema type\"):\n        JsonSchema.convert_to(json_schema, [\"genson\"])\n\n    # other methods\n    schema = types.json_schema('{\"type\": 
\"string\"}')\n    assert schema._display_node() == \"JsonSchema('{\\\"type\\\": \\\"string\\\"}')\"\n    assert schema.__repr__() == \"JsonSchema(schema='{\\\"type\\\": \\\"string\\\"}')\"\n    assert schema == types.json_schema('{\"type\": \"string\"}')\n    assert not schema == \"a\"\n\n\ndef test_dsl_cfg_from_file():\n    grammar_content = \"\"\"\n    ?start: expression\n    ?expression: term ((\"+\" | \"-\") term)*\n    ?term: factor ((\"*\" | \"/\") factor)*\n    ?factor: NUMBER\n    \"\"\"\n    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=True) as temp_file:\n        temp_file.write(grammar_content)\n        temp_file.flush()\n        temp_file_path = temp_file.name\n        cfg = CFG.from_file(temp_file_path)\n        assert cfg == CFG(grammar_content)\n\n\ndef test_dsl_json_schema_from_file():\n    schema_content = \"\"\"\n    {\n        \"type\": \"object\",\n        \"properties\": {\n            \"name\": {\n                \"type\": \"string\"\n            }\n        }\n    }\n    \"\"\"\n    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=True) as temp_file:\n        temp_file.write(schema_content)\n        temp_file.flush()\n        temp_file_path = temp_file.name\n        schema = JsonSchema.from_file(temp_file_path)\n        assert schema == JsonSchema(schema_content)\n\n\ndef test_dsl_python_types_to_terms():\n    with pytest.raises(RecursionError):\n        python_types_to_terms(None, 11)\n\n    term = Term()\n    assert python_types_to_terms(term) == term\n\n    assert python_types_to_terms(int) == types.integer\n    assert python_types_to_terms(float) == types.number\n    assert python_types_to_terms(bool) == types.boolean\n    assert python_types_to_terms(str) == types.string\n    assert python_types_to_terms(datetime.time) == types.time\n    assert python_types_to_terms(datetime.date) == types.date\n    assert python_types_to_terms(datetime.datetime) == types.datetime\n    assert 
python_types_to_terms(dict) == types.CFG(grammars.json)\n\n    string_instance = \"a\"\n    assert python_types_to_terms(string_instance) == String(string_instance)\n    int_instance = 1\n    assert python_types_to_terms(int_instance) == Regex(r\"1\")\n    float_instance = 1.0\n    assert python_types_to_terms(float_instance) == Regex(r\"1.0\")\n\n    @dataclass\n    class DataClass:\n        a: int\n        b: str\n\n    assert python_types_to_terms(DataClass) == JsonSchema(\n        {\n            \"properties\": {\"a\": {\"title\": \"A\", \"type\": \"integer\"}, \"b\": {\"title\": \"B\", \"type\": \"string\"}},\n            \"required\": [\"a\", \"b\"],\n            \"title\": \"DataClass\",\n            \"type\": \"object\",\n        }\n    )\n\n    class SomeTypedDict(TypedDict):\n        a: int\n        b: str\n\n    assert python_types_to_terms(SomeTypedDict) == JsonSchema(\n        {\n            \"properties\": {\"a\": {\"title\": \"A\", \"type\": \"integer\"}, \"b\": {\"title\": \"B\", \"type\": \"string\"}},\n            \"required\": [\"a\", \"b\"],\n            \"title\": \"SomeTypedDict\",\n            \"type\": \"object\",\n        }\n    )\n\n    class PydanticModel(BaseModel):\n        a: int\n        b: str\n\n    assert python_types_to_terms(PydanticModel) == JsonSchema(\n        {\n            \"properties\": {\"a\": {\"title\": \"A\", \"type\": \"integer\"}, \"b\": {\"title\": \"B\", \"type\": \"string\"}},\n            \"required\": [\"a\", \"b\"],\n            \"title\": \"PydanticModel\",\n            \"type\": \"object\",\n        }\n    )\n\n    builder = SchemaBuilder()\n    builder.add_schema({\"type\": \"object\", \"properties\": {}})\n    builder.add_object({\"hi\": \"there\"})\n    builder.add_object({\"hi\": 5})\n    assert python_types_to_terms(builder) == JsonSchema(\n        {\n            \"$schema\": \"http://json-schema.org/schema#\",\n            \"type\": \"object\",\n            \"properties\": {\"hi\": {\"type\": 
[\"integer\", \"string\"]}},\n            \"required\": [\"hi\"]\n        }\n    )\n\n    def func(a: int, b: str):\n        return (a, b)\n\n    assert python_types_to_terms(func) == JsonSchema(\n        {\n            \"type\": \"object\",\n            \"properties\": {\n                \"a\": {\"title\": \"A\", \"type\": \"integer\"},\n                \"b\": {\"title\": \"B\", \"type\": \"string\"},\n            },\n            \"required\": [\"a\", \"b\"],\n            \"title\": \"func\",\n        }\n    )\n\n    class SomeEnum(Enum):\n        a = \"a\"\n        b = int\n        c = func\n\n    result = python_types_to_terms(SomeEnum)\n    assert isinstance(result, Alternatives)\n    assert len(result.terms) == 3\n    assert result.terms[0] == String(\"a\")\n    assert result.terms[1] == types.integer\n    assert isinstance(result.terms[2], JsonSchema)\n    schema_dict = json.loads(result.terms[2].schema)\n    assert schema_dict == {\n        \"properties\": {\n            \"a\": {\"title\": \"A\", \"type\": \"integer\"},\n            \"b\": {\"title\": \"B\", \"type\": \"string\"},\n        },\n        \"required\": [\"a\", \"b\"],\n        \"title\": \"func\",\n        \"type\": \"object\",\n    }\n\n    # for generic types we only test the dispatch as the functions that\n    # convert to terms are tested in distinct tests below\n    assert python_types_to_terms(Literal[\"a\", \"b\"]) == _handle_literal((\"a\", \"b\"))\n    assert python_types_to_terms(Union[int, str]) == _handle_union((int, str), recursion_depth=0)\n    assert python_types_to_terms(list[int]) == _handle_list((int,), recursion_depth=0)\n    assert python_types_to_terms(tuple[int, str]) == _handle_tuple((int, str), recursion_depth=0)\n    assert python_types_to_terms(dict[int, str]) == _handle_dict((int, str), recursion_depth=0)\n\n    # type not supported\n    with pytest.raises(TypeError, match=\"is currently not supported\"):\n        python_types_to_terms(bytes)\n\n\ndef 
test_dsl_handle_literal():\n    literal = Literal[\"a\", 1]\n    result = _handle_literal(get_args(literal))\n    assert isinstance(result, Alternatives)\n    assert len(result.terms) == 2\n    assert result.terms[0] == String(\"a\")\n    assert result.terms[1] == Regex(r\"1\")\n\n\ndef test_dsl_handle_union():\n    # test simple Union\n    simple_union = Union[int, str]\n    result = _handle_union(get_args(simple_union), recursion_depth=0)\n    assert isinstance(result, Alternatives)\n    assert len(result.terms) == 2\n    assert result.terms[0] == types.integer\n    assert result.terms[1] == types.string\n\n    # test with Optional[T]\n    optional_type = PyOptional[int]\n    result = _handle_union(get_args(optional_type), recursion_depth=0)\n    assert isinstance(result, Alternatives)\n    assert len(result.terms) == 2\n    assert result.terms[0] == types.integer\n    assert result.terms[1] == String(\"None\")\n\n    # test with more complex types\n    class TestModel(BaseModel):\n        field: str\n\n    class TestEnum(Enum):\n        a = \"a\"\n        b = \"b\"\n\n    complex_union = Union[TestModel, TestEnum]\n    result = _handle_union(get_args(complex_union), recursion_depth=0)\n    assert isinstance(result, Alternatives)\n    assert len(result.terms) == 2\n    assert isinstance(result.terms[0], JsonSchema)\n    assert isinstance(result.terms[1], Alternatives)\n    assert len(result.terms[1].terms) == 2\n    assert result.terms[1].terms[0] == String(\"a\")\n    assert result.terms[1].terms[1] == String(\"b\")\n\n\ndef test_dsl_handle_list():\n    with pytest.raises(TypeError):\n        _handle_list(None, recursion_depth=0)\n\n    with pytest.raises(TypeError):\n        _handle_list((), recursion_depth=0)\n\n    with pytest.raises(TypeError):\n        _handle_list((int, str), recursion_depth=0)\n\n    # simple type\n    list_type = list[int]\n    result = _handle_list(get_args(list_type), recursion_depth=0)\n    assert isinstance(result, Sequence)\n    
assert len(result.terms) == 4\n    assert result.terms[0] == String(\"[\")\n    assert result.terms[1] == types.integer\n    assert isinstance(result.terms[2], KleeneStar)\n    assert result.terms[2].term == Sequence([String(\", \"), types.integer])\n    assert result.terms[3] == String(\"]\")\n\n    # more complex type\n    list_type = list[Union[int, str]]\n    result = _handle_list(get_args(list_type), recursion_depth=0)\n    assert isinstance(result, Sequence)\n    assert len(result.terms) == 4\n    assert result.terms[0] == String(\"[\")\n    assert result.terms[1] == _handle_union(get_args(Union[int, str]), recursion_depth=0)\n    assert isinstance(result.terms[2], KleeneStar)\n    assert result.terms[2].term == Sequence([String(\", \"), _handle_union(get_args(Union[int, str]), recursion_depth=0)])\n    assert result.terms[3] == String(\"]\")\n\n\ndef test_dsl_handle_tuple():\n    # empty tuple\n    tuple_type = Tuple[()]\n    result = _handle_tuple(get_args(tuple_type), recursion_depth=0)\n    assert isinstance(result, String)\n    assert result.value == \"()\"\n\n    # tuple with ellipsis\n    tuple_type = tuple[int, ...]\n    result = _handle_tuple(get_args(tuple_type), recursion_depth=0)\n    assert isinstance(result, Sequence)\n    assert len(result.terms) == 4\n    assert result.terms[0] == String(\"(\")\n    assert result.terms[1] == types.integer\n    assert isinstance(result.terms[2], KleeneStar)\n    assert result.terms[2].term == Sequence([String(\", \"), types.integer])\n    assert result.terms[3] == String(\")\")\n\n    # tuple with fixed length\n    tuple_type = tuple[int, str]\n    result = _handle_tuple(get_args(tuple_type), recursion_depth=0)\n    assert isinstance(result, Sequence)\n    assert len(result.terms) == 5\n    assert result.terms[0] == String(\"(\")\n    assert result.terms[1] == types.integer\n    assert result.terms[2] == String(\", \")\n    assert result.terms[3] == types.string\n    assert result.terms[4] == String(\")\")\n\n  
  # tuple with fixed length and complex types\n    tuple_type = tuple[int, Union[str, int]]\n    result = _handle_tuple(get_args(tuple_type), recursion_depth=0)\n    assert isinstance(result, Sequence)\n    assert len(result.terms) == 5\n    assert result.terms[0] == String(\"(\")\n    assert result.terms[1] == types.integer\n    assert result.terms[2] == String(\", \")\n    assert result.terms[3] == _handle_union(get_args(Union[str, int]), recursion_depth=0)\n    assert result.terms[4] == String(\")\")\n\n\ndef test_dsl_handle_dict():\n    # args of incorrect length\n    with pytest.raises(TypeError):\n        incorrect_dict_type = dict[int, str, int]\n        _handle_dict(get_args(incorrect_dict_type), recursion_depth=0)\n\n    # correct type\n    dict_type = dict[int, str]\n    result = _handle_dict(get_args(dict_type), recursion_depth=0)\n    assert isinstance(result, Sequence)\n    assert len(result.terms) == 3\n    assert result.terms[0] == String(\"{\")\n    assert isinstance(result.terms[1], Optional)\n    assert isinstance(result.terms[1].term, Sequence)\n    assert len(result.terms[1].term.terms) == 4\n    assert result.terms[1].term.terms[0] == types.integer\n    assert result.terms[1].term.terms[1] == String(\":\")\n    assert result.terms[1].term.terms[2] == types.string\n    assert result.terms[1].term.terms[3] == KleeneStar(Sequence([String(\", \"), types.integer, String(\":\"), types.string]))\n    assert result.terms[2] == String(\"}\")\n\n\ndef test_ensure_json_quoted_string():\n    \"\"\"String terms are wrapped in double-quote delimiters.\"\"\"\n    term = String(\"hello\")\n    result = _ensure_json_quoted(term)\n    assert isinstance(result, String)\n    assert result == String('\"hello\"')\n\n\ndef test_ensure_json_quoted_alternatives():\n    \"\"\"Each branch of an Alternatives is independently quoted.\"\"\"\n    term = Alternatives([String(\"a\"), String(\"b\")])\n    result = _ensure_json_quoted(term)\n    assert isinstance(result, 
Alternatives)\n    assert len(result.terms) == 2\n    for branch in result.terms:\n        assert isinstance(branch, String)\n        assert branch.value.startswith('\"') and branch.value.endswith('\"')\n\n\ndef test_ensure_json_quoted_passthrough():\n    \"\"\"Non-String, non-Alternatives terms are returned unchanged.\"\"\"\n    regex_term = types.integer\n    assert _ensure_json_quoted(regex_term) is regex_term\n\n    seq = Sequence([String(\"a\"), String(\"b\")])\n    assert _ensure_json_quoted(seq) is seq\n\n\ndef test_list_of_literals_quoted():\n    \"\"\"Literal strings inside List are JSON-quoted.\"\"\"\n    list_type = list[Literal[\"cat\", \"dog\"]]\n    result = _handle_list(get_args(list_type), recursion_depth=0)\n    assert isinstance(result, Sequence)\n    assert result.terms[0] == String(\"[\")\n    item = result.terms[1]\n    assert isinstance(item, Alternatives)\n    for branch in item.terms:\n        assert isinstance(branch, String)\n        assert branch.value.startswith('\"') and branch.value.endswith('\"')\n\n\ndef test_tuple_of_literals_quoted():\n    \"\"\"Literal strings inside fixed Tuple are JSON-quoted.\"\"\"\n    tuple_type = Tuple[Literal[\"x\"], Literal[\"y\"]]\n    result = _handle_tuple(get_args(tuple_type), recursion_depth=0)\n    assert isinstance(result, Sequence)\n    assert result.terms[0] == String(\"(\")\n    first_item = result.terms[1]\n    assert isinstance(first_item, Alternatives)\n    assert isinstance(first_item.terms[0], String)\n    assert first_item.terms[0].value.startswith('\"')\n\n\ndef test_dict_literal_key_quoted():\n    \"\"\"Literal string keys in Dict are JSON-quoted.\"\"\"\n    dict_type = dict[Literal[\"k1\", \"k2\"], int]\n    result = _handle_dict(get_args(dict_type), recursion_depth=0)\n    assert isinstance(result, Sequence)\n    inner = result.terms[1]\n    assert isinstance(inner, Optional)\n    key_term = inner.term.terms[0]\n    assert isinstance(key_term, Alternatives)\n    for branch in 
key_term.terms:\n        assert isinstance(branch, String)\n        assert branch.value.startswith('\"') and branch.value.endswith('\"')\n\n\ndef test_list_of_int_unchanged():\n    \"\"\"Non-string types in List are not wrapped in quotes.\"\"\"\n    list_type = list[int]\n    result = _handle_list(get_args(list_type), recursion_depth=0)\n    assert result.terms[1] == types.integer\n\n\ndef test_ensure_json_quoted_sequence_passthrough():\n    \"\"\"A Sequence term (already structured) passes through unchanged.\"\"\"\n    seq = Sequence([String(\"a\"), String(\"b\")])\n    assert _ensure_json_quoted(seq) is seq\n\n\ndef test_ensure_json_quoted_regex_passthrough():\n    \"\"\"Regex terms (e.g. types.string) already include quotes internally.\"\"\"\n    assert _ensure_json_quoted(types.string) is types.string\n    assert _ensure_json_quoted(types.integer) is types.integer\n    assert _ensure_json_quoted(types.boolean) is types.boolean\n\n\ndef test_list_single_literal():\n    \"\"\"A single-variant Literal inside list is still quoted.\"\"\"\n    list_type = list[Literal[\"only\"]]\n    result = _handle_list(get_args(list_type), recursion_depth=0)\n    item = result.terms[1]\n    assert isinstance(item, Alternatives)\n    branch = item.terms[0]\n    assert isinstance(branch, String)\n    assert branch == String('\"only\"')\n\n\ndef test_dict_literal_value_quoted():\n    \"\"\"Literal string values (not just keys) in Dict are JSON-quoted.\"\"\"\n    dict_type = dict[str, Literal[\"yes\", \"no\"]]\n    result = _handle_dict(get_args(dict_type), recursion_depth=0)\n    inner = result.terms[1]\n    assert isinstance(inner, Optional)\n    value_term = inner.term.terms[2]\n    assert isinstance(value_term, Alternatives)\n    for branch in value_term.terms:\n        assert isinstance(branch, String)\n        assert branch.value.startswith('\"') and branch.value.endswith('\"')\n\n\ndef test_tuple_ellipsis_literal_quoted():\n    \"\"\"Variable-length Tuple with Literal element 
type is JSON-quoted.\"\"\"\n    tuple_type = Tuple[Literal[\"a\", \"b\"], ...]\n    result = _handle_tuple(get_args(tuple_type), recursion_depth=0)\n    assert isinstance(result, Sequence)\n    item = result.terms[1]\n    assert isinstance(item, Alternatives)\n    for branch in item.terms:\n        assert isinstance(branch, String)\n        assert branch.value.startswith('\"') and branch.value.endswith('\"')\n\n\ndef test_list_of_bool_unchanged():\n    \"\"\"Boolean types in List are not wrapped in quotes.\"\"\"\n    list_type = list[bool]\n    result = _handle_list(get_args(list_type), recursion_depth=0)\n    assert result.terms[1] == types.boolean\n\n\ndef test_dict_int_value_unchanged():\n    \"\"\"Non-string value type in Dict is not wrapped in quotes.\"\"\"\n    dict_type = dict[str, int]\n    result = _handle_dict(get_args(dict_type), recursion_depth=0)\n    inner = result.terms[1]\n    assert isinstance(inner, Optional)\n    value_term = inner.term.terms[2]\n    assert value_term == types.integer\n\n\ndef test_ensure_json_quoted_nested_alternatives():\n    \"\"\"Nested Alternatives are recursively quoted.\"\"\"\n    inner_alt = Alternatives([String(\"x\"), String(\"y\")])\n    outer_alt = Alternatives([inner_alt, String(\"z\")])\n    result = _ensure_json_quoted(outer_alt)\n    assert isinstance(result, Alternatives)\n    inner_result = result.terms[0]\n    assert isinstance(inner_result, Alternatives)\n    for branch in inner_result.terms:\n        assert isinstance(branch, String)\n        assert branch.value.startswith('\"') and branch.value.endswith('\"')\n    z_result = result.terms[1]\n    assert isinstance(z_result, String)\n    assert z_result == String('\"z\"')\n\n\ndef test_literal_with_special_characters():\n    \"\"\"Literal strings with spaces and punctuation are quoted correctly.\"\"\"\n    list_type = list[Literal[\"hello world\", \"foo-bar\"]]\n    result = _handle_list(get_args(list_type), recursion_depth=0)\n    item = result.terms[1]\n    
assert isinstance(item, Alternatives)\n    assert len(item.terms) == 2\n    for branch in item.terms:\n        assert isinstance(branch, String)\n        assert branch.value.startswith('\"') and branch.value.endswith('\"')\n\n\n# ---------------------------------------------------------------------------\n# End-to-end regex tests for JSON quoting in containers\n# These verify the full pipeline: python_types_to_terms → to_regex → re.fullmatch\n# ---------------------------------------------------------------------------\n\n\ndef test_e2e_list_literal_matches_quoted_json():\n    \"\"\"List[Literal[...]] regex matches JSON-quoted strings and rejects bare words.\"\"\"\n    pattern = to_regex(python_types_to_terms(list[Literal[\"Paris\", \"London\"]]))\n    assert _re.fullmatch(pattern, '[\"Paris\"]')\n    assert _re.fullmatch(pattern, '[\"Paris\", \"London\"]')\n    assert _re.fullmatch(pattern, '[\"London\", \"Paris\", \"London\"]')\n    assert not _re.fullmatch(pattern, \"[Paris]\")\n    assert not _re.fullmatch(pattern, \"['Paris']\")\n\n\ndef test_e2e_standalone_literal_no_quotes():\n    \"\"\"Standalone Literal (not inside container) should NOT add quotes.\"\"\"\n    pattern = to_regex(python_types_to_terms(Literal[\"cat\", \"dog\"]))\n    assert _re.fullmatch(pattern, \"cat\")\n    assert _re.fullmatch(pattern, \"dog\")\n    assert not _re.fullmatch(pattern, '\"cat\"')\n\n\ndef test_e2e_list_literal_empty_string():\n    \"\"\"Empty string literal inside List produces quoted empty string.\"\"\"\n    pattern = to_regex(python_types_to_terms(list[Literal[\"\"]]))\n    assert _re.fullmatch(pattern, '[\"\"]')\n    assert _re.fullmatch(pattern, '[\"\", \"\"]')\n    assert not _re.fullmatch(pattern, \"[]\")\n\n\ndef test_e2e_list_mixed_literal_string_and_int():\n    \"\"\"Mixed Literal with string and int: only string values are quoted.\"\"\"\n    pattern = to_regex(python_types_to_terms(list[Literal[\"a\", 1]]))\n    assert _re.fullmatch(pattern, '[\"a\"]')\n    assert 
_re.fullmatch(pattern, \"[1]\")\n    assert _re.fullmatch(pattern, '[\"a\", 1]')\n    assert _re.fullmatch(pattern, '[1, \"a\"]')\n    assert not _re.fullmatch(pattern, \"[a]\")\n\n\ndef test_e2e_dict_literal_keys_quoted():\n    \"\"\"Dict with Literal keys produces JSON-quoted keys.\"\"\"\n    pattern = to_regex(python_types_to_terms(dict[Literal[\"k1\", \"k2\"], int]))\n    assert _re.fullmatch(pattern, '{\"k1\":0}')\n    assert _re.fullmatch(pattern, '{\"k1\":42, \"k2\":-7}')\n    assert not _re.fullmatch(pattern, \"{k1:0}\")\n\n\ndef test_e2e_dict_literal_values_quoted():\n    \"\"\"Dict with Literal string values produces JSON-quoted values.\"\"\"\n    pattern = to_regex(python_types_to_terms(dict[str, Literal[\"yes\", \"no\"]]))\n    assert _re.fullmatch(pattern, '{\"answer\":\"yes\"}')\n    assert _re.fullmatch(pattern, '{\"a\":\"yes\", \"b\":\"no\"}')\n\n\ndef test_e2e_tuple_fixed_literal_quoted():\n    \"\"\"Fixed-length Tuple with Literal elements produces JSON-quoted strings.\"\"\"\n    pattern = to_regex(python_types_to_terms(Tuple[Literal[\"x\"], Literal[\"y\"]]))\n    assert _re.fullmatch(pattern, '(\"x\", \"y\")')\n    assert not _re.fullmatch(pattern, \"(x, y)\")\n\n\ndef test_e2e_tuple_variadic_literal_quoted():\n    \"\"\"Variable-length Tuple with Literal produces JSON-quoted strings.\"\"\"\n    pattern = to_regex(python_types_to_terms(Tuple[Literal[\"a\", \"b\"], ...]))\n    assert _re.fullmatch(pattern, '(\"a\")')\n    assert _re.fullmatch(pattern, '(\"a\", \"b\", \"a\")')\n    assert not _re.fullmatch(pattern, \"(a)\")\n\n\ndef test_e2e_list_enum_string_values_quoted():\n    \"\"\"Enum with string members inside List produces JSON-quoted values.\"\"\"\n\n    class Color(Enum):\n        RED = \"red\"\n        BLUE = \"blue\"\n\n    pattern = to_regex(python_types_to_terms(list[Color]))\n    assert _re.fullmatch(pattern, '[\"red\"]')\n    assert _re.fullmatch(pattern, '[\"red\", \"blue\"]')\n    assert not _re.fullmatch(pattern, 
\"[red]\")\n\n\ndef test_e2e_list_int_not_quoted():\n    \"\"\"List[int] should not have any quoting applied.\"\"\"\n    pattern = to_regex(python_types_to_terms(list[int]))\n    assert _re.fullmatch(pattern, \"[42]\")\n    assert _re.fullmatch(pattern, \"[1, 2, 3]\")\n    assert not _re.fullmatch(pattern, '[\"1\"]')\n\n\ndef test_e2e_list_literal_special_characters():\n    \"\"\"Literal strings with spaces and hyphens are quoted correctly in regex.\"\"\"\n    pattern = to_regex(python_types_to_terms(list[Literal[\"hello world\", \"foo-bar\"]]))\n    assert _re.fullmatch(pattern, '[\"hello world\"]')\n    assert _re.fullmatch(pattern, '[\"hello world\", \"foo-bar\"]')\n    assert not _re.fullmatch(pattern, \"[hello world]\")\n\n\ndef test_e2e_dict_literal_key_and_enum_value():\n    \"\"\"Dict with Literal keys and Enum values: both quoted.\"\"\"\n\n    class Status(Enum):\n        ON = \"on\"\n        OFF = \"off\"\n\n    pattern = to_regex(python_types_to_terms(dict[Literal[\"switch\"], Status]))\n    assert _re.fullmatch(pattern, '{\"switch\":\"on\"}')\n    assert _re.fullmatch(pattern, '{\"switch\":\"off\"}')\n    assert not _re.fullmatch(pattern, \"{switch:on}\")\n\n\ndef test_to_regex():\n    string_term = String(\"hello\")\n    assert to_regex(string_term) == r\"hello\"\n\n    regex_term = Regex(\"[0-9]+\")\n    assert to_regex(regex_term) == r\"([0-9]+)\"\n\n    json_schema_term = JsonSchema({\"type\": \"integer\"})\n    assert to_regex(json_schema_term) == r\"((-)?(0|[1-9][0-9]*))\"\n\n    choice_term = Choice([\"a\", \"b\", \"c\"])\n    assert to_regex(choice_term) == r\"(a|b|c)\"\n\n    kleene_star = KleeneStar(String(\"a\"))\n    assert to_regex(kleene_star) == r\"(a)*\"\n\n    kleene_plus = KleenePlus(String(\"a\"))\n    assert to_regex(kleene_plus) == r\"(a)+\"\n\n    optional_term = Optional(String(\"a\"))\n    assert to_regex(optional_term) == r\"(a)?\"\n\n    alt_term = Alternatives([String(\"a\"), String(\"b\")])\n    assert to_regex(alt_term) == 
r\"(a|b)\"\n\n    seq_term = Sequence([String(\"a\"), String(\"b\")])\n    assert to_regex(seq_term) == r\"ab\"\n\n    exact_term = QuantifyExact(String(\"a\"), 3)\n    assert to_regex(exact_term) == r\"(a){3}\"\n\n    min_term = QuantifyMinimum(String(\"a\"), 2)\n    assert to_regex(min_term) == r\"(a){2,}\"\n\n    max_term = QuantifyMaximum(String(\"a\"), 5)\n    assert to_regex(max_term) == r\"(a){,5}\"\n\n    between_term = QuantifyBetween(String(\"a\"), 1, 3)\n    assert to_regex(between_term) == r\"(a){1,3}\"\n\n    with pytest.raises(TypeError):\n        to_regex(Term())\n"
  },
  {
    "path": "tests/types/test_json_schema_utils.py",
    "content": "import sys\nfrom dataclasses import is_dataclass\nfrom typing import Any, List, Literal, Optional\n\nfrom pydantic import BaseModel, TypeAdapter\nfrom pydantic_core import PydanticUndefined\n\nfrom outlines.types.json_schema_utils import (\n    schema_type_to_python,\n    json_schema_dict_to_typeddict,\n    json_schema_dict_to_pydantic,\n    json_schema_dict_to_dataclass\n)\n\nif sys.version_info >= (3, 12):\n    from typing import _TypedDictMeta  # type: ignore\nelse:\n    from typing_extensions import _TypedDictMeta  # type: ignore\n\n\ndef test_schema_type_to_python_simple_types():\n    assert schema_type_to_python({\"type\": \"string\"}, \"pydantic\") is str\n    assert schema_type_to_python({\"type\": \"integer\"}, \"pydantic\") is int\n    assert schema_type_to_python({\"type\": \"number\"}, \"pydantic\") is float\n    assert schema_type_to_python({\"type\": \"boolean\"}, \"pydantic\") is bool\n    assert schema_type_to_python({\"type\": \"object\"}, \"foo\") is Any\n    assert schema_type_to_python({}, \"pydantic\") is Any\n\n\ndef test_schema_type_to_python_enum():\n    schema = {\"enum\": [\"red\", \"green\", \"blue\"]}\n    result = schema_type_to_python(schema, \"pydantic\")\n    assert result == Literal[(\"red\", \"green\", \"blue\")]\n\n\ndef test_schema_type_to_python_array():\n    # String items\n    schema = {\"type\": \"array\", \"items\": {\"type\": \"string\"}}\n    result = schema_type_to_python(schema, \"pydantic\")\n    assert result == List[str]\n\n    # Integer items\n    schema = {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}\n    result = schema_type_to_python(schema, \"pydantic\")\n    assert result == List[int]\n\n    # Without items specification\n    schema = {\"type\": \"array\"}\n    result = schema_type_to_python(schema, \"pydantic\")\n    assert result == List[Any]\n\n\ndef test_schema_type_to_python_object():\n    schema = {\n        \"type\": \"object\",\n        \"title\": \"TestObject\",\n        
\"properties\": {\n            \"name\": {\"type\": \"string\"},\n            \"age\": {\"type\": \"integer\"}\n        },\n        \"required\": [\"name\"]\n    }\n\n    # Pydantic caller\n    pydantic_result = schema_type_to_python(schema, \"pydantic\")\n    assert issubclass(pydantic_result, BaseModel)\n    assert pydantic_result.__name__ == \"TestObject\"\n    assert pydantic_result.model_fields[\"name\"].annotation is str\n    assert pydantic_result.model_fields[\"age\"].annotation == Optional[int]\n\n    # Typeddict caller\n    typeddict_result = schema_type_to_python(schema, \"typeddict\")\n    assert isinstance(typeddict_result, _TypedDictMeta)\n    assert typeddict_result.__name__ == \"TestObject\"\n    assert typeddict_result.__annotations__[\"name\"] is str\n    assert typeddict_result.__annotations__[\"age\"] == Optional[int]\n\n    # Dataclass caller\n    dataclass_result = schema_type_to_python(schema, \"dataclass\")\n    print(TypeAdapter(dataclass_result).json_schema())\n    assert hasattr(dataclass_result, \"__dataclass_fields__\")\n    assert dataclass_result.__annotations__[\"name\"] is str\n    assert not hasattr(dataclass_result, \"name\")\n    assert dataclass_result.__annotations__[\"age\"] is int\n    assert dataclass_result.age is None\n\n\ndef test_schema_type_to_python_unknown_type():\n    # Unknown type\n    schema = {\"type\": \"unknown\"}\n    result = schema_type_to_python(schema, \"pydantic\")\n    assert result == Any\n\n    # Schema without type\n    schema = {}\n    result = schema_type_to_python(schema, \"pydantic\")\n    assert result == Any\n\n\ndef test_json_schema_dict_to_typeddict_basic():\n    schema = {\n        \"type\": \"object\",\n        \"properties\": {\n            \"name\": {\"type\": \"string\"},\n            \"age\": {\"type\": \"integer\"}\n        },\n        \"required\": [\"name\"]\n    }\n\n    result = json_schema_dict_to_typeddict(schema, \"Person\")\n    assert isinstance(result, _TypedDictMeta)\n    
assert result.__name__ == \"Person\"\n\n    annotations = result.__annotations__\n    assert annotations[\"name\"] is str\n    assert annotations[\"age\"] == Optional[int]\n\n\ndef test_json_schema_dict_to_typeddict_array_enum():\n    schema = {\n        \"type\": \"object\",\n        \"properties\": {\n            \"tags\": {\n                \"type\": \"array\",\n                \"items\": {\"type\": \"string\"}\n            },\n            \"preferences\": {\n                \"enum\": [\"light\", \"dark\"]\n            }\n        },\n        \"required\": [\"tags\"]\n    }\n\n    result = json_schema_dict_to_typeddict(schema)\n    assert isinstance(result, _TypedDictMeta)\n    assert result.__name__ == \"AnonymousTypedDict\"\n\n    annotations = result.__annotations__\n    assert annotations[\"tags\"] == List[str]\n    assert annotations[\"preferences\"] == Optional[Literal[(\"light\", \"dark\")]]\n\n\ndef test_json_schema_dict_to_typeddict_nested_object():\n    schema = {\n        \"type\": \"object\",\n        \"properties\": {\n            \"field\": {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"name\": {\"type\": \"string\"},\n                    \"age\": {\"type\": \"integer\"}\n                },\n                \"required\": [\"name\"]\n            }\n        },\n        \"required\": [\"field\"]\n    }\n\n    result = json_schema_dict_to_typeddict(schema)\n    assert isinstance(result, _TypedDictMeta)\n    assert result.__name__ == \"AnonymousTypedDict\"\n\n    annotations = result.__annotations__\n    assert isinstance(annotations[\"field\"], _TypedDictMeta)\n    assert annotations[\"field\"].__name__ == \"AnonymousTypedDict\"\n    assert annotations[\"field\"].__annotations__[\"name\"] is str\n    assert annotations[\"field\"].__annotations__[\"age\"] == Optional[int]\n\n\ndef test_json_schema_dict_to_pydantic_basic():\n    schema = {\n        \"type\": \"object\",\n        \"properties\": {\n       
     \"name\": {\"type\": \"string\"},\n            \"age\": {\"type\": \"integer\"}\n        },\n        \"required\": [\"name\"]\n    }\n\n    result = json_schema_dict_to_pydantic(schema, \"Person\")\n    assert issubclass(result, BaseModel)\n    assert result.__name__ == \"Person\"\n\n    assert result.model_fields[\"name\"].annotation is str\n    assert result.model_fields[\"age\"].annotation == Optional[int]\n    assert result.model_fields[\"name\"].default == PydanticUndefined\n    result.model_fields[\"age\"].default is None\n\n\ndef test_json_schema_dict_to_pydantic_array_enum():\n    schema = {\n        \"type\": \"object\",\n        \"properties\": {\n            \"tags\": {\n                \"type\": \"array\",\n                \"items\": {\"type\": \"string\"}\n            },\n            \"status\": {\n                \"enum\": [\"active\", \"inactive\", \"pending\"]\n            },\n        },\n        \"required\": [\"status\"]\n    }\n\n    result = json_schema_dict_to_pydantic(schema)\n    assert issubclass(result, BaseModel)\n    assert result.__name__ == \"AnonymousPydanticModel\"\n\n    assert result.model_fields[\"tags\"].annotation == Optional[List[str]]\n    assert result.model_fields[\"status\"].annotation == Literal[(\"active\", \"inactive\", \"pending\")]\n    assert result.model_fields[\"tags\"].default is None\n    assert result.model_fields[\"status\"].default == PydanticUndefined\n\n\ndef test_json_schema_dict_to_pydantic_nested_object():\n    schema = {\n        \"type\": \"object\",\n        \"properties\": {\n            \"field\": {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"name\": {\"type\": \"string\"},\n                    \"age\": {\"type\": \"integer\"}\n                },\n                \"required\": [\"name\"]\n            }\n        },\n        \"required\": [\"field\"]\n    }\n\n    result = json_schema_dict_to_pydantic(schema)\n    assert issubclass(result, 
BaseModel)\n    assert result.__name__ == \"AnonymousPydanticModel\"\n\n    assert issubclass(result.model_fields[\"field\"].annotation, BaseModel)\n    assert result.model_fields[\"field\"].annotation.__name__ == \"AnonymousPydanticModel\"\n\n    field = result.model_fields[\"field\"].annotation\n    assert field.model_fields[\"name\"].annotation is str\n    assert field.model_fields[\"age\"].annotation == Optional[int]\n    assert field.model_fields[\"name\"].default == PydanticUndefined\n    assert field.model_fields[\"age\"].default is None\n\n\ndef test_json_schema_dict_to_dataclass_basic():\n    schema = {\n        \"type\": \"object\",\n        \"properties\": {\n            \"name\": {\"type\": \"string\"},\n            \"age\": {\"type\": \"integer\"}\n        },\n        \"required\": [\"name\"]\n    }\n\n    result = json_schema_dict_to_dataclass(schema, \"Person\")\n    assert is_dataclass(result)\n    assert result.__name__ == \"Person\"\n\n    annotations = result.__annotations__\n    assert annotations[\"name\"] is str\n    assert annotations[\"age\"] is int\n    assert not hasattr(result, \"name\")\n    assert result.age is None\n\n\ndef test_json_schema_dict_to_dataclass_array_enum():\n    schema = {\n        \"type\": \"object\",\n        \"properties\": {\n            \"status\": {\n                \"enum\": [\"active\", \"inactive\", \"pending\"]\n            },\n            \"tags\": {\n                \"type\": \"array\",\n                \"items\": {\"type\": \"string\"}\n            },\n        },\n        \"required\": [\"status\"]\n    }\n\n    result = json_schema_dict_to_dataclass(schema)\n    assert is_dataclass(result)\n    assert result.__name__ == \"AnonymousDataclass\"\n\n    annotations = result.__annotations__\n    assert annotations[\"tags\"] == List[str]\n    assert annotations[\"status\"] == Literal[(\"active\", \"inactive\", \"pending\")]\n    assert not hasattr(result, \"status\")\n    assert result.tags is None\n\n\ndef 
test_json_schema_dict_to_dataclass_nested_object():\n    schema = {\n        \"type\": \"object\",\n        \"properties\": {\n            \"field\": {\n                \"type\": \"object\",\n                \"properties\": {\n                    \"name\": {\"type\": \"string\"},\n                    \"age\": {\"type\": \"integer\"}\n                },\n                \"required\": [\"name\"]\n            }\n        },\n        \"required\": [\"field\"]\n    }\n\n    result = json_schema_dict_to_dataclass(schema)\n    assert is_dataclass(result)\n    assert result.__name__ == \"AnonymousDataclass\"\n\n    annotations = result.__annotations__\n    assert is_dataclass(annotations[\"field\"])\n    assert annotations[\"field\"].__name__ == \"AnonymousDataclass\"\n\n    field = annotations[\"field\"]\n    assert field.__annotations__[\"name\"] is str\n    assert field.__annotations__[\"age\"] is int\n    assert not hasattr(field, \"name\")\n    assert field.age is None\n"
  },
  {
    "path": "tests/types/test_to_regex.py",
    "content": "import pytest\n\n\nfrom outlines.types.dsl import (\n    Choice,\n    String,\n    Regex,\n    JsonSchema,\n    KleeneStar,\n    KleenePlus,\n    QuantifyBetween,\n    QuantifyExact,\n    QuantifyMaximum,\n    QuantifyMinimum,\n    Sequence,\n    Alternatives,\n    Optional,\n    Term,\n    to_regex,\n)\n\n\ndef test_to_regex_simple():\n    a = String(\"a\")\n    assert to_regex(a) == \"a\"\n    assert a.matches(\"a\") is True\n\n    a = Regex(\"[0-9]\")\n    assert to_regex(a) == \"([0-9])\"\n    assert a.matches(0) is True\n    assert a.matches(10) is False\n    assert a.matches(\"a\") is False\n\n    a = JsonSchema({\"type\": \"integer\"})\n    assert to_regex(a) == r\"((-)?(0|[1-9][0-9]*))\"\n    assert a.matches(1) is True\n    assert a.matches(\"1\") is True\n    assert a.matches(\"a\") is False\n\n    a = Choice([\"a\", \"b\"])\n    assert to_regex(a) == \"(a|b)\"\n    assert a.matches(\"a\") is True\n    assert a.matches(\"b\") is True\n    assert a.matches(\"c\") is False\n\n    a = Optional(String(\"a\"))\n    assert to_regex(a) == \"(a)?\"\n    assert a.matches(\"\") is True\n    assert a.matches(\"a\") is True\n\n    a = KleeneStar(String(\"a\"))\n    assert to_regex(a) == \"(a)*\"\n    assert a.matches(\"\") is True\n    assert a.matches(\"a\") is True\n    assert a.matches(\"aaaaa\") is True\n\n    a = KleenePlus(String(\"a\"))\n    assert to_regex(a) == \"(a)+\"\n    assert a.matches(\"\") is False\n    assert a.matches(\"a\") is True\n    assert a.matches(\"aaaaa\") is True\n\n    a = QuantifyExact(String(\"a\"), 2)\n    assert to_regex(a) == \"(a){2}\"\n    assert a.matches(\"a\") is False\n    assert a.matches(\"aa\") is True\n    assert a.matches(\"aaa\") is False\n\n    a = QuantifyMinimum(String(\"a\"), 2)\n    assert to_regex(a) == \"(a){2,}\"\n    assert a.matches(\"a\") is False\n    assert a.matches(\"aa\") is True\n    assert a.matches(\"aaa\") is True\n\n    a = QuantifyMaximum(String(\"a\"), 2)\n    assert to_regex(a) == 
\"(a){,2}\"\n    assert a.matches(\"aa\") is True\n    assert a.matches(\"aaa\") is False\n\n    a = QuantifyBetween(String(\"a\"), 1, 2)\n    assert to_regex(a) == \"(a){1,2}\"\n    assert a.matches(\"\") is False\n    assert a.matches(\"a\") is True\n    assert a.matches(\"aa\") is True\n    assert a.matches(\"aaa\") is False\n\n    with pytest.raises(TypeError, match=\"Cannot convert\"):\n        to_regex(Term())\n\n\ndef test_to_regex_combinations():\n    a = Sequence([Regex(\"dog|cat\"), String(\"fish\")])\n    assert to_regex(a) == \"(dog|cat)fish\"\n"
  },
  {
    "path": "tests/types/test_types_utils.py",
    "content": "import datetime\nimport pytest\nimport sys\nfrom dataclasses import dataclass\nfrom enum import Enum\nif sys.version_info >= (3, 11):\n    from enum import member\nelse:\n    # Python < 3.11 doesn't have enum.member, but also doesn't warn about partial in enums\n    def member(x):  # type: ignore[no-redef]\n        return x\nfrom functools import partial\nfrom typing import (\n    Annotated,\n    Any,\n    Dict,\n    List,\n    Literal,\n    NewType,\n    Optional,\n    Tuple,\n    Union\n)\n\nfrom genson import SchemaBuilder\nfrom pydantic import BaseModel\n\nfrom outlines.types.dsl import Choice, JsonSchema\nfrom outlines.types.utils import (\n    get_enum_from_choice,\n    get_enum_from_literal,\n    get_schema_from_enum,\n    get_schema_from_signature,\n    is_bool,\n    is_callable,\n    is_date,\n    is_dataclass,\n    is_datetime,\n    is_enum,\n    is_float,\n    is_float_instance,\n    is_genson_schema_builder,\n    is_int,\n    is_int_instance,\n    is_literal,\n    is_native_dict,\n    is_pydantic_model,\n    is_str,\n    is_str_instance,\n    is_time,\n    is_typed_dict,\n    is_typing_dict,\n    is_typing_list,\n    is_typing_tuple,\n    is_union\n)\n\nif sys.version_info >= (3, 12):\n    from typing import TypedDict\nelse:\n    from typing_extensions import TypedDict\n\n\n# Type identification\n\n\n@pytest.fixture\ndef sample_enum():\n    class SampleEnum(Enum):\n        A = 1\n        B = 2\n\n    return SampleEnum\n\n@pytest.fixture\ndef sample_complex_enum():\n    def add_func(a: float, b: float) -> float:\n        return a + b\n\n    class SampleComplexEnum(Enum):\n        add = member(partial(add_func))\n        a = \"a\"\n        b = 2\n\n    return SampleComplexEnum\n\n@pytest.fixture\ndef sample_empty_enum():\n    def add_func(a: float, b: float) -> float:\n        return a + b\n\n    # the enum is empty because the function is not registered as callable\n    class SampleEmptyEnum(Enum):\n        add = add_func\n\n    return 
SampleEmptyEnum\n\n@pytest.fixture\ndef sample_class():\n    class SampleClass:\n        pass\n\n    return SampleClass\n\n@pytest.fixture\ndef sample_dataclass():\n    @dataclass\n    class SampleDataclass:\n        field1: str\n        field2: int\n\n    return SampleDataclass\n\n@pytest.fixture\ndef sample_typed_dict():\n    class SampleTypedDict(TypedDict):\n        name: str\n        age: int\n\n    return SampleTypedDict\n\n@pytest.fixture\ndef sample_pydantic_model():\n    class SamplePydanticModel(BaseModel):\n        name: str\n        age: int\n\n    return SamplePydanticModel\n\n@pytest.fixture\ndef sample_schema_builder():\n    builder = SchemaBuilder()\n    builder.add_schema({\"type\": \"object\", \"properties\": {}})\n    builder.add_object({\"hi\": \"there\"})\n    builder.add_object({\"hi\": 5})\n    return builder\n\n@pytest.fixture\ndef sample_function():\n    def sample_function(foo: str, bar: List[int]):\n        pass\n\n    return sample_function\n\n@pytest.fixture\ndef sample_function_missing_type():\n    def sample_function(foo, bar: List[int]):\n        pass\n\n    return sample_function\n\n\ndef test_is_int():\n    assert is_int(int)\n    assert not is_int(float)\n    assert not is_int(1)\n    assert not is_int(List[int])\n    assert not is_int(Dict[int, int])\n    assert is_int(Annotated[int, \"some metadata\"])\n    assert not is_int(Annotated[str, \"some metadata\"])\n    assert is_int(NewType(\"UserId\", int))\n    assert not is_int(NewType(\"UserId\", str))\n\n\ndef test_is_int_instance():\n    assert is_int_instance(1)\n    assert not is_int_instance(True)\n    assert not is_int_instance(1.0)\n    assert not is_int_instance(\"1\")\n    assert not is_int_instance(int)\n\n\ndef test_is_float():\n    assert is_float(float)\n    assert not is_float(int)\n    assert not is_float(1.0)\n    assert not is_float(List[float])\n    assert not is_float(Dict[float, float])\n    assert is_float(Annotated[float, \"some metadata\"])\n    assert not 
is_float(Annotated[int, \"some metadata\"])\n    assert is_float(NewType(\"UserId\", float))\n    assert not is_float(NewType(\"UserId\", int))\n\n\ndef test_is_float_instance():\n    assert is_float_instance(1.0)\n    assert not is_float_instance(1)\n    assert not is_float_instance(\"1.0\")\n    assert not is_float_instance(float)\n\n\ndef test_is_str():\n    assert is_str(str)\n    assert not is_str(int)\n    assert not is_str(\"hello\")\n    assert not is_str(List[str])\n    assert not is_str(Dict[str, str])\n    assert is_str(Annotated[str, \"some metadata\"])\n    assert not is_str(Annotated[int, \"some metadata\"])\n    assert is_str(NewType(\"UserId\", str))\n    assert not is_str(NewType(\"UserId\", int))\n\n\ndef test_is_str_instance():\n    assert is_str_instance(\"hello\")\n    assert is_str_instance(\"\")\n    assert is_str_instance(\"123\")\n    assert not is_str_instance(123)\n    assert not is_str_instance(str)\n\n\ndef test_is_bool():\n    assert is_bool(bool)\n    assert not is_bool(int)\n    assert not is_bool(True)\n    assert is_bool(Annotated[bool, \"some metadata\"])\n    assert not is_bool(Annotated[int, \"some metadata\"])\n    assert is_bool(NewType(\"UserId\", bool))\n    assert not is_bool(NewType(\"UserId\", int))\n\n\ndef test_is_datetime():\n    assert is_datetime(datetime.datetime)\n    assert not is_datetime(datetime.date)\n    assert not is_datetime(datetime.time)\n    assert not is_datetime(datetime.datetime.now())\n\n\ndef test_is_date():\n    assert is_date(datetime.date)\n    assert not is_date(datetime.datetime)\n    assert not is_date(datetime.time)\n    assert not is_date(datetime.date.today())\n\n\ndef test_is_time():\n    assert is_time(datetime.time)\n    assert not is_time(datetime.datetime)\n    assert not is_time(datetime.date)\n    assert not is_time(datetime.time(12, 30))\n\n\ndef test_is_native_dict():\n    assert is_native_dict(dict)\n    assert not is_native_dict({})\n    assert not is_native_dict({\"key\": 
\"value\"})\n    assert not is_native_dict(list)\n    assert not is_native_dict(dict[str, int])\n\n\ndef test_is_typing_dict():\n    assert is_typing_dict(dict[str, int])\n    assert is_typing_dict(Dict[int, str])\n    assert not is_typing_dict(dict)\n    assert not is_typing_dict({})\n\n\ndef test_is_typing_list():\n    assert is_typing_list(list[int])\n    assert is_typing_list(List[int])\n    assert not is_typing_list(list)\n    assert not is_typing_list([])\n    assert not is_typing_list(dict)\n\n\ndef test_is_typing_tuple():\n    assert is_typing_tuple(tuple[int, str])\n    assert is_typing_tuple(Tuple[int, str])\n    assert not is_typing_tuple(tuple)\n    assert not is_typing_tuple(())\n    assert not is_typing_tuple(list)\n\n\ndef test_is_union():\n    assert is_union(Union[int, str])\n    assert is_union(Optional[int])\n    assert not is_union(list)\n    assert not is_union([\"a\", \"b\"])\n    assert not is_union(Literal[int, str])\n\n\ndef test_is_literal():\n    assert is_literal(Literal[\"a\", \"b\"])\n    assert not is_literal(str)\n    assert not is_literal(\"a\")\n    assert not is_literal([\"a\", \"b\"])\n    assert not is_literal(Union[str, int])\n\n\ndef test_is_dataclass(\n    sample_dataclass,\n    sample_class,\n    sample_typed_dict,\n    sample_pydantic_model\n):\n    assert is_dataclass(sample_dataclass)\n    assert not is_dataclass(sample_dataclass(field1=\"test\", field2=123))\n    assert not is_dataclass(dict)\n    assert not is_dataclass(sample_class)\n    assert not is_dataclass(sample_typed_dict)\n    assert not is_dataclass(sample_pydantic_model)\n\n\ndef test_is_typed_dict(\n    sample_typed_dict,\n    sample_class,\n    sample_dataclass,\n    sample_pydantic_model\n):\n    assert is_typed_dict(sample_typed_dict)\n    assert not is_typed_dict(sample_typed_dict(name=\"test\", age=30))\n    assert not is_typed_dict(dict)\n    assert not is_typed_dict(sample_class)\n    assert not is_typed_dict(sample_dataclass)\n    assert not 
is_typed_dict(sample_pydantic_model)\n\n\ndef test_is_pydantic_model(\n    sample_pydantic_model,\n    sample_class,\n    sample_dataclass,\n    sample_typed_dict\n):\n    assert is_pydantic_model(sample_pydantic_model)\n    assert not is_pydantic_model(sample_pydantic_model(name=\"test\", age=30))  # Instance\n    assert not is_pydantic_model(dict)\n    assert not is_pydantic_model(sample_class)\n    assert not is_pydantic_model(sample_dataclass)\n    assert not is_pydantic_model(sample_typed_dict)\n\n\ndef test_is_genson_schema_builder(\n    sample_schema_builder,\n    sample_class,\n    sample_dataclass,\n    sample_typed_dict,\n    sample_pydantic_model\n):\n    assert is_genson_schema_builder(sample_schema_builder)\n    assert not is_genson_schema_builder(dict)\n    assert not is_genson_schema_builder(str)\n    assert not is_genson_schema_builder({\"type\": 'object', \"properties\": {}})\n    assert not is_genson_schema_builder('{\"type\": \"object\", \"properties\": {}}')\n    assert not is_genson_schema_builder(sample_class)\n    assert not is_genson_schema_builder(sample_dataclass)\n    assert not is_genson_schema_builder(sample_typed_dict)\n    assert not is_genson_schema_builder(sample_pydantic_model)\n\n\ndef test_is_enum(sample_enum):\n    assert is_enum(sample_enum)\n    assert not is_enum(sample_enum.A)\n    assert not is_enum(dict)\n    assert not is_enum(Literal[\"a\", \"b\"])\n    assert not is_enum([\"a\", \"b\"])\n\n\ndef test_is_callable(sample_function, sample_class, sample_dataclass, sample_typed_dict, sample_pydantic_model):\n    assert is_callable(sample_function)\n    assert is_callable(lambda x: x)\n    assert not is_callable(dict)\n    assert not is_callable(sample_class)\n    assert not is_callable(sample_dataclass)\n    assert not is_callable(sample_typed_dict)\n    assert not is_callable(sample_pydantic_model)\n\n\n# Type conversion\n\n\ndef test_get_enum_from_choice(sample_enum):\n    choice = Choice([\"a\", \"b\", sample_enum.A])\n   
 enum = get_enum_from_choice(choice)\n    assert is_enum(enum)\n    assert enum.a.value == \"a\"\n    assert enum.b.value == \"b\"\n    assert getattr(enum, \"SampleEnum.A\").value == sample_enum.A\n\n\ndef test_get_enum_from_literal(sample_enum):\n    basic_enum = get_enum_from_literal(Literal[\"a\", \"b\"])\n    assert(is_enum(basic_enum))\n    assert basic_enum.a.value == \"a\"\n    assert basic_enum.b.value == \"b\"\n\n    complex_enum = get_enum_from_literal(Literal[\"a\", 1, True, None, sample_enum.A])\n    assert is_enum(complex_enum)\n    assert complex_enum.a.value == \"a\"\n    assert getattr(complex_enum, \"1\").value == 1\n    assert getattr(complex_enum, \"True\").value\n    assert getattr(complex_enum, \"None\").value is None\n    assert getattr(complex_enum, \"SampleEnum.A\").value == sample_enum.A\n\n\ndef test_get_schema_from_signature(sample_function, sample_function_missing_type):\n    result = get_schema_from_signature(sample_function)\n    assert result[\"type\"] == \"object\"\n    assert list(result[\"properties\"].keys()) == [\"foo\", \"bar\"]\n    assert result[\"properties\"][\"foo\"][\"type\"] == \"string\"\n    assert result[\"properties\"][\"bar\"][\"type\"] == \"array\"\n    assert result[\"properties\"][\"bar\"][\"items\"][\"type\"] == \"integer\"\n\n    # in case of a function missing type annotations\n    with pytest.raises(ValueError):\n        get_schema_from_signature(sample_function_missing_type)\n\n\ndef test_get_schema_from_enum(sample_complex_enum, sample_empty_enum):\n    schema = get_schema_from_enum(sample_complex_enum)\n    assert JsonSchema(schema)\n    assert schema[\"title\"] == sample_complex_enum.__name__\n    assert len(schema[\"oneOf\"]) == len(sample_complex_enum)\n    for elt in schema[\"oneOf\"]:\n        assert type(elt) in [int, float, bool, type(None), str, dict]\n\n    # in case of an empty enum because the function member is not registered as callable\n    with pytest.raises(ValueError):\n        
get_schema_from_enum(sample_empty_enum)\n"
  }
]