Repository: dottxt-ai/outlines Branch: main Commit: 54827e6d539b Files: 239 Total size: 1.2 MB Directory structure: gitextract_sobc03i9/ ├── .devcontainer/ │ └── devcontainer.json ├── .editorconfig ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ └── config.yml │ ├── PULL_REQUEST_TEMPLATE/ │ │ └── pull_request_template.md │ ├── scripts/ │ │ └── build_sdist_and_wheel.sh │ └── workflows/ │ ├── build_documentation.yml │ ├── publish_documentation.yml │ ├── release_pypi.yaml │ ├── tests.yml │ └── tests_api_models.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .pydocstyle ├── .readthedocs.yaml ├── .vscode/ │ └── settings.json ├── LICENSE ├── README.md ├── docs/ │ ├── api_reference/ │ │ └── index.md │ ├── blog/ │ │ └── index.md │ ├── community/ │ │ ├── contribute.md │ │ ├── examples.md │ │ ├── feedback.md │ │ ├── index.md │ │ └── versioning.md │ ├── core_concepts.md │ ├── examples/ │ │ ├── chain_of_density.md │ │ ├── chain_of_thought.md │ │ ├── classification.md │ │ ├── dating_profiles.md │ │ ├── deploy-using-bentoml.md │ │ ├── deploy-using-cerebrium.md │ │ ├── deploy-using-modal.md │ │ ├── earnings-reports.md │ │ ├── extract_event_details.md │ │ ├── extract_event_details.py │ │ ├── extraction.md │ │ ├── index.md │ │ ├── knowledge_graph_extraction.md │ │ ├── models_playing_chess.md │ │ ├── prompt_templates/ │ │ │ ├── chain_of_density.txt │ │ │ ├── classification.txt │ │ │ ├── react_agent.txt │ │ │ ├── simtom_prospective_taking.txt │ │ │ └── simtom_simulation.txt │ │ ├── qa-with-citations.md │ │ ├── react_agent.md │ │ ├── read-pdfs.md │ │ ├── receipt-digitization.md │ │ ├── simtom.md │ │ └── structured_generation_workflow.md │ ├── features/ │ │ ├── advanced/ │ │ │ ├── backends.md │ │ │ └── logits_processors.md │ │ ├── core/ │ │ │ ├── generator.md │ │ │ ├── inputs.md │ │ │ └── output_types.md │ │ ├── index.md │ │ ├── models/ │ │ │ ├── anthropic.md │ │ │ ├── dottxt.md │ │ │ ├── gemini.md │ │ │ ├── index.md │ │ │ ├── llamacpp.md │ │ │ ├── mistral.md │ │ │ ├── mlxlm.md │ │ │ ├── ollama.md │ │ │ ├── openai.md │ │ │ ├── openai_compatible.md │ │ │ ├── openrouter.md │ │ │ ├── sglang.md │ │ │ ├── tgi.md │ │ │ ├── transformers.md │ │ │ ├── transformers_multimodal.md │ │ │ ├── vllm.md │ │ │ └── vllm_offline.md │ │ └── utility/ │ │ ├── application.md │ │ ├── regex_dsl.md │ │ └── template.md │ ├── guide/ │ │ ├── architecture.md │ │ ├── chat_templating.md │ │ ├── core_concepts.md │ │ ├── fastapi_vllm_deployment.md │ │ ├── getting_started.md │ │ ├── installation.md │ │ ├── migration.md │ │ ├── selecting_an_inference_backend.md │ │ └── vlm.md │ ├── index.md │ ├── overrides/ │ │ ├── home.html │ │ └── main.html │ └── stylesheets/ │ └── extra.css ├── environment.yml ├── examples/ │ ├── babyagi.py │ ├── beam-cloud/ │ │ ├── README.md │ │ └── app.py │ ├── bentoml/ │ │ ├── .bentoignore │ │ ├── bentofile.yaml │ │ ├── import_model.py │ │ ├── requirements.txt │ │ └── service.py │ ├── cerebrium/ │ │ ├── cerebrium.toml │ │ └── main.py │ ├── dating_profile.py │ ├── llamacpp_example.py │ ├── llamacpp_processor.py │ ├── math_generate_code.py │ ├── meta_prompting.py │ ├── modal_example.py │ ├── pick_odd_one_out.py │ ├── prompts/ │ │ ├── babyagi_create_task.txt │ │ ├── babyagi_perform_task.txt │ │ ├── babyagi_prioritize_task.txt │ │ ├── dating_profile.txt │ │ ├── pick_odd_one_out.txt │ │ └── self_consistency.txt │ ├── react.py │ ├── sampling.ipynb │ ├── self_consistency.py │ ├── simulation_based_inference.ipynb │ └── vllm_offline_integration.py ├── flake.nix ├── llm.txt ├── mkdocs.yml ├── outlines/ │ ├── __init__.py │ ├── applications.py │ ├── backends/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── llguidance.py │ │ ├── outlines_core.py │ │ └── xgrammar.py │ ├── caching.py │ ├── generator.py │ ├── grammars/ │ │ ├── arithmetic.lark │ │ ├── common.lark │ │ └── json.lark │ ├── grammars.py │ ├── inputs.py │ ├── models/ │ │ ├── __init__.py │ │ ├── anthropic.py │ │ ├── base.py │ │ ├── dottxt.py │ │ ├── gemini.py │ │ ├── llamacpp.py │ │ ├── lmstudio.py │ │ ├── mistral.py │ │ ├── mlxlm.py │ │ ├── ollama.py │ │ ├── openai.py │ │ ├── sglang.py │ │ ├── tgi.py │ │ ├── tokenizer.py │ │ ├── transformers.py │ │ ├── utils.py │ │ ├── vllm.py │ │ └── vllm_offline.py │ ├── processors/ │ │ ├── __init__.py │ │ ├── base_logits_processor.py │ │ └── tensor_adapters/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── mlx.py │ │ ├── numpy.py │ │ └── torch.py │ ├── py.typed │ ├── release_note.md │ ├── templates.py │ └── types/ │ ├── __init__.py │ ├── airports.py │ ├── countries.py │ ├── dsl.py │ ├── json_schema_utils.py │ ├── locale/ │ │ ├── __init__.py │ │ └── us.py │ └── utils.py ├── pyproject.toml ├── requirements-doc.txt ├── scripts/ │ └── gen_ref_pages.py ├── setup.cfg ├── shell.nix └── tests/ ├── __init__.py ├── backends/ │ ├── test_backends.py │ ├── test_backends_utils.py │ ├── test_llguidance.py │ ├── test_outlines_core.py │ └── test_xgrammar.py ├── cfg_samples/ │ ├── arithmetic/ │ │ ├── lots_of_ops.arithmetic.test │ │ └── simple_math.arithmetic.test │ └── json/ │ ├── outlines.generate.samplers.mypy.json.test │ ├── simple_fruit.json.test │ └── simple_fruit_no_indent.json.test ├── conftest.py ├── models/ │ ├── test_anthopic_type_adapter.py │ ├── test_anthropic.py │ ├── test_dottxt.py │ ├── test_dottxt_type_adapter.py │ ├── test_gemini.py │ ├── test_gemini_type_adapter.py │ ├── test_llamacpp.py │ ├── test_llamacpp_tokenizer.py │ ├── test_llamacpp_type_adapter.py │ ├── test_lmstudio.py │ ├── test_lmstudio_type_adapter.py │ ├── test_mistral.py │ ├── test_mistral_type_adapter.py │ ├── test_mlxlm.py │ ├── test_mlxlm_type_adapter.py │ ├── test_ollama.py │ ├── test_ollama_type_adapter.py │ ├── test_openai.py │ ├── test_openai_type_adapter.py │ ├── test_sglang.py │ ├── test_sglang_type_adapter.py │ ├── test_tgi.py │ ├── test_tgi_model_adapter.py │ ├── test_tokenizer.py │ ├── test_transformers.py │ ├── test_transformers_multimodal.py │ ├── test_transformers_multimodal_type_adapter.py │ ├── test_transformers_tokenizer.py │ ├── test_transformers_type_adapter.py │ ├── test_utils.py │ ├── test_vllm.py │ ├── test_vllm_offline.py │ ├── test_vllm_offline_type_adapter.py │ └── test_vllm_type_adapter.py ├── processors/ │ ├── test_base_processor.py │ └── test_tensor_adapters.py ├── test_applications.py ├── test_cache.py ├── test_generator.py ├── test_inputs.py ├── test_templates.py ├── test_utils/ │ ├── mock_lmstudio_client.py │ ├── mock_openai_client.py │ ├── mock_tgi_client.py │ └── utils.py └── types/ ├── test_custom_types.py ├── test_dsl.py ├── test_json_schema_utils.py ├── test_to_regex.py └── test_types_utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .devcontainer/devcontainer.json ================================================ { "name": "dottxt-ai", "image": "mcr.microsoft.com/devcontainers/python:3.12", "runArgs": [ "--device=nvidia.com/gpu=all" ], "hostRequirements": { "gpu": "optional" }, "features": { "ghcr.io/devcontainers/features/conda:1": {}, "ghcr.io/devcontainers/features/nvidia-cuda:1": { "installCudnn": true, "installToolkit": true, "cudaVersion": "12.4" }, "ghcr.io/devcontainers/features/rust:1": {} } } ================================================ FILE: .editorconfig ================================================ # EditorConfig is awesome: https://EditorConfig.org # top-most EditorConfig file root = true [*] indent_style = space indent_size = 4 end_of_line = lf charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true [*.yaml] indent_size = 2 ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ # Issue template inspired by NumPy's excellent template: # https://github.com/numpy/numpy/edit/main/.github/ISSUE_TEMPLATE/bug-report.yml name: 🐞 Bug report description: Create a bug report to help us reproduce and fix it. title: "" labels: ["bug"] body: - type: markdown attributes: value: >- Thank you for taking the time to file a bug report. First, carefully read the following before everything else: - Does your issue only arise in a library that uses Outlines? If so, submit your issue to this library's issue tracker. - Did you check the issue tracker for open and closed issues that may be related to your bug? - type: textarea attributes: label: "Describe the issue as clearly as possible:" validations: required: true - type: textarea attributes: label: "Steps/code to reproduce the bug:" description: > A short code example that reproduces the problem/missing feature. It should be self-contained, i.e., can be copy-pasted into the Python interpreter or run as-is via `python myproblem.py`. placeholder: | import outlines << your code here >> render: python validations: required: true - type: textarea attributes: label: "Expected result:" description: > Please describe what you expect the above example to output. placeholder: | << the expected result here >> render: shell validations: required: true - type: textarea attributes: label: "Error message:" description: > Please include the full error message, if any. placeholder: | << Full traceback starting from `Traceback: ...` >> render: shell - type: textarea attributes: label: "Outlines/Python version information:" description: | Please run the following code and paste the output here. python -c "from outlines import _version; print(_version.__version__)"; python -c "import sys; print('Python', sys.version)"; pip freeze; value: | Version information
``` (command output here) ```
validations: required: true - type: textarea attributes: label: "Context for the issue:" description: | Please explain how this issue affects your work or why it should be prioritized. placeholder: | << your explanation here >> validations: required: false ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ contact_links: - name: 🤔 Questions & Help url: https://github.com/dottxt-ai/outlines/discussions/new about: "If you have a question about how to use Outlines, please start a discussion." ================================================ FILE: .github/PULL_REQUEST_TEMPLATE/pull_request_template.md ================================================ # 🚧 Thank you for opening a PR! A few important guidelines and requirements before we can merge your PR: - [ ] We should be able to understand what the PR does from its title only; - [ ] There is a high-level description of the changes; - [ ] *If I add a new feature*, there is an [issue][issues] discussing it already; - [ ] There are links to *all* the relevant issues, discussions and PRs; - [ ] The branch is rebased on the latest `main` commit; - [ ] **Commit messages** follow these [guidelines][git-guidelines]; - [ ] One commit per logical change; - [ ] The code respects the current **naming conventions**; - [ ] Docstrings follow the [numpy style guide][docstring-guidelines]; - [ ] `pre-commit` is installed and configured on your machine, and you ran it before opening the PR; - [ ] There are tests covering the changes; - [ ] The documentation is up-to-date; Consider opening a **Draft PR** if your work is still in progress but you would like some feedback from other contributors. [issues]: https://github.com/dottxt-ai/outlines/issues [git-guidelines]: https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html [docstring-guidelines]: https://numpydoc.readthedocs.io/en/latest/format.html ================================================ FILE: .github/scripts/build_sdist_and_wheel.sh ================================================ #!/bin/bash # Build sdist and wheel python -m pip install -U pip python -m pip install build python -m build # Check sdist install and imports mkdir -p test-sdist cd test-sdist python -m venv venv-sdist venv-sdist/bin/python -m pip install ../dist/outlines-*.tar.gz venv-sdist/bin/python -c "import outlines" cd .. # Check wheel install and imports mkdir -p test-wheel cd test-wheel python -m venv venv-wheel venv-wheel/bin/python -m pip install ../dist/outlines-*.whl venv-wheel/bin/python -c "import outlines" cd .. ================================================ FILE: .github/workflows/build_documentation.yml ================================================ name: Build the documentation on: pull_request: types: [opened, synchronize, reopened, closed] branches: [main] workflow_dispatch: permissions: contents: write pull-requests: write jobs: build: name: Build and Deploy Documentation Preview runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: python-version: "3.10" - name: Install dependencies if: github.event.action != 'closed' run: pip install -r requirements-doc.txt - name: Build the documentation if: github.event.action != 'closed' env: GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }} PR_NUMBER: ${{ github.event.pull_request.number }} run: | sed -i "1i site_url: https://dottxt-ai.github.io/outlines/pr-preview/pr-${PR_NUMBER}/" mkdocs.yml mkdocs build - name: Deploy to PR preview if: github.event_name == 'pull_request' uses: rossjrw/pr-preview-action@v1 with: source-dir: site/ preview-branch: gh-pages umbrella-dir: pr-preview comment: false - name: Comment PR with preview link if: github.event_name == 'pull_request' && github.event.action != 'closed' uses: actions/github-script@v7 with: script: | const prNumber = context.issue.number; const previewUrl = `https://dottxt-ai.github.io/outlines/pr-preview/pr-${prNumber}/`; // Find existing preview comment const comments = await github.rest.issues.listComments({ issue_number: prNumber, owner: context.repo.owner, repo: context.repo.repo, }); const botComment = comments.data.find(comment => comment.user.type === 'Bot' && comment.body.includes('Documentation preview') ); const commentBody = `📚 **Documentation preview**: ${previewUrl}\n\n*Preview updates automatically with each commit.*`; // Update existing comment or create new one if (botComment) { await github.rest.issues.updateComment({ comment_id: botComment.id, owner: context.repo.owner, repo: context.repo.repo, body: commentBody }); } else { await github.rest.issues.createComment({ issue_number: prNumber, owner: context.repo.owner, repo: context.repo.repo, body: commentBody }); } ================================================ FILE: .github/workflows/publish_documentation.yml ================================================ name: Publish the documentation on: workflow_dispatch: push: branches: - main release: types: - created permissions: contents: write jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: actions/setup-python@v4 with: python-version: 3.x - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV - uses: actions/cache@v3 with: key: mkdocs-material-${{ env.cache_id }} path: .cache restore-keys: | mkdocs-material- - run: pip install -r requirements-doc.txt - run: mkdocs build - name: Set up Git run: | git config user.name ${{ github.actor }} git config user.email ${{ github.actor }}@users.noreply.github.com - name: Publish Tag as latest env: GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }} if: github.event_name == 'release' run: | mike deploy --push --update-aliases ${{ github.ref_name }} latest mike set-default --push latest - name: Publish main as unstable env: GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }} if: github.event_name == 'push' run: | mike deploy --push --update-aliases ${{ github.ref_name }} unstable ================================================ FILE: .github/workflows/release_pypi.yaml ================================================ name: Release PyPi on: release: types: - created jobs: release-job: name: Build and publish on PyPi runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: "3.10" - name: Build SDist and Wheel run: ./.github/scripts/build_sdist_and_wheel.sh - name: Check that the package version matches the Release name run: | grep -Rq "^Version: ${GITHUB_REF:10}$" outlines.egg-info/PKG-INFO - name: Publish to PyPi uses: pypa/gh-action-pypi-publish@v1.4.2 with: user: __token__ password: ${{ secrets.PYPI_TOKEN }} ================================================ FILE: .github/workflows/tests.yml ================================================ name: Tests on: pull_request: branches: [main,v1.0] push: branches: [main] jobs: style: name: Check the code style runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: "3.13" - uses: pre-commit/action@v3.0.0 tests: name: Run the tests runs-on: ubuntu-latest strategy: matrix: python-version: ["3.10", "3.13"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} cache: 'pip' cache-dependency-path: 'pyproject.toml' - name: Free disk space run: | set -eux sudo rm -rf /usr/share/dotnet || true sudo rm -rf /opt/ghc || true sudo rm -rf /usr/local/lib/android || true sudo apt-get clean df -h - name: Install Ollama run: | curl -fsSL https://ollama.com/install.sh | sh ollama --version ollama pull tinyllama - name: Set up test environment run: | python -m pip install --upgrade pip pip install uv uv sync --no-group test-gpu --extra test - name: cache HuggingFace models uses: actions/cache@v4 with: path: ~/.cache/huggingface key: hf-${{ runner.os }}-${{ hashFiles('**/pyproject.toml') }} restore-keys: | hf-${{ runner.os }}- - name: Create matrix id id: matrix-id env: MATRIX_CONTEXT: ${{ toJson(matrix) }} run: | echo $MATRIX_CONTEXT export MATRIX_ID=`echo $MATRIX_CONTEXT | md5sum | cut -c 1-32` echo $MATRIX_ID echo "::set-output name=id::$MATRIX_ID" - name: Run tests run: | rm -f .coverage* uv run coverage erase uv run python -m coverage run --branch --source=outlines --parallel-mode -m pytest -x -m 'not api_call' - name: Upload coverage data uses: actions/upload-artifact@v4 with: name: coverage-data-${{ matrix.python-version }} path: .coverage.* if-no-files-found: ignore include-hidden-files: true coverage: name: Combine & check coverage. needs: tests runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 with: fetch-depth: 0 - uses: actions/setup-python@v4 with: cache: pip python-version: "3.11" - name: Set up environment run: | pip install --upgrade "coverage[toml]>=5.1" diff-cover - uses: actions/download-artifact@v4 with: pattern: coverage-data-* merge-multiple: true - name: Combine coverage & fail if it's <100%. run: | python -m coverage combine python -m coverage html --skip-covered --skip-empty python -m coverage xml python -m coverage report --fail-under=100 || (python -m coverage report && exit 1) - name: Upload HTML report if check failed. uses: actions/upload-artifact@v4 with: name: html-report path: htmlcov overwrite: true if: ${{ failure() }} build-wheel: name: Build Wheel and Test SDist runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Build SDist and Wheel run: ./.github/scripts/build_sdist_and_wheel.sh ================================================ FILE: .github/workflows/tests_api_models.yml ================================================ name: API Models Tests on: workflow_dispatch: jobs: tests: name: Run API Models Tests runs-on: ubuntu-latest env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} DOTTXT_API_KEY: ${{ secrets.DOTTXT_API_KEY }} strategy: fail-fast: false matrix: python-version: ["3.10"] steps: - uses: actions/checkout@v3 with: ref: ${{ github.ref }} - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} cache: 'pip' cache-dependency-path: 'pyproject.toml' - name: Free disk space run: | set -eux sudo rm -rf /usr/share/dotnet || true sudo rm -rf /opt/ghc || true sudo rm -rf /usr/local/lib/android || true sudo apt-get clean df -h - name: Install Ollama run: | curl -fsSL https://ollama.com/install.sh | sh ollama --version ollama pull tinyllama - name: Set up test environment run: | python -m pip install --upgrade pip pip install uv uv sync --no-group test-gpu --extra test - name: cache HuggingFace models uses: actions/cache@v4 with: path: ~/.cache/huggingface key: hf-${{ runner.os }}-${{ hashFiles('**/pyproject.toml') }} restore-keys: | hf-${{ runner.os }}- - name: Create matrix id id: matrix-id env: MATRIX_CONTEXT: ${{ toJson(matrix) }} run: | echo $MATRIX_CONTEXT export MATRIX_ID=`echo $MATRIX_CONTEXT | md5sum | cut -c 1-32` echo $MATRIX_ID echo "::set-output name=id::$MATRIX_ID" - name: Run tests run: | uv run pytest -m 'api_call' --ignore=tests/models/test_dottxt.py env: COVERAGE_FILE: .coverage.${{ steps.matrix-id.outputs.id }} ================================================ FILE: .gitignore ================================================ __pycache__ .benchmarks .cache .coverage .direnv .env .idea .pytest_cache .python-version .venv *_version.py *.egg-info *.gguf benchmarks/results build docs/build logs .worktrees/ ================================================ FILE: .pre-commit-config.yaml ================================================ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - id: check-merge-conflict - id: debug-statements - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.14.1 hooks: - id: mypy args: [--allow-redefinition] exclude: ^examples/ additional_dependencies: [types-tqdm, types-Pillow] - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.9.1 hooks: - id: ruff args: ["--config=pyproject.toml"] ================================================ FILE: .pydocstyle ================================================ [pydocstyle] convention = numpy ================================================ FILE: .readthedocs.yaml ================================================ version: 2 python: version: "3.8" install: - method: pip path: . extra_requirements: - rtd - requirements: requirements-doc.txt sphinx: builder: html configuration: docs/source/conf.py fail_on_warning: true ================================================ FILE: .vscode/settings.json ================================================ { "python.testing.pytestArgs": [ "tests" ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true } ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2023- The Outlines developers Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================
Outlines Logo Outlines Logo 🗒️ *Structured outputs for LLMs* 🗒️ Made with ❤👷️ by the team at [.txt](https://dottxt.co)
Trusted by NVIDIA, Cohere, HuggingFace, vLLM, etc. [![PyPI Version][pypi-version-badge]][pypi] [![Downloads][downloads-badge]][pypistats] [![Stars][stars-badge]][stars] [![Discord][discord-badge]][discord] [![Blog][dottxt-blog-badge]][dottxt-blog] [![Twitter][twitter-badge]][twitter]
## 🚀 Building the future of structured generation We're working with select partners to develop new interfaces to structured generation. Need XML, FHIR, custom schemas or grammars? Let's talk. Audit your schema: share one schema, we show you what breaks under generation, the constraints that fix it, and compliance rates before and after. Sign up [here](https://h1xbpbfsf0w.typeform.com/to/rtFUraA2?typeform). ## Table of Contents - [Why Outlines?](#why-outlines) - [Quickstart](#quickstart) - [Real-World Examples](#real-world-examples) - [🙋‍♂️ Customer Support Triage](#customer-support-triage) - [📦 E-commerce Product Categorization](#e-commerce-product-categorization) - [📊 Parse Event Details with Incomplete Data](#parse-event-details-with-incomplete-data) - [🗂️ Categorize Documents into Predefined Types](#categorize-documents-into-predefined-types) - [📅 Schedule a Meeting with Function Calling](#schedule-a-meeting-with-function-calling) - [📝 Dynamically Generate Prompts with Re-usable Templates](#dynamically-generate-prompts-with-re-usable-templates) - [They Use Outlines](#they-use-outlines) - [Model Integrations](#model-integrations) - [Core Features](#core-features) - [Other Features](#other-features) - [About .txt](#about-txt) - [Community](#community)
## Why Outlines? LLMs are powerful but their outputs are unpredictable. Most solutions attempt to fix bad outputs after generation using parsing, regex, or fragile code that breaks easily. Outlines guarantees structured outputs during generation — directly from any LLM. - **Works with any model** - Same code runs across OpenAI, Ollama, vLLM, and more - **Simple integration** - Just pass your desired output type: `model(prompt, output_type)` - **Guaranteed valid structure** - No more parsing headaches or broken JSON - **Provider independence** - Switch models without changing code ### The Outlines Philosophy
Outlines follows a simple pattern that mirrors Python's own type system. Simply specify the desired output type, and Outlines will ensure your data matches that structure exactly: - For a yes/no response, use `Literal["Yes", "No"]` - For numerical values, use `int` - For complex objects, define a structure with a [Pydantic model](https://docs.pydantic.dev/latest/) ## Quickstart Getting started with outlines is simple: ### 1. Install outlines ``` shell pip install outlines ``` ### 2. Connect to your preferred model ``` python import outlines from transformers import AutoTokenizer, AutoModelForCausalLM MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct" model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"), AutoTokenizer.from_pretrained(MODEL_NAME) ) ``` ### 3. Start with simple structured outputs ``` python from typing import Literal from pydantic import BaseModel # Simple classification sentiment = model( "Analyze: 'This product completely changed my life!'", Literal["Positive", "Negative", "Neutral"] ) print(sentiment) # "Positive" # Extract specific types temperature = model("What's the boiling point of water in Celsius?", int) print(temperature) # 100 ``` ### 4. Create complex structures ``` python from pydantic import BaseModel from enum import Enum class Rating(Enum): poor = 1 fair = 2 good = 3 excellent = 4 class ProductReview(BaseModel): rating: Rating pros: list[str] cons: list[str] summary: str review = model( "Review: The XPS 13 has great battery life and a stunning display, but it runs hot and the webcam is poor quality.", ProductReview, max_new_tokens=200, ) review = ProductReview.model_validate_json(review) print(f"Rating: {review.rating.name}") # "Rating: good" print(f"Pros: {review.pros}") # "Pros: ['great battery life', 'stunning display']" print(f"Summary: {review.summary}") # "Summary: Good laptop with great display but thermal issues" ``` ## Real-world examples Here are production-ready examples showing how Outlines solves common problems:
🙋‍♂️ Customer Support Triage
This example shows how to convert a free-form customer email into a structured service ticket. By parsing attributes like priority, category, and escalation flags, the code enables automated routing and handling of support issues.
``` python import outlines from enum import Enum from pydantic import BaseModel from transformers import AutoTokenizer, AutoModelForCausalLM from typing import List MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct" model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"), AutoTokenizer.from_pretrained(MODEL_NAME) ) def alert_manager(ticket): print("Alert!", ticket) class TicketPriority(str, Enum): low = "low" medium = "medium" high = "high" urgent = "urgent" class ServiceTicket(BaseModel): priority: TicketPriority category: str requires_manager: bool summary: str action_items: List[str] customer_email = """ Subject: URGENT - Cannot access my account after payment I paid for the premium plan 3 hours ago and still can't access any features. I've tried logging out and back in multiple times. This is unacceptable as I have a client presentation in an hour and need the analytics dashboard. Please fix this immediately or refund my payment. """ prompt = f""" <|im_start|>user Analyze this customer email: {customer_email} <|im_end|> <|im_start|>assistant """ ticket = model( prompt, ServiceTicket, max_new_tokens=500 ) # Use structured data to route the ticket ticket = ServiceTicket.model_validate_json(ticket) if ticket.priority == "urgent" or ticket.requires_manager: alert_manager(ticket) ```
📦 E-commerce product categorization
This use case demonstrates how outlines can transform product descriptions into structured categorization data (e.g., main category, sub-category, and attributes) to streamline tasks such as inventory management. Each product description is processed automatically, reducing manual categorization overhead.
```python import outlines from pydantic import BaseModel from transformers import AutoTokenizer, AutoModelForCausalLM from typing import List, Optional MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct" model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"), AutoTokenizer.from_pretrained(MODEL_NAME) ) def update_inventory(product, category, sub_category): print(f"Updated {product.split(',')[0]} in category {category}/{sub_category}") class ProductCategory(BaseModel): main_category: str sub_category: str attributes: List[str] brand_match: Optional[str] # Process product descriptions in batches product_descriptions = [ "Apple iPhone 15 Pro Max 256GB Titanium, 6.7-inch Super Retina XDR display with ProMotion", "Organic Cotton T-Shirt, Men's Medium, Navy Blue, 100% Sustainable Materials", "KitchenAid Stand Mixer, 5 Quart, Red, 10-Speed Settings with Dough Hook Attachment" ] template = outlines.Template.from_string(""" <|im_start|>user Categorize this product: {{ description }} <|im_end|> <|im_start|>assistant """) # Get structured categorization for all products categories = model( [template(description=desc) for desc in product_descriptions], ProductCategory, max_new_tokens=200 ) # Use categorization for inventory management categories = [ ProductCategory.model_validate_json(category) for category in categories ] for product, category in zip(product_descriptions, categories): update_inventory(product, category.main_category, category.sub_category) ```
📊 Parse event details with incomplete data
This example uses outlines to parse event descriptions into structured information (like event name, date, location, type, and topics), even handling cases where the data is incomplete. It leverages union types to return either structured event data or a fallback “I don’t know” answer, ensuring robust extraction in varying scenarios.
```python import outlines from typing import Union, List, Literal from pydantic import BaseModel from enum import Enum from transformers import AutoTokenizer, AutoModelForCausalLM MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct" model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"), AutoTokenizer.from_pretrained(MODEL_NAME) ) class EventType(str, Enum): conference = "conference" webinar = "webinar" workshop = "workshop" meetup = "meetup" other = "other" class EventInfo(BaseModel): """Structured information about a tech event""" name: str date: str location: str event_type: EventType topics: List[str] registration_required: bool # Create a union type that can either be a structured EventInfo or "I don't know" EventResponse = Union[EventInfo, Literal["I don't know"]] # Sample event descriptions event_descriptions = [ # Complete information """ Join us for DevCon 2023, the premier developer conference happening on November 15-17, 2023 at the San Francisco Convention Center. Topics include AI/ML, cloud infrastructure, and web3. Registration is required. """, # Insufficient information """ Tech event next week. More details coming soon! """ ] # Process events results = [] for description in event_descriptions: prompt = f""" <|im_start>system You are a helpful assistant <|im_end|> <|im_start>user Extract structured information about this tech event: {description} If there is enough information, return a JSON object with the following fields: - name: The name of the event - date: The date where the event is taking place - location: Where the event is taking place - event_type: either 'conference', 'webinar', 'workshop', 'meetup' or 'other' - topics: a list of topics of the conference - registration_required: a boolean that indicates whether registration is required If the information available does not allow you to fill this JSON, and only then, answer 'I don't know'. <|im_end|> <|im_start|>assistant """ # Union type allows the model to return structured data or "I don't know" result = model(prompt, EventResponse, max_new_tokens=200) results.append(result) # Display results for i, result in enumerate(results): print(f"Event {i+1}:") if isinstance(result, str): print(f" {result}") else: # It's an EventInfo object print(f" Name: {result.name}") print(f" Type: {result.event_type}") print(f" Date: {result.date}") print(f" Topics: {', '.join(result.topics)}") print() # Use structured data in downstream processing structured_count = sum(1 for r in results if isinstance(r, EventInfo)) print(f"Successfully extracted data for {structured_count} of {len(results)} events") ```
🗂️ Categorize documents into predefined types
In this case, outlines classifies documents into predefined categories (e.g., “Financial Report,” “Legal Contract”) using a literal type specification. The resulting classifications are displayed in both a table format and through a category distribution summary, illustrating how structured outputs can simplify content management.
```python import outlines from typing import Literal, List import pandas as pd from transformers import AutoTokenizer, AutoModelForCausalLM MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct" model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"), AutoTokenizer.from_pretrained(MODEL_NAME) ) # Define classification categories using Literal DocumentCategory = Literal[ "Financial Report", "Legal Contract", "Technical Documentation", "Marketing Material", "Personal Correspondence" ] # Sample documents to classify documents = [ "Q3 Financial Summary: Revenue increased by 15% year-over-year to $12.4M. EBITDA margin improved to 23% compared to 19% in Q3 last year. Operating expenses...", "This agreement is made between Party A and Party B, hereinafter referred to as 'the Parties', on this day of...", "The API accepts POST requests with JSON payloads. Required parameters include 'user_id' and 'transaction_type'. The endpoint returns a 200 status code on success." ] template = outlines.Template.from_string(""" <|im_start|>user Classify the following document into exactly one category among the following categories: - Financial Report - Legal Contract - Technical Documentation - Marketing Material - Personal Correspondence Document: {{ document }} <|im_end|> <|im_start|>assistant """) # Classify documents def classify_documents(texts: List[str]) -> List[DocumentCategory]: results = [] for text in texts: prompt = template(document=text) # The model must return one of the predefined categories category = model(prompt, DocumentCategory, max_new_tokens=200) results.append(category) return results # Perform classification classifications = classify_documents(documents) # Create a simple results table results_df = pd.DataFrame({ "Document": [doc[:50] + "..." for doc in documents], "Classification": classifications }) print(results_df) # Count documents by category category_counts = pd.Series(classifications).value_counts() print("\nCategory Distribution:") print(category_counts) ```
📅 Schedule a meeting from requests with Function Calling
This example demonstrates how outlines can interpret a natural language meeting request and translate it into a structured format matching a predefined function’s parameters. Once the meeting details are extracted (e.g., title, date, duration, attendees), they are used to automatically schedule the meeting.
```python import outlines import json from typing import List, Optional from datetime import date from transformers import AutoTokenizer, AutoModelForCausalLM MODEL_NAME = "microsoft/phi-4" model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"), AutoTokenizer.from_pretrained(MODEL_NAME) ) # Define a function with typed parameters def schedule_meeting( title: str, date: date, duration_minutes: int, attendees: List[str], location: Optional[str] = None, agenda_items: Optional[List[str]] = None ): """Schedule a meeting with the specified details""" # In a real app, this would create the meeting meeting = { "title": title, "date": date, "duration_minutes": duration_minutes, "attendees": attendees, "location": location, "agenda_items": agenda_items } return f"Meeting '{title}' scheduled for {date} with {len(attendees)} attendees" # Natural language request user_request = """ I need to set up a product roadmap review with the engineering team for next Tuesday at 2pm. It should last 90 minutes. Please invite john@example.com, sarah@example.com, and the product team at product@example.com. """ # Outlines automatically infers the required structure from the function signature prompt = f""" <|im_start|>user Extract the meeting details from this request: {user_request} <|im_end|> <|im_start|>assistant """ meeting_params = model(prompt, schedule_meeting, max_new_tokens=200) # The result is a dictionary matching the function parameters meeting_params = json.loads(meeting_params) print(meeting_params) # Call the function with the extracted parameters result = schedule_meeting(**meeting_params) print(result) # "Meeting 'Product Roadmap Review' scheduled for 2023-10-17 with 3 attendees" ```
📝 Dynamically generate prompts with re-usable templates
Using Jinja-based templates, this example shows how to generate dynamic prompts for tasks like sentiment analysis. It illustrates how to easily re-use and customize prompts—including few-shot learning strategies—for different content types while ensuring the outputs remain structured.
```python import outlines from typing import List, Literal from transformers import AutoTokenizer, AutoModelForCausalLM MODEL_NAME = "microsoft/phi-4" model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"), AutoTokenizer.from_pretrained(MODEL_NAME) ) # 1. Create a reusable template with Jinja syntax sentiment_template = outlines.Template.from_string(""" <|im_start>user Analyze the sentiment of the following {{ content_type }}: {{ text }} Provide your analysis as either "Positive", "Negative", or "Neutral". <|im_end> <|im_start>assistant """) # 2. Generate prompts with different parameters review = "This restaurant exceeded all my expectations. Fantastic service!" prompt = sentiment_template(content_type="review", text=review) # 3. Use the templated prompt with structured generation result = model(prompt, Literal["Positive", "Negative", "Neutral"]) print(result) # "Positive" # Templates can also be loaded from files example_template = outlines.Template.from_file("templates/few_shot.txt") # Use with examples for few-shot learning examples = [ ("The food was cold", "Negative"), ("The staff was friendly", "Positive") ] few_shot_prompt = example_template(examples=examples, query="Service was slow") print(few_shot_prompt) ```
## They use outlines
Users Logo Users Logo
## Model Integrations | Model type | Description | Documentation | |---------|-------------|:-------------:| | **Server Support** | vLLM and Ollama | [Server Integrations →](https://dottxt-ai.github.io/outlines/latest/features/models/) | | **Local Model Support** | transformers and llama.cpp | [Model Integrations →](https://dottxt-ai.github.io/outlines/latest/features/models/) | | **API Support** | OpenAI and Gemini | [API Integrations →](https://dottxt-ai.github.io/outlines/latest/features/models/) | ## Core Features | Feature | Description | Documentation | |---------|-------------|:-------------:| | **Multiple Choices** | Constrain outputs to predefined options | [Multiple Choices Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#multiple-choices) | | **Function Calls** | Infer structure from function signatures | [Function Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#json-schemas) | | **JSON/Pydantic** | Generate outputs matching JSON schemas | [JSON Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#json-schemas) | | **Regular Expressions** | Generate text following a regex pattern | [Regex Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#regex-patterns) | | **Grammars** | Enforce complex output structures | [Grammar Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#context-free-grammars) | ## Other Features | Feature | Description | Documentation | |---------|-------------|:-------------:| | **Prompt templates** | Separate complex prompts from code | [Template Guide →](https://dottxt-ai.github.io/outlines/latest/features/utility/template/) | | **Custome types** | Intuitive interface to build complex types | [Python Types Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#basic-python-types) | | **Applications** | Encapsulate templates and types into functions | [Application Guide →](https://dottxt-ai.github.io/outlines/latest/features/utility/application/) | ## About .txt
dottxt logo dottxt logo
Outlines is developed and maintained by [.txt](https://dottxt.co), a company dedicated to making LLMs more reliable for production applications. Our focus is on advancing structured generation technology through: - 🧪 **Cutting-edge Research**: We publish our findings on [structured generation](http://blog.dottxt.co/performance-gsm8k.html) - 🚀 **Enterprise-grade solutions**: You can license [our enterprise-grade libraries](https://docs.dottxt.co). - 🧩 **Open Source Collaboration**: We believe in building in public and contributing to the community Follow us on [Twitter](https://twitter.com/dottxtai) or check out our [blog](https://blog.dottxt.co/) to stay updated on our latest work in making LLMs more reliable. ## Community
[![Contributors][contributors-badge]][contributors] [![Stars][stars-badge]][stars] [![Downloads][downloads-badge]][pypistats] [![Discord badge][discord-badge]][discord]
- 💡 **Have an idea?** Come chat with us on [Discord][discord] - 🐞 **Found a bug?** Open an [issue](https://github.com/dottxt-ai/outlines/issues) - 🧩 **Want to contribute?** Consult our [contribution guide](https://dottxt-ai.github.io/outlines/latest/community/contribute/). ## Cite Outlines ``` @article{willard2023efficient, title={Efficient Guided Generation for Large Language Models}, author={Willard, Brandon T and Louf, R{\'e}mi}, journal={arXiv preprint arXiv:2307.09702}, year={2023} } ``` [contributors]: https://github.com/dottxt-ai/outlines/graphs/contributors [contributors-badge]: https://img.shields.io/github/contributors/dottxt-ai/outlines?style=flat-square&logo=github&logoColor=white&color=ECEFF4 [dottxt-blog]: https://blog.dottxt.co/ [dottxt-blog-badge]: https://img.shields.io/badge/dottxt%20blog-a6b4a3 [dottxt-twitter]: https://twitter.com/dottxtai [dottxt-twitter-badge]: https://img.shields.io/twitter/follow/dottxtai?style=social [discord]: https://discord.gg/R9DSu34mGd [discord-badge]: https://img.shields.io/discord/1182316225284554793?color=ddb8ca&logo=discord&logoColor=white&style=flat-square [downloads-badge]: https://img.shields.io/pypi/dm/outlines?color=A6B4A3&logo=python&logoColor=white&style=flat-square [pypistats]: https://pypistats.org/packages/outlines [pypi-version-badge]: https://img.shields.io/pypi/v/outlines?style=flat-square&logoColor=white&color=ddb8ca [pypi]: https://pypi.org/project/outlines/ [stars]: https://github.com/dottxt-ai/outlines/stargazers [stars-badge]: https://img.shields.io/github/stars/dottxt-ai/outlines?style=flat-square&logo=github&color=BD932F&logoColor=white [twitter-badge]: https://img.shields.io/twitter/follow/dottxtai?style=flat-square&logo=x&logoColor=white&color=bd932f [twitter]: https://x.com/dottxtai ================================================ FILE: docs/api_reference/index.md ================================================ # API Reference ================================================ FILE: docs/blog/index.md ================================================ # Blog ================================================ FILE: docs/community/contribute.md ================================================ --- title: Contribute --- ## What contributions? - **Documentation** contributions are very valuable to us! - **Examples.** Show us what you did with Outlines :) - **Bug reports** with a minimum working examples in the [issue tracker][issues] - **Bug fixes** are always a pleasure to review. - **New features**. Please start a new [discussion][discussions], or [come chat with us][discord] beforehand! Note that the [issue tracker][issues] is only intended for actionable items. In doubt, open a [discussion][discussions] or [come talk to us][discord]. ## How to contribute? ### Setup First, [fork the repository on GitHub](https://github.com/dottxt-ai/outlines/fork) and clone the fork locally: ```shell git clone git@github.com/YourUserName/outlines.git cd outlines ``` Create a new virtual environment: *If you are using `uv`*: ```shell uv venv source .venv/bin/activate alias pip="uv pip" # ... or just remember to prepend any pip command with uv in the rest of this guide ``` *If you are using `venv`*: ```shell python -m venv .venv source .venv/bin/activate ``` *If you are using `conda`*: ```shell conda env create -f environment.yml ``` Then install the dependencies in editable mode, and install the `pre-commit` hooks: ```shell pip install -e ".[test]" pre-commit install ``` If you own a GPU and want to run the vLLM tests you will have to run: ```shell pip install -e ".[test-gpu]" ``` instead. Outlines provides optional dependencies for different supported backends, which you can install with ```shell pip install ".[vllm]" ``` A list of supported optional dependencies can be found in the [installation guide](/installation). ### Using VSCode DevContainer / GitHub Codespaces If you want a fully pre-configured development environment, you can use VSCode DevContainers or GitHub Codespaces. #### VSCode DevContainer 1. Ensure that the [Docker](https://www.docker.com/get-started/) daemon is running on your machine. 2. Install the [Dev Containers](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) extension in VSCode. 3. Open the Outlines repository in VSCode. When prompted, **Reopen in Container** (or press `F1` and select "Remote-Containers: Reopen in Container"). 4. Run the normal setup steps. Your environment will not complain about missing system dependencies! #### GitHub Codespaces 1. Navigate to the Outlines repository on GitHub. 2. Click on the **Code** button and select the **Codespaces** tab. 3. Click **Create codespace on main** (or another branch you are working on). 4. GitHub will launch a pre-configured cloud development environment. You will not have access to a GPU, but you'll be able to make basic contributions to the project on the go while using a fully featured web-based IDE. ### Before pushing your code Run the tests: ```shell pytest ``` And run the code style checks: ```shell pre-commit run --all-files ``` ### Benchmarking Outlines uses [asv](https://asv.readthedocs.io) for automated benchmark testing. Benchmarks are run automatically before pull requests are merged to prevent performance degradation. You can run the benchmark test suite locally with the following command: ```shell asv run --config benchmarks/asv.conf.json ``` Caveats: - If you're on a device with CUDA, you must add the argument `--launch-method spawn` - Uncommitted code will not be benchmarked, you must first commit your changes. #### Run a specific test: ```shell asv run --config benchmarks/asv.conf.json -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm ``` #### Profile a specific test: ```shell asv run --config benchmarks/asv.conf.json --profile -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm ``` #### Compare to `origin/main` ```shell get fetch origin asv continuous origin/main HEAD --config benchmarks/asv.conf.json ``` #### ASV PR Behavior - **View ASV Benchmark Results:** Open the workflow, view `BENCHMARK RESULTS` section. - Merging is blocked unless benchmarks are run for the latest commit. - Benchmarks fail if performance degrades by more than 10% for any individual benchmark. - The "Benchmark PR" workflow runs when it is manually dispatched, or if the `run_benchmarks` label is added to the PR they run for every commit. ### Contribute to the documentation To work on the *documentation* you will need to install the related dependencies: ```shell pip install -r requirements-doc.txt ``` To build the documentation and serve it locally, run the following command in the repository's root folder: ```shell mkdocs serve ``` By following the instruction you will be able to view the documentation locally. It will be updated every time you make a change. ## Open a Pull Request Create a new branch on your fork, commit and push the changes: ```shell git checkout -b new-branch git add . git commit -m "Changes I made" git push origin new-branch ``` Then you can [open a pull request][pull-requests] on GitHub. It should prompt you to do so. Every subsequent change that you make on your branch will update the pull request. Do not hesitate to open a draft PR before your contribution is ready, especially if you have questions and/or need feedback. If you need help, come tell us on [Discord][discord]. [discord]: https://discord.gg/R9DSu34mGd [discussions]: https://github.com/dottxt-ai/outlines/discussions [issues]: https://github.com/dottxt-ai/outlines/issues [pull-requests]: https://github.com/dottxt-ai/outlines/pulls ================================================ FILE: docs/community/examples.md ================================================ # Community projects and articles Publishing examples and articles about Outlines are a meaningful way to contribute to the community. Here is a list of projects we are aware of. Drop us a line if we forgot yours! [MMSG](https://github.com/leloykun/mmsg) is a Python library for generating interleaved text and image content in a structured format you can directly pass to downstream APIs. [Multimodal Structured Generation: CVPR's 2nd MMFM Challenge Technical Report](https://arxiv.org/abs/2406.11403) shows that Structured Generation can outperform finetuning, and maybe even multimodality, in document-image understanding tasks as part of CVPR's 2nd MMFM Challenge. [Chess LLM Arena](https://huggingface.co/spaces/mlabonne/chessllm) is a HuggingFace Space where you can make LLMs compete in a chess match. [LLM Data Gen](https://huggingface.co/spaces/lhoestq/LLM_DataGen) is a HuggingFace Space that generates synthetic dataset files in JSONLines format. [Fast, High-Fidelity LLM Decoding with Regex Constraints ](https://vivien000.github.io/blog/journal/llm-decoding-with-regex-constraints.html) presents an efficient alternative to Outlines's structured generation. [gigax](https://github.com/GigaxGames/gigax) is an Open-Source library that allows to create real-time LLM-powered NPCs for video games. [Improving Prompt Consistency with Structured Generations](https://huggingface.co/blog/evaluation-structured-outputs) shows how structured generation can improve consistency of evaluation runs by reducing sensitivity to changes in prompt format. [AskNews](https://asknews.app) is a news curation service processing 300k news articles per day in a structured way, with Outlines. ================================================ FILE: docs/community/feedback.md ================================================ --- title: Feedback --- # Feedback If Outlines has been helpful to you, let us know on [Discord][discord] or give us a shoutout on [Twitter][twitter]! It's always heartwarming ❤️
# Let us know! We highly value the insights of our users, and we would love to hear from you. If you are using Outlines for your projects and would like to share your experience with us, let's connect: - What are you building with it? - What do you like about it? - What challenges are you facing? - What do you think could be improved? To schedule an appointment follow [this link](https://cal.com/dottxt/outlines). This is exclusively intended to share your experience, please go on [Discord][discord] or [GitHub](https://github.com/dottxt-ai/outlines/discussions) for support. [discord]: https://discord.gg/UppQmhEpe8 [twitter]: https://twitter.com/dottxtai ================================================ FILE: docs/community/index.md ================================================ # Community Outlines exists for a community of users who believe software doesn't need to be complicated. Who share the same passion for Large Language Models but don't want to compromise on robustness. Together, we are bringing these powerful models back to the world of software. ## Connect on Discord The Outlines community lives on our Discord server. There you can ask questions, share ideas or just chat with people like you. Don't be a stranger and [join us][discord]. [discord]: https://discord.gg/UppQmhEpe8 ================================================ FILE: docs/community/versioning.md ================================================ --- title: Versioning Guide --- # Versioning Guide The Outlines project follows a structured versioning scheme designed to provide clarity and minimize risk for downstream dependents. Each part of the version number (`major.minor.patch`) conveys information about the nature and impact of the changes included in the release. - **Major Releases** includes compatibility-breaking changes to core interfaces, such as `LogitsProcessor`s and `Guides`. - **Minor Releases** introduce changes of substance to internal or unexposed functionality. These changes are well tested and intended to maintain compatibility with existing use of core interfaces. - **Patch Releases** address bug fixes and incorporate low-risk changes to improve stability and performance. !!! note "Breaking Changes" Outlines v1.0 introduced several breaking changes to the core interface. See [the migration guide](/user_guide/migration) for more details. ## Releases Releases along with release notes can be found on the [Outlines Releases GitHub Page](https://github.com/dottxt-ai/outlines/releases). ## Version Pinning Recommendations Here are our recommendations for managing dependencies on the Outlines package: **Small, Risk-Tolerant Projects:** Pin to a specific major version. **Large, Conservative Projects:** Pin to a specific minor version. ================================================ FILE: docs/core_concepts.md ================================================ --- title: Core concepts --- # Core concepts Coming soon. This will document various concepts at a high level, so users can understand Outlines before diving into specific implementations. 1. Constrained decoding, tokens, and the basics of logit biasing 2. Different ways to define output structure (regex, JSON schema, Pydantic models, context-free grammars) 3. How finite state machines are used to guarantee output structure 4. `Generator`, `Application`, `Template`, 5. Prompt engineering vs. structured generation ================================================ FILE: docs/examples/chain_of_density.md ================================================ # Summarize documents using Chain of Density prompting A good summary should be informative, concise and clear. While large language models are generally good at summarizing documents, their summaries tend to be long and contain redundant information; their information density tends to be on the lower end. This is where [chain of Density](https://arxiv.org/abs/2309.04269), a new prompting technique, comes in. In this example we will show how one can implement chain of density with a few lines of code using Outlines, leveraging both Outline's prompt templating and its structured generation capabilities. The article we will try to summarize is the first three paragraphs of the [Alan Turing page on Wikipedia](https://en.wikipedia.org/wiki/Alan_Turing): ```python article = """ Alan Mathison Turing OBE FRS (/ˈtjʊərɪŋ/; 23 June 1912 – 7 June 1954) was an English mathematician, computer scientist, logician, cryptanalyst, philosopher and theoretical biologist.[5] Turing was highly influential in the development of theoretical computer science, providing a formalisation of the concepts of algorithm and computation with the Turing machine, which can be considered a model of a general-purpose computer.[6][7][8] He is widely considered to be the father of theoretical computer science and artificial intelligence.[9] Born in Maida Vale, London, Turing was raised in southern England. He graduated at King's College, Cambridge, with a degree in mathematics. Whilst he was a fellow at Cambridge, he published a proof demonstrating that some purely mathematical yes–no questions can never be answered by computation. He defined a Turing machine and proved that the halting problem for Turing machines is undecidable. In 1938, he obtained his PhD from the Department of Mathematics at Princeton University. During the Second World War, Turing worked for the Government Code and Cypher School at Bletchley Park, Britain's codebreaking centre that produced Ultra intelligence. For a time he led Hut 8, the section that was responsible for German naval cryptanalysis. Here, he devised a number of techniques for speeding the breaking of German ciphers, including improvements to the pre-war Polish bomba method, an electromechanical machine that could find settings for the Enigma machine. Turing played a crucial role in cracking intercepted coded messages that enabled the Allies to defeat the Axis powers in many crucial engagements, including the Battle of the Atlantic.[10][11] After the war, Turing worked at the National Physical Laboratory, where he designed the Automatic Computing Engine, one of the first designs for a stored-program computer. In 1948, Turing joined Max Newman's Computing Machine Laboratory at the Victoria University of Manchester, where he helped develop the Manchester computers[12] and became interested in mathematical biology. He wrote a paper on the chemical basis of morphogenesis[1] and predicted oscillating chemical reactions such as the Belousov–Zhabotinsky reaction, first observed in the 1960s. Despite these accomplishments, Turing was never fully recognised in Britain during his lifetime because much of his work was covered by the Official Secrets Act.[13] """ ``` ## How Chain Of Density works Chain Of Density starts with asking the model to generate a first long and non-specific summary. Then it asks the model to generate 4 extra summaries by proceeding in the following way: 1. Identify 1-3 entities missing in the previous summary; 2. Add all entities marked as missing in the previous step, while not dropping entities; 3. Make the summary more concise; The prompt also asks the model to return a list of JSON objects that contain the missing entities and the new summary. This is where structured generation will come in handy :) The paper provides the prompt and an example: ![Figure 2 in the paper](./images/chain_of_density.png) We can now implement the prompt provided in the paper. We stored the prompt template in a text file, and we can load it using the `Template` class: ```python from outlines import Template chain_of_density = Template.from_file("prompt_templates/chain_of_density.txt") ``` ??? Note Note that we modified the prompt slightly so it returns a JSON object that contains the summaries, instead of a list of summaries. ## Outlines implementation We will use Outline's JSON-structured generation to ensure that the model's output is consistent with the format specified in the prompt. We start with defining the JSON objects that the model is asked to return using Pydantic. One JSON object that contains a list of `Summary` objects that contain the missing entities and new summary: ```python from pydantic import BaseModel, conlist class Summary(BaseModel): missing_entities: str denser_summary: str class Summaries(BaseModel): summaries: conlist(Summary, max_length=5, min_length=5) ``` We now generate the prompt by passing the article we want to summarize to the prompt template previously loaded. We load a quantized version of Mistral-7B using the AutoAWQ library, and then use the `Summaries` schema to generate the summaries with structured generation: ```python import outlines import transformers MODEL_NAME = "TheBloke/Mistral-7B-OpenOrca-AWQ" model = outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME), transformers.AutoTokenizer.from_pretrained(MODEL_NAME) ) prompt = chain_of_density(article=article) result = model(prompt, Summaries, max_new_tokens=2000) ``` We can now check the results: ```python print(result) # {'summaries': [ # { # 'missing_entities': 'English mathematician, cryptanalyst, philosopher', # 'denser_summary': 'Alan Mathison Turing was an English mathematician, cryptanalyst, philosopher.' # }, # { # 'missing_entities': '', # 'denser_summary': "Alan Mathison Turing was an English mathematician who was a crucial figure in WW2's Bletchley Park codebreaking centre and designed one of the first computers." # }, # { # 'missing_entities': 'cryptanalyst, studied, biology, father', # 'denser_summary': 'Alan Mathison Turing was an English cryptanalyst, studied theoretical computer science, and contributed to mathematical biology.' # }, # { # 'missing_entities': 'biology, morphogenesis, chemical', # 'denser_summary': 'Alan Mathison Turing was an English cryptanalyst, studied theoretical computer science, and predicted chemical reactions in morphogenesis. # '}, # { # 'missing_entities': '', # 'denser_summary': 'Alan Mathison Turing was an English cryptanalyst, developed computer science, and made strides in mathematical biology research.' # } # ]} ``` Not bad, considering we used a smallish model to generate the summary! Chain of Density seems to be a very effective prompting technique to generate dense summaries, even with small quantized models. Its implementation in Outlines is also very short. Note that this is the first article I tried and it worked out of the box. Try it out on other articles, and please share the results on Twitter, or by opening [a new discussion](https://github.com/dottxt-ai/outlines/discussions/categories/show-and-tell) on the Outlines repository! ================================================ FILE: docs/examples/chain_of_thought.md ================================================ # Chain of thought Chain of thought is a prompting technique introduced in the paper ["Chain-of-Thought Prompting Elicits Reasoning in Large Language Models"](https://arxiv.org/abs/2201.11903) where throught prompting the authors generate a series of intermediate reasoning steps which improves the ability of LLMs to perform complex reasoning. In this guide, we use [outlines](https://dottxt-ai.github.io/outlines/) to apply chain of thought through structured output. We use [llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library. Outlines supports llama-cpp-python, but we need to install it ourselves: ```shell pip install llama-cpp-python ``` To create an outlines `LlamaCpp` model, you first need to create a `Llama` object from the `llama-cpp-python` library. Then you can create the outlines model by calling `models.from_llamacpp` with the `Llama` object instance as argument. To create the `Llama` object, you need to provide the model weights by passing the name of the repository on the HuggingFace Hub, and the filenames or glob pattern (it will automatically download the weights from the hub): ```python import llama_cpp import outlines llm = llama_cpp.Llama( "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF", tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained( "NousResearch/Hermes-2-Pro-Llama-3-8B" ), n_gpu_layers=-1, flash_attn=True, n_ctx=8192, verbose=False ) model = outlines.from_llamacpp(llm) ``` ??? note "(Optional) Store the model weights in a custom folder" By default the model weights are downloaded to the hub cache but if we want so store the weights in a custom folder, we pull a quantized GGUF model [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF) by [NousResearch](https://nousresearch.com/) from [HuggingFace](https://huggingface.co/): ```shell wget https://hf.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf ``` We initialize the model: ```python from llama_cpp import Llama llm = Llama("/path/to/model/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ...) ``` ## Chain of thought We first define our Pydantic class for a reasoning step: ```python from pydantic import BaseModel, Field class Reasoning_Step(BaseModel): reasoning_step: str = Field(..., description="Reasoning step") ``` We then define the Pydantic class for reasoning which will consist on a list of reasoning steps and a conclusion, and we get its JSON schema: ```python from typing import List class Reasoning(BaseModel): reasoning: List[Reasoning_Step] = Field(..., description="List of reasoning steps") conclusion: str = Field(..., description="Conclusion") json_schema = Reasoning.model_json_schema() ``` We then need to adapt our prompt to the [Hermes prompt format for JSON schema](https://github.com/NousResearch/Hermes-Function-Calling?tab=readme-ov-file#prompt-format-for-json-mode--structured-outputs): ```python from outlines import Template generate_hermes_prompt = Template.from_string( """ <|im_start|>system You are a world class AI model who answers questions in JSON Here's the json schema you must adhere to: {{ json_schema }} <|im_end|> <|im_start|>user {{ user_prompt }} <|im_end|> <|im_start|>assistant """ ) ``` For a given user prompt: ```python user_prompt = "9.11 and 9.9 -- which is bigger?" ``` We can use `outlines.Generator` with the Pydantic class we previously defined, and call the generator with the Hermes prompt: ```python generator = outlines.Generator(model, regex_str) prompt = generate_hermes_prompt(json_schema=json_schema, user_prompt=user_prompt) response = generator(prompt, max_tokens=1024, temperature=0, seed=42) ``` We obtain a series of intermediate reasoning steps as well as the conclusion: ```python import json json_response = json.loads(response) print(json_response["reasoning"]) print(json_response["conclusion"]) # [{'reasoning_step': 'Both 9.11 and 9.9 are decimal numbers.'}, # {'reasoning_step': 'When comparing decimal numbers, we look at the numbers after the decimal point.'}, # {'reasoning_step': 'In this case, 9.11 has the number 1 after the decimal point, while 9.9 has the number 9.'}, # {'reasoning_step': 'Since 1 is greater than 9, 9.11 is greater than 9.9.'}] # '9.11 is bigger.' ``` We notice that the 4th reasoning step is wrong ``Since 1 is greater than 9, 9.11 is greater than 9.9.'', so we should probably give the model some examples for this particular task. This example was originally contributed by [Alonso Silva](https://github.com/alonsosilvaallende). ================================================ FILE: docs/examples/classification.md ================================================ # Classification Classification is a classic problem in NLP and finds many applications: spam detection, sentiment analysis, triaging of incoming requests, etc. We will use the example of a company that wants to sort support requests between those that require immediate attention (`URGENT`), those that can wait a little (`STANDARD`). You could easily extend the example by adding new labels. This tutorial shows how one can implement multi-label classification using Outlines. As always, we start with initializing the model. Since we are GPU poor we will be using a quantized version of Mistal-7B-v0.1: ```python import outlines import transformers MODEL_NAME = "TheBloke/Mistral-7B-OpenOrca-AWQ" model = outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME), transformers.AutoTokenizer.from_pretrained(MODEL_NAME) ) ``` We will use a prompt template stored in a text file: ```python from outlines import Template customer_support = Template.from_file("prompt_templates/classification.txt") ``` ## Choosing between multiple choices Outlines provides a convenient way to do multi-label classification, passing a Literal type hint to the `outlines.Generator` object: ```python from typing import Literal import outlines generator = outlines.Generator(model, Literal["URGENT", "STANDARD"]) ``` Outlines supports batched requests, so we will pass two requests to the model: ```python requests = [ "My hair is one fire! Please help me!!!", "Just wanted to say hi" ] prompts = [customer_support(request=request) for request in requests] ``` We can now ask the model to classify the requests: ```python labels = generator(prompts) print(labels) # ['URGENT', 'STANDARD'] ``` ## Using JSON-structured generation Another (convoluted) way to do multi-label classification is to JSON-structured generation in Outlines. We first need to define our Pydantic schema that contains the labels: ```python from enum import Enum from pydantic import BaseModel class Label(str, Enum): urgent = "URGENT" standard = "STANDARD" class Classification(BaseModel): label: Label ``` We can then create a generator with the Pydantic model we just defined and call it: ```python generator = outlines.Generator(model, Classification) labels = generator(prompts) print(labels) # ['{"label":"URGENT"}', '{ "label": "STANDARD" }'] ``` ================================================ FILE: docs/examples/dating_profiles.md ================================================ # Generate a synthetic dating profile from a description In this example we will see how we can use Outlines to generate synthetic data for a dating application. This example was originally contributed by [Vibhor Kumar](https://github.com/veezbo). ```python import json from dataclasses import dataclass from enum import Enum import torch import transformers from pydantic import BaseModel, conlist, constr import outlines ``` ## Defining the profile with Pydantic Here a dating profile will consist in a biography, a job, a list of interests and two question-answer pairs. The questions are written in advance by the team, and the users are asked to provide an answer: ```python class QuestionChoice(str, Enum): A = "The key to my heart is" B = "The first item on my bucket list is" C = "Perks of dating me" D = "Message me if you also love" E = "People would describe me as" F = "I can beat you in a game of" @dataclass class QuestionAnswer: question: QuestionChoice answer: str ``` Users need to provide a short biography, with a minimum of 10 and a maximum of 300 characters. The application also limits job descriptions to 50 characters. In addition to the question-answer pairs, the user is required to provide a list of between 1 and 5 interests: ```python class DatingProfile(BaseModel): bio: constr(str, min_length=10, max_length=300) job: constr(str, max_lengt=50) interests: conlist(str, min_length=1, max_length=5) # type: ignore qna1: QuestionAnswer qna2: QuestionAnswer ``` ## Prompt template and examples We will ask the model to generate profiles from a high-level description: ```python @dataclass class Example: description: str profile: DatingProfile ``` We will use Outlines' prompt templating abilities to generate the prompt for us. This help clearly separate the general prompting logic from what is specific to an example. ```python from outlines import Template dating_profile_prompt = Template.from_string( """ You are a world-renowned matchmaker who understands the modern dating market. Your job is to generate dating app profiles for male clients interested in women based on a provided description. The profiles should be authentic, show off their strengths, and maximize their likelihood of getting matches on dating apps. Here are some examples of past clients that you have successfully created profiles for: {% for example in examples %} Description: {{ example.description }} Profile: {{ example.profile }} {% endfor %} Here is the new client who you need to create a profile for: Description: {{ description }} Profile: """ ) ``` We will provide the model with several few-shot examples: ```python samples: list[Example] = [ Example( description="I'm an author and former professional soccer player living in Seattle who publishes popular fiction books. A typical day for me starts by hanging out with my cat, drinking a coffee, and reading as much as I can in a few hours. Then, I'll prepare a quick smoothie before starting to write for a few hours, take a break with soccer or running a few miles, and finally meet friends for dinner at a new, hip restaurant in the evening. Sometimes we go axe-throwing afterwards, or play poker, or watch a comedy show, or visit a dive bar. On my vacations, I travel extensively to countries South America, Europe, and Asia, with the goal of visiting them all!", profile=DatingProfile( bio="Adventurer, dreamer, author, and soccer enthusiast. Life’s too short to waste time so I make the most of each day by exploring new places and playing with my friends on the pitch. What’s your favorite way to get out and have fun?", job="Famous Soccer Player -> Famous Author", interests=["Soccer", "Travel", "Friends", "Books", "Fluffy Animals"], qna1=QuestionAnswer( question=QuestionChoice.B, answer="swim in all seven oceans!" ), qna2=QuestionAnswer( question=QuestionChoice.E, answer="fun-loving, adventurous, and a little bit crazy", ), ), ), Example( description="I run my company and build houses for a living. I'm a big fan of the outdoors and love to go hiking, camping, and fishing. I don't like video games, but do like to watch movies. My love language is home-cooked food, and I'm looking for someone who isn't afraid to get their hands dirty.", profile=DatingProfile( bio="If you're looking for a Montana man who loves to get outdoors and hunt, and who's in-tune with his masculinity then I'm your guy!", job="House Construction Manager / Entrepreneur", interests=["Hunting", "Hiking", "The outdoors", "Home-cooked food"], qna1=QuestionAnswer(question=QuestionChoice.A, answer="food made at home"), qna2=QuestionAnswer( question=QuestionChoice.C, answer="having a man in your life who can fix anything", ), ), ), Example( description="I run my own Youtube channel with 10M subscribers. I love working with kids, and my audience skews pretty young too. In my free time, I play Fortnite and Roblox. I'm looking for someone who is also a gamer and likes to have fun. I'm learning Japanese in my free time as well as how to cook.", profile=DatingProfile( bio="Easy on the eyes (find me on Youtube!) and great with kids. What more do you need?", job="Youtuber 10M+ subscribers", interests=["Kids", "Gaming", "Japanese"], qna1=QuestionAnswer(question=QuestionChoice.D, answer="anime and gaming!"), qna2=QuestionAnswer(question=QuestionChoice.F, answer="Fortnite, gg ez"), ), ), ] ``` ## Load the model We will use Mosaic's MPT-7B model (requires 13GB of GPU memory) which can fit on a single GPU with a reasonable context window. We initialize it with Outlines: ```python MODEL_NAME = "mosaicml/mpt-7b-8k-instruct" config = transformers.AutoConfig.from_pretrained( MODEL_NAME, trust_remote_code=True ) config.init_device = "meta" model_kwargs = { "config": config, "trust_remote_code": True, "torch_dtype": torch.bfloat16, "device_map": "cuda", } tf_model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_kwargs) tf_tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME) model = outlines.from_transformers(tf_model, tokenizer=tf_tokenizer) ``` ## JSON-structured generation of profiles We will now generate a dating profile from a textual description of oneself: ``` python new_description = """I'm a laid-back lawyer who spends a lot of his free-time gaming. I work in a corporate office, but ended up here after the start-up I cofounded got acquired, so still play ping pong with my cool coworkers every day. I have a bar at home where I make cocktails, which is great for entertaining friends. I secretly like to wear suits and get a new one tailored every few months. I also like weddings because I get to wear those suits, and it's a good excuse for a date. I watch the latest series because I'm paying, with my hard-earned money, for every streaming service.""" prompt = dating_profile_prompt(description=new_description, examples=samples) profile = model(prompt, DatingProfile) parsed_profile = DatingProfile.model_validate_json(json.loads(profile)) ``` ## Results Here are a couple of results: ```json { "bio": """I'm an ambitious lawyer with a casual and fashionable style. I love games and sports, but my true passion is preparing refreshing cocktails at home and dressing to the nines at weddings. I'm currently looking for a woman to show a good time to and get a kiss on the opulent suit I just had made. Send resume to this inbox.""", "job": "Lawyer", "interests": [ "Stylish guys", "Gaming", "Ping pong", "Cocktails", "Weddings" ], "qna1": { "question": "The first item on my bucket list is", "answer": "be married and have a family." }, "qna2": { "question": "People would describe me as", "answer": "charming, stylish, and funny." } } ``` ```json { "bio": """I’m a sexy lawyer with time on my hands. I love to game and play ping pong, but the real reason you should swipe to the right is because I look great in a suit. Who doesn’t love a man in a suit? Just saying. Send me a message if you think it’s time to take your dating life to the next level.""", "job": "Lawyer", "interests": [ "Gaming", "Ping Pong", "Tailored Suits", "Weddings", "Streaming Services" ], "qna1": { "question": "The first item on my bucket list is", "answer": "simulate space but stay alive for as long as possible" }, "qna2": { "question": "People would describe me as", "answer": "easy-going, a little nerdy but with a mature essence" } } ``` ================================================ FILE: docs/examples/deploy-using-bentoml.md ================================================ # Run Outlines using BentoML [BentoML](https://github.com/bentoml/BentoML) is an open-source model serving library for building performant and scalable AI applications with Python. It comes with tools that you need for serving optimization, model packaging, and production deployment. In this guide, we will show you how to use BentoML to run programs written with Outlines on GPU locally and in [BentoCloud](https://www.bentoml.com/), an AI Inference Platform for enterprise AI teams. The example source code in this guide is also available in the [examples/bentoml/](https://github.com/dottxt-ai/outlines/blob/main/examples/bentoml/) directory. ## Import a model First we need to download an LLM (Mistral-7B-v0.1 in this example and you can use any other LLM) and import the model into BentoML's [Model Store](https://docs.bentoml.com/en/latest/guides/model-store.html). Let's install BentoML and other dependencies from PyPi (preferably in a virtual environment): ```shell pip install -r requirements.txt ``` Then save the code snippet below as `import_model.py` and run `python import_model.py`. **Note**: You need to accept related conditions on [Hugging Face](https://huggingface.co/mistralai/Mistral-7B-v0.1) first to gain access to Mistral-7B-v0.1. ```python import bentoml MODEL_ID = "mistralai/Mistral-7B-v0.1" BENTO_MODEL_TAG = MODEL_ID.lower().replace("/", "--") def import_model(model_id, bento_model_tag): import torch from transformers import AutoModelForCausalLM, AutoTokenizer tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) model = AutoModelForCausalLM.from_pretrained( MODEL_ID, torch_dtype=torch.float16, low_cpu_mem_usage=True, ) with bentoml.models.create(bento_model_tag) as bento_model_ref: tokenizer.save_pretrained(bento_model_ref.path) model.save_pretrained(bento_model_ref.path) if __name__ == "__main__": import_model(MODEL_ID, BENTO_MODEL_TAG) ``` You can verify the download is successful by running: ```shell $ bentoml models list Tag Module Size Creation Time mistralai--mistral-7b-v0.1:m7lmf5ac2cmubnnz 13.49 GiB 2024-04-25 06:52:39 ``` ## Define a BentoML Service As the model is ready, we can define a [BentoML Service](https://docs.bentoml.com/en/latest/guides/services.html) to wrap the capabilities of the model. We will run the JSON-structured generation example [in the README](https://github.com/dottxt-ai/outlines?tab=readme-ov-file#efficient-json-generation-following-a-json-schema), with the following schema: ```python DEFAULT_SCHEMA = """{ "title": "Character", "type": "object", "properties": { "name": { "title": "Name", "maxLength": 10, "type": "string" }, "age": { "title": "Age", "type": "integer" }, "armor": {"$ref": "#/definitions/Armor"}, "weapon": {"$ref": "#/definitions/Weapon"}, "strength": { "title": "Strength", "type": "integer" } }, "required": ["name", "age", "armor", "weapon", "strength"], "definitions": { "Armor": { "title": "Armor", "description": "An enumeration.", "enum": ["leather", "chainmail", "plate"], "type": "string" }, "Weapon": { "title": "Weapon", "description": "An enumeration.", "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], "type": "string" } } }""" ``` First, we need to define a BentoML service by decorating an ordinary class (`Outlines` here) with `@bentoml.service` decorator. We pass to this decorator some configuration and GPU on which we want this service to run in BentoCloud (here an L4 with 24GB memory): ```python import typing as t import bentoml from import_model import BENTO_MODEL_TAG @bentoml.service( traffic={ "timeout": 300, }, resources={ "gpu": 1, "gpu_type": "nvidia-l4", }, ) class Outlines: bento_model_ref = bentoml.models.get(BENTO_MODEL_TAG) def __init__(self) -> None: import outlines import torch from transformers import AutoModelForCausalLM, AutoTokenizer # Load tokenizer and model from the BentoML model reference path hf_tokenizer = AutoTokenizer.from_pretrained(self.bento_model_ref.path) hf_model = AutoModelForCausalLM.from_pretrained( self.bento_model_ref.path, torch_dtype=torch.float16, low_cpu_mem_usage=True, device_map="cuda" ) # Then use the loaded model with Outlines self.model = outlines.from_transformers(hf_model, hf_tokenizer) ... ``` We then need to define an HTTP endpoint using `@bentoml.api` to decorate the method `generate` of `Outlines` class: ```python ... @bentoml.api async def generate( self, prompt: str = "Give me a character description.", json_schema: t.Optional[str] = DEFAULT_SCHEMA, ) -> t.Dict[str, t.Any]: import json import outlines from outlines.types import JsonSchema generator = outlines.Generator(self.model, JsonSchema(json_schema)) character = generator(prompt) return json.loads(character) ``` Here `@bentoml.api` decorator defines `generate` as an HTTP endpoint that accepts a JSON request body with two fields: `prompt` and `json_schema` (optional, which allows HTTP clients to provide their own JSON schema). The type hints in the function signature will be used to validate incoming JSON requests. You can define as many HTTP endpoints as you want by using `@bentoml.api` to decorate other methods of `Outlines` class. Now you can save the above code to `service.py` (or use [this implementation](https://github.com/dottxt-ai/outlines/blob/main/examples/bentoml/)), and run the code using the BentoML CLI. ## Run locally for testing and debugging Then you can run a server locally by: ```shell bentoml serve . ``` The server is now active at . You can interact with it using the Swagger UI or in other different ways:
CURL ```shell curl -X 'POST' \ 'http://localhost:3000/generate' \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "prompt": "Give me a character description." }' ```
Python client ```python import bentoml with bentoml.SyncHTTPClient("http://localhost:3000") as client: response = client.generate( prompt="Give me a character description" ) print(response) ```
Expected output: ```shell { "name": "Aura", "age": 15, "armor": "plate", "weapon": "sword", "strength": 20 } ``` ## Deploy to BentoCloud After the Service is ready, you can deploy it to [BentoCloud](https://docs.bentoml.com/en/latest/bentocloud/get-started.html) for better management and scalability. [Sign up](https://cloud.bentoml.com/signup) if you haven't got a BentoCloud account. Make sure you have [logged in to BentoCloud](https://docs.bentoml.com/en/latest/bentocloud/how-tos/manage-access-token.html), then run the following command to deploy it. ```shell bentoml deploy . ``` Once the application is up and running on BentoCloud, you can access it via the exposed URL. **Note**: For custom deployment in your own infrastructure, use [BentoML to generate an OCI-compliant image](https://docs.bentoml.com/en/latest/guides/containerization.html). ================================================ FILE: docs/examples/deploy-using-cerebrium.md ================================================ # Run Outlines using Cerebrium [Cerebrium](https://www.cerebrium.ai/) is a serverless AI infrastructure platform that makes it easier for companies to build and deploy AI based applications. They offer Serverless GPU's with low cold start times with over 12 varieties of GPU chips that auto scale and you only pay for the compute you use. In this guide we will show you how you can use Cerebrium to run programs written with Outlines on GPUs in the cloud. # Setup Cerebrium First, we install Cerebrium and login to get authenticated. ```shell pip install cerebrium cerebrium login ``` Then let us create our first project ```shell cerebrium init outlines-project ``` ## Setup Environment and Hardware You set up your environment and hardware in the cerebrium.toml file that was created using the init function above. ```toml [cerebrium.deployment] docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04" [cerebrium.hardware] cpu = 2 memory = 14.0 gpu = "AMPERE A10" gpu_count = 1 provider = "aws" region = "us-east-1" [cerebrium.dependencies.pip] outline = "==1.0.0" transformers = "==4.38.2" datasets = "==2.18.0" accelerate = "==0.27.2" ``` ## Setup inference Running code in Cerebrium is like writing normal python with no special syntax. In a `main.py` file specify the following: ```python import outlines import transformers from outlines.types import JsonSchema model = outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), transformers.AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) schema = """{ "title": "Character", "type": "object", "properties": { "name": { "title": "Name", "maxLength": 10, "type": "string" }, "age": { "title": "Age", "type": "integer" }, "armor": {"$ref": "#/definitions/Armor"}, "weapon": {"$ref": "#/definitions/Weapon"}, "strength": { "title": "Strength", "type": "integer" } }, "required": ["name", "age", "armor", "weapon", "strength"], "definitions": { "Armor": { "title": "Armor", "description": "An enumeration.", "enum": ["leather", "chainmail", "plate"], "type": "string" }, "Weapon": { "title": "Weapon", "description": "An enumeration.", "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], "type": "string" } } }""" generator = outlines.Generator(model, JsonSchema(schema)) ``` On first deploy, it will download the model and store it on disk therefore for subsequent calls it will load the model from disk. Every function in Cerebrium is callable through an API endpoint. Code at the top most layer (ie: not in a function) is instantiated only when the container is spun up the first time so for subsequent calls, it will simply run the code defined in the function you call. To deploy an API that creates a new character when called with a prompt you can add the following code to `main.py`: ```python def generate( prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", ): character = generator( f"[INST]Give me a character description. Describe {prompt}.[/INST]" ) return character ``` ## Run on the cloud ```shell cerebrium deploy ``` You will see your application deploy, install pip packages and download the model. Once completed it will output a CURL request you can use to call your endpoint. Just remember to end the url with the function you would like to call - in this case /generate. You should see your response returned! ================================================ FILE: docs/examples/deploy-using-modal.md ================================================ # Run Outlines using Modal [Modal](https://modal.com/) is a serverless platform that allows you to easily run code on the cloud, including GPUs. It can come very handy for those of us who don't have a monster GPU at home and want to be able to quickly and easily provision, configure and orchestrate cloud infrastructure. In this guide we will show you how you can use Modal to run programs written with Outlines on GPU in the cloud. ## Requirements We recommend installing `modal` and `outlines` in a virtual environment. You can create one with: ```shell python -m venv venv source venv/bin/activate ``` Then install the required packages: ```shell pip install modal outlines ``` ## Build the image First we need to define our container image. If you need to access a gated model, you will need to provide an [access token](https://huggingface.co/settings/tokens). See the `.env` call below for how to provide a HuggingFace token. Setting a token is best done by setting an environment variable `HF_TOKEN` with your token. If you do not wish to do this, we provide a commented-out line in the code to set the token directly in the code. ```python from modal import Image, App, gpu import os # This creates a modal App object. Here we set the name to "outlines-app". # There are other optional parameters like modal secrets, schedules, etc. # See the documentation here: https://modal.com/docs/reference/modal.App app = App(name="outlines-app") # Specify a language model to use. # Another good model to use is "NousResearch/Hermes-2-Pro-Mistral-7B" language_model = "mistral-community/Mistral-7B-v0.2" # Please set an environment variable HF_TOKEN with your Hugging Face API token. # The code below (the .env({...}) part) will copy the token from your local # environment to the container. # More info on Image here: https://modal.com/docs/reference/modal.Image outlines_image = Image.debian_slim(python_version="3.11").pip_install( "outlines", "transformers", "datasets", "accelerate", "sentencepiece", ).env({ # This will pull in your HF_TOKEN environment variable if you have one. 'HF_TOKEN':os.environ['HF_TOKEN'] # To set the token directly in the code, uncomment the line below and replace # 'YOUR_TOKEN' with the HuggingFace access token. # 'HF_TOKEN':'YOUR_TOKEN' }) ``` ## Setting the container up When running longer Modal apps, it's recommended to download your language model when the container starts, rather than when the function is called. This will cache the model for future runs. ```python # This function imports the model from Hugging Face. The modal container # will call this function when it starts up. This is useful for # downloading models, setting up environment variables, etc. def import_model(): import outlines import transformers outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(language_model), transformers.AutoTokenizer.from_pretrained(language_model) ) # This line tells the container to run the import_model function when it starts. outlines_image = outlines_image.run_function(import_model) ``` ## Define a schema We will run the JSON-structured generation example [in the README](https://github.com/dottxt-ai/outlines?tab=readme-ov-file#efficient-json-generation-following-a-json-schema), with the following schema: ```python # Specify a schema for the character description. In this case, # we want to generate a character with a name, age, armor, weapon, and strength. schema = """{ "title": "Character", "type": "object", "properties": { "name": { "title": "Name", "maxLength": 10, "type": "string" }, "age": { "title": "Age", "type": "integer" }, "armor": {"$ref": "#/definitions/Armor"}, "weapon": {"$ref": "#/definitions/Weapon"}, "strength": { "title": "Strength", "type": "integer" } }, "required": ["name", "age", "armor", "weapon", "strength"], "definitions": { "Armor": { "title": "Armor", "description": "An enumeration.", "enum": ["leather", "chainmail", "plate"], "type": "string" }, "Weapon": { "title": "Weapon", "description": "An enumeration.", "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], "type": "string" } } }""" ``` To make the inference work on Modal we need to wrap the corresponding function in a `@app.function` decorator. We pass to this decorator the image and GPU on which we want this function to run. Let's choose an A100 with 80GB memory. Valid GPUs can be found [here](https://modal.com/docs/reference/modal.gpu). ```python # Define a function that uses the image we chose, and specify the GPU # and memory we want to use. @app.function(image=outlines_image, gpu=gpu.A100(size='80GB')) def generate( prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", ): # Remember, this function is being executed in the container, # so we need to import the necessary libraries here. You should # do this with any other libraries you might need. import outlines import transformers from outlines.types import JsonSchema # Load the model into memory. The import_model function above # should have already downloaded the model, so this call # only loads the model into GPU memory. outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(language_model, device_map="cuda"), transformers.AutoTokenizer.from_pretrained(language_model) ) # Generate a character description based on the prompt. # We use the .json generation method -- we provide the # - model: the model we loaded above # - schema: the JSON schema we defined above generator = outlines.Generator(model, JsonSchema(schema)) # Make sure you wrap your prompt in instruction tags ([INST] and [/INST]) # to indicate that the prompt is an instruction. Instruction tags can vary # by models, so make sure to check the model's documentation. character = generator( f"[INST]Give me a character description. Describe {prompt}.[/INST]" ) # Print out the generated character. print(character) ``` We then need to define a `local_entrypoint` to call our function `generate` remotely. ```python @app.local_entrypoint() def main( prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", ): # We use the "generate" function defined above -- note too that we are calling # .remote() on the function. This tells modal to run the function in our cloud # machine. If you want to run the function locally, you can call .local() instead, # though this will require additional setup. generate.remote(prompt) ``` Here `@app.local_entrypoint()` decorator defines `main` as the function to start from locally when using the Modal CLI. You can save above code to `example.py` (or use [this implementation](https://github.com/dottxt-ai/outlines/blob/main/examples/modal_example.py)). Let's now see how to run the code on the cloud using the Modal CLI. ## Run on the cloud First install the Modal client from PyPi, if you have not already: ```shell pip install modal ``` You then need to obtain a token from Modal. Run the following command: ```shell modal setup ``` Once that is set you can run inference on the cloud using: ```shell modal run example.py ``` You should see the Modal app initialize, and soon after see the result of the `print` function in your terminal. That's it! ================================================ FILE: docs/examples/earnings-reports.md ================================================ # Extracting financial data from earnings reports A common task in finance is to extract financial data from earnings reports. Earnings reports are infamously poorly formatted, as the SEC does not have requirements for producing machine-readable documents. Earnings reports are often provided as HTML documents, which can be difficult to parse. Investors often use complicated parsing systems or manual review to extract data. Entire companies are built around automating this task. This cookbook is a proof of concept about how we can use LLMs to extract financial data directly into CSV. Comma-separated values are well-structured and can be defined by a regular expression, which Outlines can use to guide the LLM's output. The example is a smaller subset of a full demo found [here](https://github.com/dottxt-ai/demos/tree/main/earnings-reports). The demo contains the full set of pre-processing steps needed to convert raw HTML into a structured CSV file, and tests the results across three company's 10k reports. ## Setup Install outlines and required dependencies: ```shell # Later versions of torch can have difficulty with certain CUDA drivers. # We recommend using 2.4.0 for now, but you may wish to experiment with # other versions. pip install outlines pandas transformers torch==2.4.0 accelerate ``` ## Load the model Choose your language model. We'll use Phi-3 mini, which is small enough to run on reasonably small machines. ```python import outlines import torch import transformers model_name = 'microsoft/Phi-3-mini-4k-instruct' tf_model = transformers.AutoModelForCausalLM.from_pretrained( model_name, device_map="cuda", torch_dtype=torch.bfloat16 ) tf_tokenizer = transformers.AutoTokenizer.from_pretrained(model_name) model = outlines.from_transformers(tf_model, tf_tokenizer) ``` ## Set up the data For brevity, we've attached the markdown version of Nvidia's 10k report. The [full demonstration](https://github.com/dottxt-ai/demos/tree/main/earnings-reports) processes the raw HTML version of the report to these markdown tables. Pages are filtered by whether they seem to contain income statements, and then compacted into the string you see below. ```python income_statement = """ Table of ContentsNVIDIA Corporation and SubsidiariesConsolidated Statements of Income(In millions, except per share data) | | | | | | | | | | | | | | | | | | | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | | | | | Year Ended | | | | | | | | | | | | | | | | | | | Jan 28, 2024 | | | | | | Jan 29, 2023 | | | | | | Jan 30, 2022 | | | | Revenue | | | $ | 60,922 | | | | | $ | 26,974 | | | | | $ | 26,914 | | | Cost of revenue | | | 16,621 | | | | | | 11,618 | | | | | | 9,439 | | | | Gross profit | | | 44,301 | | | | | | 15,356 | | | | | | 17,475 | | | | Operating expenses | | | | | | | | | | | | | | | | | | | Research and development | | | 8,675 | | | | | | 7,339 | | | | | | 5,268 | | | | Sales, general and administrative | | | 2,654 | | | | | | 2,440 | | | | | | 2,166 | | | | Acquisition termination cost | | | — | | | | | | 1,353 | | | | | | — | | | | Total operating expenses | | | 11,329 | | | | | | 11,132 | | | | | | 7,434 | | | | Operating income | | | 32,972 | | | | | | 4,224 | | | | | | 10,041 | | | | Interest income | | | 866 | | | | | | 267 | | | | | | 29 | | | | Interest expense | | | (257) | | | | | | (262) | | | | | | (236) | | | | Other, net | | | 237 | | | | | | (48) | | | | | | 107 | | | | Other income (expense), net | | | 846 | | | | | | (43) | | | | | | (100) | | | | Income before income tax | | | 33,818 | | | | | | 4,181 | | | | | | 9,941 | | | | Income tax expense (benefit) | | | 4,058 | | | | | | (187) | | | | | | 189 | | | | Net income | | | $ | 29,760 | | | | | $ | 4,368 | | | | | $ | 9,752 | | | | | | | | | | | | | | | | | | | | | | Net income per share: | | | | | | | | | | | | | | | | | | | Basic | | | $ | 12\.05 | | | | | $ | 1\.76 | | | | | $ | 3\.91 | | | Diluted | | | $ | 11\.93 | | | | | $ | 1\.74 | | | | | $ | 3\.85 | | | | | | | | | | | | | | | | | | | | | | Weighted average shares used in per share computation: | | | | | | | | | | | | | | | | | | | Basic | | | 2,469 | | | | | | 2,487 | | | | | | 2,496 | | | | Diluted | | | 2,494 | | | | | | 2,507 | | | | | | 2,535 | | | """ ``` The markdown tables extracted from the earnings reports can vary widely in row names, column counts, data types, etc. The advantage of LLMs here is that we can define the data we want in terms of the data types, and the LLM will output the data in the desired format. For comparison, here is how the income statement looks in the original HTML: ![Nvidia income statement](./images/nvidia-income.png) ## Define the data we want Outlines is often used for JSON output, but it can also be used for CSV. We know the columns we want to extract, and we know the data types of the columns. Year for example is always a four-digit number, revenue is a number with commas, and so on. We can define a regex pattern for each column type: ```python # Define the column type regex patterns column_types = { # Year is always a four-digit number "year": r"\d{4}", # Revenue, operating income, and net income are always numbers with commas. # This regex permits integers that may begin with a minus sign, and may have # commas separating the thousands, millions, etc. "integer_comma": r"((-?\d+),?\d+|(-?\d+))", # Number is currently not used, but it represents a number with up to two decimal places. "number": r"(-?\d+(?:\.\d{1,2})?)", } ``` Next, let's choose the columns we want to extract. We want - Year, always a four-digit number - Revenue, a number with commas - Operating income, a number with commas - Net income, a number with commas ```python # Define the columns to extract, and their data types. columns_to_extract = { "year": "year", "revenue": "integer_comma", "operating_income": "integer_comma", "net_income": "integer_comma", } ``` You can modify `column_type_regex` to match the data types of the columns you want to extract. Adding a new financial metric to extract is as simple as adding a new key/value pair to `columns_to_extract`: ```python columns_to_extract["diluted_earnings_per_share"] = "number" ``` Additional columns are not well tested for accuracy, so use with caution. ## Create the regex describing the data we want ```python # Create the header line. This is the requested column names # separated by commas, i.e. "year,revenue,..." header = ",".join(columns_to_extract.keys()) # Create the data capture patterns. These are the regex patterns # that will be used to capture the data in each column data_patterns = [column_types[dtype] for dtype in columns_to_extract.values()] data_line = ",".join(data_patterns) # Our final regex pattern. max_rows = 3 # We expect 3 rows of data, firms usually report 3 years of income statements csv_regex = f"{header}(\n{data_line}){{,{max_rows}}}\n\n" print(csv_regex) ``` which gives us ``` year,revenue,operating_income,net_income,basic_earnings_per_share( \d{4},((-?\d+),?\d+|(-?\d+)),((-?\d+),?\d+|(-?\d+)),((-?\d+),?\d+|(-?\d+)),(-?\d+(?:\.\d{1,2})?)){,3} ``` Pretty hairy, right? Thankfully, we have a simple function to construct this regex for you. The regex defines a header line, followed by a data line that repeats for each row of data we want to extract. Passing the regex to `outlines.Generator` will produce a function that will __always__ produce a CSV string that is consistent with the regex. ## Prompting the model Outlines does not add system or instruction tokens by default, so we need to use `transformers.AutoTokenizer` to add them for whatever model we're using. ```python from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained(model_name) def add_instruction(prompt): return tokenizer.apply_chat_template([{"role": "user", "content": prompt}], tokenize=False, add_generation_prompt=True) print(add_instruction("Howdy")) ``` ``` <|user|> Howdy<|end|> <|assistant|> ``` Our prompt roughly describes the task we want the model to perform, and a few pieces of information it may need to know about income statements. ```python def extract_financial_data_prompt(columns_to_extract, income_statement): user_prompt = f""" Extract annual financial data from this set of pages. Pages are from a 10k filing and were chosen because they may contain a comprehensive income statement. Note that selected pages may be incorrectly extracted, so you should verify that you are extracting from the comprehensive income statement and not some other financial statement. Create a row for each year available in the income statement with the following columns: {', '.join(columns_to_extract.keys())}. Firms typically report the most recent 3 years of data, but this can vary. Each column has types: {', '.join(columns_to_extract.values())}. # Relevant pages: {income_statement} # Key instructions: 1. Look ONLY at the "Consolidated Statements of Income" table 2. For operating income, look for "Income from operations" or "Operating income" 3. For net income, use the TOTAL net income figure, not amounts allocated to specific share classes 4. Use NULL for missing values 5. Operating income must be less than revenue 6. Net income must be less than operating income 7. Ignore segment breakdowns, quarterly data, or per-share amounts # Output format: - CSV format with headers: {','.join(columns_to_extract.keys())} - Use NULL for missing values - If no data are found, do not create a row. - Enter two newline characters to terminate the CSV when no more data are found. # Definitions: - Revenue: Total sales of goods and services. Usually this is at the top of the income statement. - Operating income: Revenue minus operating expenses for the entire company. This is revenue minus costs. Operating income is also called operating profit, EBIT, or income from operations. - Net income: Operating income minus taxes. This is the bottom line of the income statement. """ return add_instruction(user_prompt) ``` ## Running the model Now that we have our prompt and regular expression, we can run the model. Construct our regex extractor function. ```python from outlines.types import Regex csv_extractor = outlines.Generator(model, Regex(csv_regex)) ``` Provide the prompt to the model and run it: ```python csv_data = csv_extractor( extract_financial_data_prompt(columns_to_extract, income_statement), max_new_tokens=1024, ) print(csv_data) ``` ``` year,revenue,operating_income,net_income 2024,60922,32972,29760 2023,26974,4224,4368 2022,26914,10041,9752 ``` Voila! We've extracted the financial data from the income statement, and it's correct upon inspection. You can even load this into a `pandas` DataFrame for further analysis: ```python import pandas as pd from io import StringIO df = pd.read_csv(StringIO(csv_data)) print(df) ``` ``` year revenue operating_income net_income 0 2024 60922 32972 29760 1 2023 26974 4224 4368 2 2022 26914 10041 9752 ``` ================================================ FILE: docs/examples/extract_event_details.md ================================================ This recipe demonstrates how to use the `outlines` library to extract structured event details from a text message. We will extract the title, location, and start date and time from messages like the following: ```plaintext Hello Kitty, my grandmother will be here, I think it's better to postpone our appointment to review math lessons to next Monday at 2pm at the same place, 3 avenue des tanneurs, one hour will be enough see you 😘 ``` Let see how to extract the event details from the message with the MLX library dedicated to Apple Silicon processor (M series). ```python --8<-- "docs/cookbook/extract_event_details.py" ``` The output will be: ```plaintext Today: Saturday 16 November 2024 and it's 10:55 ``` and the extracted event information will be: ```json { "title":"Math Review", "location":"3 avenue des tanneurs", "start":"2024-11-22T14:00:00Z" } ``` To find out more about this use case, we recommend the project developped by [Joseph Rudoler](https://x.com/JRudoler) the [ICS Generator](https://github.com/jrudoler/ics-generator) ================================================ FILE: docs/examples/extract_event_details.py ================================================ from datetime import datetime from mlx_lm import load from pydantic import BaseModel, Field import outlines from outlines import Generator, Template # Load the model model = outlines.from_mlxlm(*load("mlx-community/Hermes-3-Llama-3.1-8B-8bit")) # Define the event schema using Pydantic class Event(BaseModel): title: str = Field(description="title of the event") location: str start: datetime = Field( default=None, description="date of the event if available in iso format" ) # Load the prompt template from a string prompt_template = Template.from_string( """ Today's date and time are {{ now }} Given a user message, extract information of the event like date and time in iso format, location and title. If the given date is relative, think step by step to find the right date. Here is the message: {{ message }} """ ) # Get the current date and time now = datetime.now().strftime("%A %d %B %Y and it's %H:%M") # Sample message message = """Hello Kitty, my grandmother will be here, I think it's better to postpone our appointment to review math lessons to next Friday at 2pm at the same place, 3 avenue des tanneurs, I think that one hour will be enough see you 😘 """ # Create the generator generator = Generator(model, Event) # Create the prompt prompt = prompt_template(now=now, message=message) # Extract the event information event = generator(prompt) # Print the current date and time print(f"Today: {now}") # Print the extracted event information print(event) ================================================ FILE: docs/examples/extraction.md ================================================ # Named entity extraction Named Entity Extraction is a fundamental problem in NLP. It involves identifying and categorizing named entities within a document: people, organization, dates, places, etc. It is usually the first step in a more complex NLP worklow. Here we will use the example of a pizza restaurant that receives orders via their website and need to identify the number and types of pizzas that are being ordered. Getting LLMs to output the extracted entities in a structured format can be challenging. In this tutorial we will see how we can use Outlines' JSON-structured generation to extract entities from a document and return them in a valid JSON data structure 100% of the time. As always, we start with initializing the model. We will be using a quantized version of Mistal-7B-v0.1 (we're GPU poor): ```python import transformers import outlines model_name = "microsoft/Phi-3-mini-4k-instruct" model = outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda"), transformers.AutoTokenizer.from_pretrained(model_name), ) ``` And we will be using the following prompt template: ```python from outlines import Template take_order = Template.from_string( """You are the owner of a pizza parlor. Customers \ send you orders from which you need to extract: 1. The pizza that is ordered 2. The number of pizzas # EXAMPLE ORDER: I would like one Margherita pizza RESULT: {"pizza": "Margherita", "number": 1} # OUTPUT INSTRUCTIONS Answer in valid JSON. Here are the different objects relevant for the output: Order: pizza (str): name of the pizza number (int): number of pizzas Return a valid JSON of type "Order" # OUTPUT ORDER: {{ order }} RESULT: """ ) ``` We now define our data model using Pydantic: ```python from enum import Enum from pydantic import BaseModel class Pizza(str, Enum): margherita = "Margherita" pepperonni = "Pepperoni" calzone = "Calzone" class Order(BaseModel): pizza: Pizza number: int ``` We can now define our generator and call it on several incoming orders: ```python orders = [ "Hi! I would like to order two pepperonni pizzas and would like them in 30mins.", "Is it possible to get 12 margheritas?" ] prompts = [take_order(order=order) for order in orders] generator = outlines.Generator(model, Order) results = generator(prompts) print(results) # ['{"pizza": "Pepperoni", "number": 2}', # '{"pizza": "Margherita", "number": 12}'] ``` There are several ways you could improve this example: - Clients may order several types of pizzas. - Clients may order drinks as well. - If the pizza place has a delivery service we need to extract the client's address and phone number - Clients may specify the time for which they want the pizza. We could then check against a queuing system and reply to them with the estimated delivery time. How would you change the Pydantic model to account for these use cases? ================================================ FILE: docs/examples/index.md ================================================ # Examples This part of the documentation provides a few cookbooks that you can browse to get acquainted with the library and get some inspiration about what you could do with structured generation. Remember that you can easily change the model that is being used! - [Classification](classification.md): Classify customer requests. - [Named Entity Extraction](extraction.md): Extract information from pizza orders. - [Dating Profiles](dating_profiles.md): Build dating profiles from descriptions using prompt templating and JSON-structured generation. - [Chain Of Density](chain_of_density.md): Summarize documents using chain of density prompting and JSON-structured generation. - [Playing Chess](models_playing_chess.md): Make Phi-3 Mini play chess against itself using regex-structured generation. - [SimToM](simtom.md): Improve LLMs' Theory of Mind capabilities with perspective-taking prompting and JSON-structured generation. - [Q&A with Citations](qa-with-citations.md): Answer questions and provide citations using JSON-structured generation. - [Knowledge Graph Generation](knowledge_graph_extraction.md): Generate a Knowledge Graph from unstructured text using JSON-structured generation. - [Structured Generation Workflow](structured_generation_workflow.md): - [Chain Of Thought (CoT)](chain_of_thought.md): Generate a series of intermediate reasoning steps using regex-structured generation. - [ReAct Agent](react_agent.md): Build an agent with open weights models using regex-structured generation. - [Structured Generation from PDFs](read-pdfs.md): Use Outlines with vision-language models to read PDFs and produce structured output. - [Earnings reports to CSV](earnings-reports.md): Extract data from earnings reports to CSV using regex-structured generation. - [Receipt Digitization](receipt-digitization.md): Extract information from a picture of a receipt using structured generation. - [Extract Events Details](extract_event_details.md): Run Outlines on the cloud: - [BentoML](deploy-using-bentoml.md) - [Cerebrium](deploy-using-cerebrium.md) - [Modal](deploy-using-modal.md) ================================================ FILE: docs/examples/knowledge_graph_extraction.md ================================================ # Knowledge Graph Extraction In this guide, we use [outlines](https://dottxt-ai.github.io/outlines/) to extract a knowledge graph from unstructured text. We will use [llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library. Outlines supports llama-cpp-python, but we need to install it ourselves: ```shell pip install llama-cpp-python ``` To create an outlines `LlamaCpp` model, you first need to create a `Llama` object from the `llama-cpp-python` library. Then you can create the outlines model by calling `models.from_llamacpp` with the `Llama` object instance as argument. To create the `Llama` object, you need to provide the model weights by passing the name of the repository on the HuggingFace Hub, and the filenames or glob pattern (it will automatically download the weights from the hub): ```python import llama_cpp import outlines llm = llama_cpp.Llama( "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF", tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained( "NousResearch/Hermes-2-Pro-Llama-3-8B" ), n_gpu_layers=-1, flash_attn=True, n_ctx=8192, verbose=False ) model = outlines.from_llamacpp(llm) ``` ??? note "(Optional) Store the model weights in a custom folder" By default the model weights are downloaded to the hub cache but if we want so store the weights in a custom folder, we pull a quantized GGUF model [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF) by [NousResearch](https://nousresearch.com/) from [HuggingFace](https://huggingface.co/): ```shell wget https://hf.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf ``` We initialize the model: ```python from llama_cpp import Llama llm = Llama("/path/to/model/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ...) ``` ## Knowledge Graph Extraction We first need to define our Pydantic class for each node and each edge of the knowledge graph: ```python from pydantic import BaseModel, Field class Node(BaseModel): """Node of the Knowledge Graph""" id: int = Field(..., description="Unique identifier of the node") label: str = Field(..., description="Label of the node") property: str = Field(..., description="Property of the node") class Edge(BaseModel): """Edge of the Knowledge Graph""" source: int = Field(..., description="Unique source of the edge") target: int = Field(..., description="Unique target of the edge") label: str = Field(..., description="Label of the edge") property: str = Field(..., description="Property of the edge") ``` We then define the Pydantic class for the knowledge graph and get its JSON schema: ```python from typing import List class KnowledgeGraph(BaseModel): """Generated Knowledge Graph""" nodes: List[Node] = Field(..., description="List of nodes of the knowledge graph") edges: List[Edge] = Field(..., description="List of edges of the knowledge graph") schema = KnowledgeGraph.model_json_schema() ``` We then need to adapt our prompt to the [Hermes prompt format for JSON schema](https://github.com/NousResearch/Hermes-Function-Calling?tab=readme-ov-file#prompt-format-for-json-mode--structured-outputs): ```python from outlines import Template generate_hermes_prompt = Template.from_string( """ <|im_start|>system You are a world class AI model who answers questions in JSON Here's the json schema you must adhere to: {{ schema }} <|im_end|> <|im_start|>user {{ user_prompt }} <|im_end|> <|im_start|>assistant """ ) ``` For a given user prompt, for example: ```python user_prompt = "Alice loves Bob and she hates Charlie." ``` We can use `outlines.Generator` by passing the Pydantic class we previously defined, and call the generator with the Hermes prompt: ```python from outlines import Generator generator = Generator(model, KnowledgeGraph) prompt = generate_hermes_prompt(schema=schema, user_prompt=user_prompt) response = generator(prompt, max_tokens=1024, temperature=0, seed=42) ``` We obtain the nodes and edges of the knowledge graph: ```python print(response) # {"nodes":[{"id":1,"label":"Alice","property":"loves,hates"}, # {"id":2,"label":"Bob","property":"loved_by"}, # {"id":3,"label":"Charlie","property":"hated_by"}], # "edges":[{"source":1,"target":2,"label":"loves","property":"love"}, # {"source":1,"target":3,"label":"hates","property":"hate"}]} ``` ## (Optional) Visualizing the Knowledge Graph We can use the [Graphviz library](https://graphviz.readthedocs.io/en/stable/) to visualize the generated knowledge graph. For detailed installation instructions, see [here](https://graphviz.readthedocs.io/en/stable/#installation). ```python from graphviz import Digraph dot = Digraph() for node in response["nodes"]: dot.node(str(node["id"]), node["label"], shape='circle', width='1', height='1') for edge in response["edges"]: dot.edge(str(edge["source"]), str(edge["target"]), label=edge["label"]) dot.render('knowledge-graph.gv', view=True) ``` ![Image of the Extracted Knowledge Graph](./images/knowledge-graph-extraction.png) This example was originally contributed by [Alonso Silva](https://github.com/alonsosilvaallende). ================================================ FILE: docs/examples/models_playing_chess.md ================================================ # Large language models playing chess In this example we will make a Phi-3 model play chess against itself. On its own the model easily generates invalid moves, so we will give it a little help. At each step we will generate a regex that only matches valid move, and use it to help the model only generating valid moves. ## The chessboard The game will be played on a standard checkboard. We will use the `chess` [library](https://github.com/niklasf/python-chess) to track the opponents' moves, and check that the moves are valid. ```python %pip install outlines -q %pip install chess -q %pip install transformers accelerate einops -q import chess board = chess.Board("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1") ``` ## The opponents Phi-3 will be playing against itself: ```python import transformers import outlines model_name = "microsoft/Phi-3-mini-4k-instruct" model = outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(model_name), transformers.AutoTokenizer.from_pretrained(model_name), ) ``` ## A little help for the language model To make sure Phi-3 generates valid chess moves we will use Outline's regex-structured generation. We define a function that takes the current state of the board and returns a regex that matches all possible legal moves: ```python import re from outlines.types.dsl import either, String def legal_moves_regex(board): """Build a regex that only matches valid moves.""" legal_moves = list(board.legal_moves) legal_modes_str = [board.san(move) for move in legal_moves] legal_modes_str = [re.sub(r"[+#]", "", move) for move in legal_modes_str] regex_pattern = either(*[String(move) for move in legal_modes_str]) return regex_pattern ``` ## Prompting the language model The prompt corresponds to the current state of the board, so we start with: ```python prompt = "Let's play Chess. Moves: " ``` We update the prompt at each step so it reflects the state of the board after the previous move. ## Let's play ```python board_state = " " turn_number = 0 while not board.is_game_over(): regex_pattern = legal_moves_regex(board) structured = model(prompt + board_state, regex_pattern) move = board.parse_san(structured) if turn_number % 2 == 0 : # It's White's turn board_state += board.san(move) + " " else: board_state += board.san(move) + " " + str(turn_number) + "." turn_number += 1 board.push(move) print(board_state) ``` Interestingly enough, Phi-3 hates capturing. ```pgn e4 e5 1.Nf3 Ne7 3.b4 Nf5 5.Nc3 Ne7 7.Bb5 a6 9.Na4 b6 11.c3 Nec6 13.c4 a5 15.d4 Qg5 17.Nd2 Bb7 19.dxe5 ``` *This example was originally authored by [@903124S](https://x.com/903124S) in [this gist](https://gist.github.com/903124/cfbefa24da95e2316e0d5e8ef8ed360d).* ================================================ FILE: docs/examples/prompt_templates/chain_of_density.txt ================================================ Article: {{ article }} You will generate increasingly concise, entity-dense summaries of the above Article. Repeat the following 2 steps 5 times. Step 1. Identify 1-3 informative Entities ("; " delimited) from the Article which are missing from the previously generated summary. Step 2. Write a new, denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities. A Missing Entity is: - Relevant: to the main story. - Specific: descriptive yet concise (5 words or fewer). - Novel: not in the previous summary. - Faithful: present in the Article. - Anywhere: located anywhere in the Article. Guidelines: - The first summary should be long (4-5 sentences, ~80 words) yet highly non-specific, containing little information beyond the entities marked as missing. Use overly verbose language and fillers (e.g., "this article discusses") to reach ~80 words. - Make every word count: rewrite the previous summary to improve flow and make space for additional entities. - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses". - The summaries should become highly dense and concise yet self-contained, e.g., easily understood without the Article. - Missing entities can appear anywhere in the new summary. - Never drop entities from the previous summary. If space cannot be made, add fewer new entities. Remember, use the exact same number of words for each summary. Answer in JSON. The JSON should be a a dictionary with key "summaries" that contains a list (length 5) of dictionaries whose keys are "Missing_Entities" and "Denser_Summary". ================================================ FILE: docs/examples/prompt_templates/classification.txt ================================================ You are an experienced customer success manager. Given a request from a client, you need to determine when the request is urgent using the label "URGENT" or when it can wait a little with the label "STANDARD". # Examples Request: "How are you?" Label: STANDARD Request: "I need this fixed immediately!" Label: URGENT # TASK Request: {{ request }} Label: ================================================ FILE: docs/examples/prompt_templates/react_agent.txt ================================================ <|im_start|>system You are a world class AI model who answers questions in JSON with correct Pydantic schema. Here's the json schema you must adhere to: {{ schema }} Today is {{ today }} You run in a loop of Scratchpad, Thought, Action, Action Input, PAUSE, Observation. At the end of the loop you output a Final Answer. Use Scratchpad to store the information from the Observation useful to answer the question Use Thought to describe your thoughts about the question you have been asked and reflect carefully about the Observation if it exists. Use Action to run one of the actions available to you. Use Action Input to input the arguments of the selected action - then return PAUSE. Observation will be the result of running those actions. Your available actions are: calculate: e.g. calulate: 4**2 / 3 Runs a calculation and returns the number - uses Python so be sure to use floating point syntax if necessary wikipedia: e.g. wikipedia: Django Returns a summary from searching Wikipedia DO NOT TRY TO GUESS THE ANSWER. Begin! <|im_end|> <|im_start|>user {{ question }} <|im_end|> <|im_start|>assistant ================================================ FILE: docs/examples/prompt_templates/simtom_prospective_taking.txt ================================================ [INST] The following is a sequence of events about some characters, that takes place in multiple locations. Your job is to output only the events that the specified character, {{character}}, knows about. Here are a few rules: 1. A character knows about all events that they do. 2. If a character is in a certain room/location, that character knows about all other events that happens in the room. This includes other characters leaving or exiting the location, the locations of objects in that location, and whether somebody moves an object to another place. 3. If a character leaves a location, and is NOT in that location, they no longer know about any events that happen within that location. However, they can re-enter the location. Story: {{story}} What events does {{character}} know about? Only output the events according to the above rules, do not provide an explanation. [/INST] ================================================ FILE: docs/examples/prompt_templates/simtom_simulation.txt ================================================ [INST] {% for event in events %} {{event}} {% endfor %} You are {{name}}. Based on the above information, answer the following question: {{question}} You must choose one of the above choices, do not say there is not enough information. Answer with a single word, do not output anything else. [/INST] ================================================ FILE: docs/examples/qa-with-citations.md ================================================ # Generate Synthetic Data and Q&A with Citations This tutorial is adapted from the [instructor-ollama notebook](https://github.com/alonsosilvaallende/Hermes-Function-Calling/blob/main/examples/instructor_ollama.ipynb). We start with a simple example to generate synthetic data and then we approach the problem of question answering by providing citations. We will use [llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library. Outlines supports llama-cpp-python, but we need to install it ourselves: ```shell pip install llama-cpp-python ``` We download the model weights by passing the name of the repository on the HuggingFace Hub, and the filenames (or glob pattern): ```python import llama_cpp import outlines llm = llama_cpp.Llama( "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF", tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained( "NousResearch/Hermes-2-Pro-Llama-3-8B" ), n_gpu_layers=-1, flash_attn=True, n_ctx=8192, verbose=False ) model = outlines.from_llamacpp(llm) ``` ??? note "(Optional) Store the model weights in a custom folder" By default the model weights are downloaded to the hub cache but if we want so store the weights in a custom folder, we pull a quantized GGUF model [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF) by [NousResearch](https://nousresearch.com/) from [HuggingFace](https://huggingface.co/): ```shell wget https://hf.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf ``` We initialize the model: ```python from llama_cpp import Llama llm = Llama("/path/to/model/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ...) ``` ## Generate Synthetic Data We first need to define our Pydantic class for a user: ```python from pydantic import BaseModel, Field class UserDetail(BaseModel): id: int = Field(..., description="Unique identifier") # so the model keeps track of the number of users first_name: str last_name: str age: int ``` We then define a Pydantic class for a list of users: ```python from typing import List class Users(BaseModel): users: List[UserDetail] ``` We can use a `outlines.Generator` by passing this Pydantic class we just defined, and call the generator: ```python import json generator = outlines.Generator(model, Users) response = generator("Create 5 fake users", max_tokens=1024, temperature=0, seed=42) response = json.loads(response) print(response['users']) # [{'id': 1, 'first_name': 'John', 'last_name': 'Doe', 'age': 25}, # {'id': 2, 'first_name': 'Jane', 'last_name': 'Doe', 'age': 30}, # {'id': 3, 'first_name': 'Bob', 'last_name': 'Smith', 'age': 40}, # {'id': 4, 'first_name': 'Alice', 'last_name': 'Smith', 'age': 35}, # {'id': 5, 'first_name': 'John', 'last_name': 'Smith', 'age': 20}] ``` ```python for user in response['users']: print(user['first_name']) print(user['last_name']) print(user['age']) print("#####") # John # Doe # 25 # ##### # Jane # Doe # 30 # ##### # Bob # Smith # 40 # ##### # Alice # Smith # 35 # ##### # John # Smith # 20 # ##### ``` ## QA with Citations We first need to define our Pydantic class for QA with citations: ```python from typing import List from pydantic import BaseModel class QuestionAnswer(BaseModel): question: str answer: str citations: List[str] schema = QuestionAnswer.model_json_schema() ``` We then need to adapt our prompt to the [Hermes prompt format for JSON schema](https://github.com/NousResearch/Hermes-Function-Calling?tab=readme-ov-file#prompt-format-for-json-mode--structured-outputs): ```python from outlines import Template hermes_prompt = Template.from_string( """ <|im_start|>system You are a world class AI model who answers questions in JSON with correct and exact citations extracted from the `Context`. Here's the json schema you must adhere to: {{ schema }} <|im_end|> <|im_start|>user `Context`: {{ context }} `Question`: {{ question }} <|im_end|> <|im_start|>assistant """ ) ``` We can use `outlines.Generator` by passing the Pydantic class we previously defined, and call the generator with Hermes prompt: ```python question = "What did the author do during college?" context = """ My name is Jason Liu, and I grew up in Toronto Canada but I was born in China. I went to an arts high school but in university I studied Computational Mathematics and physics. As part of coop I worked at many companies including Stitchfix, Facebook. I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years. """ generator = outlines.Generator(model, QuestionAnswer) prompt = hermes_prompt(question=question, context=context, schema=schema) response = generator(prompt, max_tokens=1024, temperature=0, seed=42) print(response) # {"question": "What did the author do during college?", "answer": "The author studied Computational Mathematics and physics in university and was also involved in starting the Data Science club, serving as its president for 2 years.", "citations": ["I went to an arts high school but in university I studied Computational Mathematics and physics.", "I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years."]} ``` We can do the same for a list of question-context pairs: ```python question1 = "Where was John born?" context1 = """ John Doe is a software engineer who was born in New York, USA. He studied Computer Science at the Massachusetts Institute of Technology. During his studies, he interned at Google and Microsoft. He also founded the Artificial Intelligence club at his university and served as its president for three years. """ question2 = "What did Emily study in university?" context2 = """ Emily Smith is a data scientist from London, England. She attended the University of Cambridge where she studied Statistics and Machine Learning. She interned at IBM and Amazon during her summer breaks. Emily was also the head of the Women in Tech society at her university. """ question3 = "Which companies did Robert intern at?" context3 = """ Robert Johnson, originally from Sydney, Australia, is a renowned cybersecurity expert. He studied Information Systems at the University of Melbourne. Robert interned at several cybersecurity firms including NortonLifeLock and McAfee. He was also the leader of the Cybersecurity club at his university. """ question4 = "What club did Alice start at her university?" context4 = """ Alice Williams, a native of Dublin, Ireland, is a successful web developer. She studied Software Engineering at Trinity College Dublin. Alice interned at several tech companies including Shopify and Squarespace. She started the Web Development club at her university and was its president for two years. """ question5 = "What did Michael study in high school?" context5 = """ Michael Brown is a game developer from Tokyo, Japan. He attended a specialized high school where he studied Game Design. He later attended the University of Tokyo where he studied Computer Science. Michael interned at Sony and Nintendo during his university years. He also started the Game Developers club at his university. """ for question, context in [ (question1, context1), (question2, context2), (question3, context3), (question4, context4), (question5, context5), ]: prompt = hermes_prompt(question=question, context=context, schema=schema) generator = outlines.Generator(model, QuestionAnswer) response = generator(prompt, max_tokens=1024, temperature=0, seed=42) response = json.loads(response) print(question) print(response['answer']) print(response['citations']) print("\n\n") # 'Where was John born?' # 'John Doe was born in New York, USA.' # ['John Doe is a software engineer who was born in New York, USA.'] # # # 'What did Emily study in university?' # 'Emily studied Statistics and Machine Learning in university.' # ['She attended the University of Cambridge where she studied Statistics and Machine Learning.'] # # # 'Which companies did Robert intern at?' # 'Robert interned at NortonLifeLock and McAfee.' # ['Robert Johnson, originally from Sydney, Australia, is a renowned cybersecurity expert. He interned at several cybersecurity firms including NortonLifeLock and McAfee.'] # # # 'What club did Alice start at her university?' # 'Alice started the Web Development club at her university.' # ['Alice Williams, a native of Dublin, Ireland, is a successful web developer. She started the Web Development club at her university and was its president for two years.'] # # # 'What did Michael study in high school?' # 'Michael studied Game Design in high school.' # ['Michael Brown is a game developer from Tokyo, Japan. He attended a specialized high school where he studied Game Design.'] ``` This example was originally contributed by [Alonso Silva](https://github.com/alonsosilvaallende). ================================================ FILE: docs/examples/react_agent.md ================================================ # ReAct Agent This example shows how to use [outlines](https://dottxt-ai.github.io/outlines/) to build your own agent with open weights local models and structured outputs. It is inspired by the blog post [A simple Python implementation of the ReAct pattern for LLMs](https://til.simonwillison.net/llms/python-react-pattern) by [Simon Willison](https://simonwillison.net/). The ReAct pattern (for Reason+Act) is described in the paper [ReAct: Synergizing Reasoning and Acting in Language Models](https://arxiv.org/abs/2210.03629). It's a pattern where you implement additional actions that an LLM can take - searching Wikipedia or running calculations for example - and then teach it how to request the execution of those actions, and then feed their results back into the LLM. Additionally, we give the LLM the possibility of using a scratchpad described in the paper [Show Your Work: Scratchpads for Intermediate Computation with Language Models](https://arxiv.org/abs/2112.00114) which improves the ability of LLMs to perform multi-step computations. We use [llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library. Outlines supports llama-cpp-python, but we need to install it ourselves: ```shell pip install llama-cpp-python ``` We download the model weights by passing the name of the repository on the HuggingFace Hub, and the filenames (or glob pattern): ```python import llama_cpp import outlines llm = llama_cpp.Llama( "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF", tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained( "NousResearch/Hermes-2-Pro-Llama-3-8B" ), n_gpu_layers=-1, flash_attn=True, n_ctx=8192, verbose=False ) model = outlines.from_llamacpp(llm) ``` ??? note "(Optional) Store the model weights in a custom folder" By default the model weights are downloaded to the hub cache but if we want so store the weights in a custom folder, we pull a quantized GGUF model [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF) by [NousResearch](https://nousresearch.com/) from [HuggingFace](https://huggingface.co/): ```shell wget https://hf.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf ``` We initialize the model: ```python from llama_cpp import Llama llm = Llama("/path/to/model/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ...) ``` ## Build a ReAct agent In this example, we use two tools: - wikipedia: \ - search Wikipedia and returns the snippet of the first result - calculate: \ - evaluate an expression using Python's eval() function ```python import httpx def wikipedia(q): return httpx.get("https://en.wikipedia.org/w/api.php", params={ "action": "query", "list": "search", "srsearch": q, "format": "json" }).json()["query"]["search"][0]["snippet"] def calculate(numexp): return eval(numexp) ``` We define the logic of the agent through a Pydantic class. First, we want the LLM to decide only between the two previously defined tools: ```python from enum import Enum class Action(str, Enum): wikipedia = "wikipedia" calculate = "calculate" ``` Our agent will loop through Thought and Action. We explicitly give the Action Input field so it doesn't forget to add the arguments of the Action. We also add a scratchpad (optional). ```python from pydantic import BaseModel, Field class Reason_and_Act(BaseModel): Scratchpad: str = Field(..., description="Information from the Observation useful to answer the question") Thought: str = Field(..., description="It describes your thoughts about the question you have been asked") Action: Action Action_Input: str = Field(..., description="The arguments of the Action.") ``` Our agent will reach a Final Answer. We also add a scratchpad (optional). ```python class Final_Answer(BaseModel): Scratchpad: str = Field(..., description="Information from the Observation useful to answer the question") Final_Answer: str = Field(..., description="Answer to the question grounded on the Observation") ``` Our agent will decide when it has reached a Final Answer and therefore to stop the loop of Thought and Action. ```python from typing import Union class Decision(BaseModel): Decision: Union[Reason_and_Act, Final_Answer] json_schema = Decision.model_json_schema() ``` We then need to adapt our prompt to the [Hermes prompt format for JSON schema](https://github.com/NousResearch/Hermes-Function-Calling?tab=readme-ov-file#prompt-format-for-json-mode--structured-outputs) and explain the agent logic. We can load a template from a file for that: ```python from outlines import Template hermes_prompt = Template.from_file("prompt_templates/react_agent.txt") ``` We define a ChatBot class ```python class ChatBot: def __init__(self, prompt=""): self.prompt = prompt def __call__(self, user_prompt): self.prompt += user_prompt result = self.execute() return result def execute(self): generator = outlines.Generator(model, Decision) result = generator(self.prompt, max_tokens=1024, temperature=0, seed=42) return result ``` We define a query function: ```python import json def query(question, max_turns=5): i = 0 next_prompt = ( "\n<|im_start|>user\n" + question + "<|im_end|>" "\n<|im_start|>assistant\n" ) previous_actions = [] while i < max_turns: i += 1 prompt = generate_hermes_prompt( question=question, schema=Decision.model_json_schema(), today=datetime.datetime.today().strftime('%Y-%m-%d') ) bot = ChatBot(prompt=prompt) result = bot(next_prompt) json_result = json.loads(result)['Decision'] if "Final_Answer" not in list(json_result.keys()): scratchpad = json_result['Scratchpad'] if i == 0 else "" thought = json_result['Thought'] action = json_result['Action'] action_input = json_result['Action_Input'] print(f"\x1b[34m Scratchpad: {scratchpad} \x1b[0m") print(f"\x1b[34m Thought: {thought} \x1b[0m") print(f"\x1b[36m -- running {action}: {str(action_input)}\x1b[0m") if action + ": " + str(action_input) in previous_actions: observation = "You already run that action. **TRY A DIFFERENT ACTION INPUT.**" else: if action=="calculate": try: observation = eval(str(action_input)) except Exception as e: observation = f"{e}" elif action=="wikipedia": try: observation = wikipedia(str(action_input)) except Exception as e: observation = f"{e}" print() print(f"\x1b[33m Observation: {observation} \x1b[0m") print() previous_actions.append(action + ": " + str(action_input)) next_prompt += ( "\nScratchpad: " + scratchpad + "\nThought: " + thought + "\nAction: " + action + "\nAction Input: " + action_input + "\nObservation: " + str(observation) ) else: scratchpad = json_result["Scratchpad"] final_answer = json_result["Final_Answer"] print(f"\x1b[34m Scratchpad: {scratchpad} \x1b[0m") print(f"\x1b[34m Final Answer: {final_answer} \x1b[0m") return final_answer print(f"\nFinal Answer: I am sorry, but I am unable to answer your question. Please provide more information or a different question.") return "No answer found" ``` We can now test our ReAct agent: ```python print(query("What's 2 to the power of 10?")) # Scratchpad: # Thought: I need to perform a mathematical calculation to find the result of 2 to the power of 10. # -- running calculate: 2**10 # # Observation: 1024 # # Scratchpad: 2 to the power of 10 is 1024. # Final Answer: 2 to the power of 10 is 1024. # 2 to the power of 10 is 1024. ``` ```python print(query("What does England share borders with?")) # Scratchpad: # Thought: To answer this question, I will use the 'wikipedia' action to gather information about England's geographical location and its borders. # -- running wikipedia: England borders # # Observation: Anglo-Scottish border (Scottish Gaelic: Crìochan Anglo-Albannach) is an internal border of the United Kingdom separating Scotland and England which runs for # # Scratchpad: Anglo-Scottish border (Scottish Gaelic: Crìochan Anglo-Albannach) is an internal border of the United Kingdom separating Scotland and England which runs for # Final Answer: England shares a border with Scotland. # England shares a border with Scotland. ``` As mentioned in Simon's blog post, this is not a very robust implementation at all and there's a ton of room for improvement. But it is lovely how simple it is with a few lines of Python to make these extra capabilities available to the LLM. And now you can run it locally with an open weights LLM. This example was originally contributed by [Alonso Silva](https://github.com/alonsosilvaallende). ================================================ FILE: docs/examples/read-pdfs.md ================================================ # PDF to structured output with vision language models A common task with language models is to ask language models questions about a PDF file. Typically, the output is unstructured text, i.e. "talking" to your PDF. In some cases, you may wish to extract structured information from the PDF, like tables, lists, citations, etc. PDFs are difficult to machine read. However, you can simply convert the PDF to images, and then use a vision language model to extract structured information from the images. This cookbook demonstrates how to 1. Convert a PDF to a list of images 2. Use a vision language model to extract structured information from the images ## Dependencies You'll need to install these dependencies: ```shell pip install outlines pillow transformers torch==2.4.0 pdf2image # Optional, but makes the output look nicer pip install rich ``` ## Import the necessary libraries ```python from PIL import Image import outlines import torch from transformers import AutoProcessor from pydantic import BaseModel from typing import List, Optional from pdf2image import convert_from_path import os from rich import print import requests ``` ## Choose a model We've tested this example with [Pixtral 12b](https://huggingface.co/mistral-community/pixtral-12b) and [Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct). To use Pixtral: ```python from transformers import LlavaForConditionalGeneration, LlavaProcessor model_name="mistral-community/pixtral-12b" model_class=LlavaForConditionalGeneration processor_class = LlavaProcessor ``` To use Qwen-2-VL: ```python from transformers import Qwen2VLForConditionalGeneration, AutoProcessor model_name = "Qwen/Qwen2-VL-7B-Instruct" model_class = Qwen2VLForConditionalGeneration processor_class = AutoProcessor ``` You can load your model into memory with: ```python # This loads the model into memory. On your first run, # it will have to download the model, which might take a while. model_kwargs={"device_map": "auto", "torch_dtype": torch.bfloat16} processor_kwargs={"device_map": "cpu"} tf_model = model_class.from_pretrained(model_name, **model_kwargs) tf_processor = processor_class.from_pretrained(model_name, **processor_kwargs) model = outlines.from_transformers(tf_model, tf_processor) ``` ## Convert the PDF to images We'll use the `pdf2image` library to convert each page of the PDF to an image. `convert_pdf_to_images` is a convenience function that converts each page of the PDF to an image, and optionally saves the images to disk when `output_dir` is provided. Note: the `dpi` argument is important. It controls the resolution of the images. High DPI images are higher quality and may yield better results, but they are also larger, slower to process, and require more memory. ```python from pdf2image import convert_from_path from PIL import Image import os from typing import List, Optional def convert_pdf_to_images( pdf_path: str, output_dir: Optional[str] = None, dpi: int = 120, fmt: str = 'PNG' ) -> List[Image.Image]: """ Convert a PDF file to a list of PIL Image objects. Args: pdf_path: Path to the PDF file output_dir: Optional directory to save the images dpi: Resolution for the conversion. High DPI is high quality, but also slow and memory intensive. fmt: Output format (PNG recommended for quality) Returns: List of PIL Image objects """ # Convert PDF to list of images images = convert_from_path( pdf_path, dpi=dpi, fmt=fmt ) # Optionally save images if output_dir: os.makedirs(output_dir, exist_ok=True) for i, image in enumerate(images): image.save(os.path.join(output_dir, f'page_{i+1}.{fmt.lower()}')) return images ``` We're going to use the [Louf & Willard paper](https://arxiv.org/pdf/2307.09702) that described the method that Outlines uses for structured generation. To download the PDF, run: ```python # Download the PDF file pdf_url = "https://arxiv.org/pdf/2307.09702" response = requests.get(pdf_url) # Save the PDF locally with open("louf-willard.pdf", "wb") as f: f.write(response.content) ``` Now, we can convert the PDF to a list of images: ```python # Load the pdf images = convert_pdf_to_images( "louf-willard.pdf", dpi=120, output_dir="output_images" ) ``` ## Extract structured information from the images The structured output you can extract is exactly the same as everywhere else in Outlines -- you can use regular expressions, JSON schemas, selecting from a list of options, etc. ### Extracting data into JSON Suppose you wished to go through each page of the PDF, and extract the page description, key takeaways, and page number. You can do this by defining a JSON schema, and then using `outlines.Generator` to extract the data. First, define the structure you want to extract: ```python class PageSummary(BaseModel): description: str key_takeaways: List[str] page_number: int ``` Second, we need to set up the prompt. Adding special tokens can be tricky, so we use the transformers processor to apply the special tokens for us. To do so, we specify a list of messages, where each message is a dictionary with a `role` and `content` key. Images are denoted with `type: "image"`, and text is denoted with `type: "text"`. ```python messages = [ { "role": "user", "content": [ # The text you're passing to the model -- # this is where you do your standard prompting. {"type": "text", "text": f""" Describe the page in a way that is easy for a PhD student to understand. Return the information in the following JSON schema: {PageSummary.model_json_schema()} Here is the page: """ }, # This a placeholder, the actual image is passed in when # we call the generator function down below. {"type": "image", "image": ""}, ], } ] # Convert the messages to the final prompt prompt = tf_processor.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) ``` Now we iterate through each image, and extract the structured information: ```python # Page summarizer function page_summary_generator = outlines.Generator(model, PageSummary) for image in images: result = page_summary_generator({"text": prompt, "images": image}) print(result) ``` ### Regular expressions to extract the arxiv paper identifier The [arXiv paper identifier](https://info.arxiv.org/help/arxiv_identifier.html) is a unique identifier for each paper. These identifiers have the format `arXiv:YYMM.NNNNN` (five end digits) or `arXiv:YYMM.NNNN` (four end digits). arXiv identifiers are typically watermarked on papers uploaded to arXiv. arXiv identifiers are optionally followed by a version number, i.e. `arXiv:YYMM.NNNNNvX`. We can use a regular expression to define this patter: ```python from outlines.types import Regex paper_regex = Regex(r'arXiv:\d{2}[01]\d\.\d{4,5}(v\d)?') ``` We can build an extractor function from the regex: ```python id_extractor = outlines.Generator(model, paper_regex) ``` Now, we can extract the arxiv paper identifier from the first image: ```python arxiv_instruction = tf_processor.apply_chat_template( [ { "role": "user", "content": [ {"type": "text", "text": f""" Extract the arxiv paper identifier from the page. Here is the page: """}, {"type": "image", "image": ""}, ], } ], tokenize=False, add_generation_prompt=True ) # Extract the arxiv paper identifier paper_id = id_extractor({"text": arxiv_instruction, "images": images[0]}) ``` As of the time of this writing, the arxiv paper identifier is ``` arXiv:2307.09702v4 ``` Your version number may be different, but the part before `vX` should match. ### Categorize the paper into one of several categories `outlines.Generator` also allows the model to select one of several options by providing a Literal type hint with the categories. Suppose we wanted to categorize the paper into being about "language models", "cell biology", or "other". We would then define the output type as `Literal["llms", "cell biology", "other"]`. Let's define a few categories we might be interested in: ```python categories = [ "llms", "cell biology", "other" ] ``` Now we can construct the prompt: ```python categorization_instruction = tf_processor.apply_chat_template( [ { "role": "user", "content": [ {"type": "text", "text": f""" Please choose one of the following categories that best describes the paper. {categories} Here is the paper: """}, {"type": "image", "image": ""}, ], } ], tokenize=False, add_generation_prompt=True ) ``` Now we can show the model the first page and extract the category: ```python from typing import Literal # Build the choice extractor categorizer = outlines.Generator(model, Literal["llms", "cell biology", "other"]) # Categorize the paper category = categorizer({"text": categorization_instruction, "images": images[0]}) print(category) ``` Which should return: ``` llms ``` ## Additional notes You can provide multiple images to the model by 1. Adding additional image messages 2. Providing a list of images to the generator For example, to have two images, you can do: ```python two_image_prompt = tf_processor.apply_chat_template( [ { "role": "user", "content": [ {"type": "text", "text": "are both of these images of hot dogs?"}, # Tell the model there are two images {"type": "image", "image": ""}, {"type": "image", "image": ""}, ], } ], tokenize=False, add_generation_prompt=True ) # Pass two images to the model generator = outlines.Generator(model, Literal["hot dog", "not hot dog"]) result = generator({"text": two_image_prompt, "images": [images[0], images[1]]}) print(result) ``` Using the first to pages of the paper (they are not images of hot dogs), we should get ``` not hot dog ``` ================================================ FILE: docs/examples/receipt-digitization.md ================================================ # Receipt Data Extraction with VLMs ## Setup You'll need to install the dependencies: ```shell pip install outlines torch==2.4.0 transformers accelerate pillow rich ``` ## Import libraries Load all the necessary libraries: ```python # LLM stuff import outlines import torch from transformers import AutoProcessor from pydantic import BaseModel, Field from typing import Literal, Optional, List # Image stuff from PIL import Image import requests # Rich for pretty printing from rich import print ``` ## Choose a model This example has been tested with `mistral-community/pixtral-12b` ([HF link](https://huggingface.co/mistral-community/pixtral-12b)) and `Qwen/Qwen2-VL-7B-Instruct` ([HF link](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)). We recommend Qwen-2-VL as we have found it to be more accurate than Pixtral. If you want to use Qwen-2-VL, you can do the following: ```python # To use Qwen-2-VL: from transformers import Qwen2VLForConditionalGeneration, AutoProcessor model_name = "Qwen/Qwen2-VL-7B-Instruct" model_class = Qwen2VLForConditionalGeneration processor_class = AutoProcessor ``` If you want to use Pixtral, you can do the following: ```python # To use Pixtral: from transformers import LlavaForConditionalGeneration, LlavaProcessor model_name="mistral-community/pixtral-12b" model_class=LlavaForConditionalGeneration processor_class = LlavaProcessor ``` ## Load the model Load the model into memory: ```python model_kwargs={"device_map": "auto", "torch_dtype": torch.bfloat16} processor_kwargs={"device_map": "cuda"} tf_model = model_class.from_pretrained(model_name, **model_kwargs) tf_processor = processor_class.from_pretrained(model_name, **processor_kwargs) model = outlines.from_transformers(tf_model, tf_processor) ``` ## Image processing Images can be quite large. In GPU-poor environments, you may need to resize the image to a smaller size. Here's a helper function to do that: ```python def load_and_resize_image(image_path, max_size=1024): """ Load and resize an image while maintaining aspect ratio Args: image_path: Path to the image file max_size: Maximum dimension (width or height) of the output image Returns: PIL Image: Resized image """ image = Image.open(image_path) # Get current dimensions width, height = image.size # Calculate scaling factor scale = min(max_size / width, max_size / height) # Only resize if image is larger than max_size if scale < 1: new_width = int(width * scale) new_height = int(height * scale) image = image.resize((new_width, new_height), Image.Resampling.LANCZOS) return image ``` You can change the resolution of the image by changing the `max_size` argument. Small max sizes will make the image more blurry, but processing will be faster and require less memory. ## Load an image Load an image and resize it. We've provided a sample image of a Trader Joe's receipt, but you can use any image you'd like. Here's what the image looks like: ![Trader Joe's receipt](./images/trader-joes-receipt.jpg) ```python # Path to the image image_path = "https://raw.githubusercontent.com/dottxt-ai/outlines/refs/heads/main/docs/cookbook/images/trader-joes-receipt.jpg" # Download the image response = requests.get(image_path) with open("receipt.png", "wb") as f: f.write(response.content) # Load + resize the image image = load_and_resize_image("receipt.png") ``` ## Define the output structure We'll define a Pydantic model to describe the data we want to extract from the image. In our case, we want to extract the following information: - The store name - The store address - The store number - A list of items, including the name, quantity, price per unit, and total price - The tax - The total - The date - The payment method Most fields are optional, as not all receipts contain all information. ```python class Item(BaseModel): name: str quantity: Optional[int] price_per_unit: Optional[float] total_price: Optional[float] class ReceiptSummary(BaseModel): store_name: str store_address: str store_number: Optional[int] items: List[Item] tax: Optional[float] total: Optional[float] # Date is in the format YYYY-MM-DD. We can apply a regex pattern to ensure it's formatted correctly. date: Optional[str] = Field(pattern=r'\d{4}-\d{2}-\d{2}', description="Date in the format YYYY-MM-DD") payment_method: Literal["cash", "credit", "debit", "check", "other"] ``` ## Prepare the prompt We'll use the `tf_processor` to convert the image and the text prompt into a format that the model can understand. Practically, this is the code that adds user, system, assistant, and image tokens to the prompt. ```python # Set up the content you want to send to the model messages = [ { "role": "user", "content": [ { # The image is provided as a PIL Image object "type": "image", "image": image, }, { "type": "text", "text": f"""You are an expert at extracting information from receipts. Please extract the information from the receipt. Be as detailed as possible -- missing or misreporting information is a crime. Return the information in the following JSON schema: {ReceiptSummary.model_json_schema()} """}, ], } ] # Convert the messages to the final prompt prompt = tf_processor.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) ``` If you are curious, the final prompt that is sent to the model looks (roughly) like this: ``` <|im_start|>system You are a helpful assistant.<|im_end|> <|im_start|>user <|vision_start|><|image_pad|><|vision_end|> You are an expert at extracting information from receipts. Please extract the information from the receipt. Be as detailed as possible -- missing or misreporting information is a crime. Return the information in the following JSON schema: <|im_end|> <|im_start|>assistant ``` ## Run the model ```python # Prepare a function to process receipts receipt_summary_generator = outlines.Generator(model, ReceiptSummary) # Generate the receipt summary result = receipt_summary_generator( {"text": prompt, "images": image}, max_new_tokens=1024 ) print(result) ``` ## Output The output should look like this: ``` { "store_name": "Trader Joe's", "store_address": "401 Bay Street, San Francisco, CA 94133", "store_number": 0, "items": [ {"name": "BANANA EACH", "quantity": 7, "price_per_unit": 0.23, "total_price": 1.61}, {"name": "BAREBELLS CHOCOLATE DOUG", "quantity": 1, "price_per_unit": 2.29, "total_price": 2.29}, {"name": "BAREBELLS CREAMY CRISP", "quantity": 1, "price_per_unit": 2.29, "total_price": 2.29}, {"name": "BAREBELLS CHOCOLATE DOUG", "quantity": 1, "price_per_unit": 2.29, "total_price": 2.29}, {"name": "BAREBELLS CARAMEL CASHEW", "quantity": 2, "price_per_unit": 2.29, "total_price": 4.58}, {"name": "BAREBELLS CREAMY CRISP", "quantity": 1, "price_per_unit": 2.29, "total_price": 2.29}, {"name": "SPINDRIFT ORANGE MANGO 8", "quantity": 1, "price_per_unit": 7.49, "total_price": 7.49}, {"name": "Bottle Deposit", "quantity": 8, "price_per_unit": 0.05, "total_price": 0.4}, {"name": "MILK ORGANIC GALLON WHOL", "quantity": 1,"price_per_unit": 6.79,"total_price": 6.79}, {"name": "CLASSIC GREEK SALAD", "quantity": 1, "price_per_unit": 3.49, "total_price": 3.49}, {"name": "COBB SALAD", "quantity": 1, "price_per_unit": 5.99, "total_price": 5.99}, {"name": "PEPPER BELL RED XL EACH", "quantity": 1, "price_per_unit": 1.29, "total_price": 1.29}, {"name": "BAG FEE.", "quantity": 1, "price_per_unit": 0.25, "total_price": 0.25}, {"name": "BAG FEE.", "quantity": 1, "price_per_unit": 0.25, "total_price": 0.25}, ], "tax": 0.68, "total": 41.98, "date": "2023-11-04", "payment_method": "debit" } ``` Voila! You've successfully extracted information from a receipt using an LLM. ## Bonus: roasting the user for their receipt You can roast the user for their receipt by adding a `roast` field to the end of the `ReceiptSummary` model. ```python class ReceiptSummary(BaseModel): ... roast: str ``` which gives you a result like ``` { ... "roast": "You must be a fan of Trader Joe's because you bought enough items to fill a small grocery bag and still had to pay for a bag fee. Maybe you should start using reusable bags to save some money and the environment." } ``` Qwen is not particularly funny, but worth a shot. ================================================ FILE: docs/examples/simtom.md ================================================ # Build perspective-taking agents with SimToM Prompting strategies like Chain-of-Thought (CoT) can improve LLMs' reasoning capabilities. However, they underwhelm in tasks that require keeping track of inconsistent world states. [SimToM](https://arxiv.org/abs/2311.10227) proposes a simple, two-stage prompting framework for LLMs inspired by Simulation Theory. The authors showed that this approach outperforms zero-shot prompting and CoT on ToMI and BigToM, two benchmarks with Theory of Mind questions. In this example, we will implement SimToM with a few lines of code using Outlines' prompt templating and structured generation capabilities. ## How SimToM works SimToM calls an LLM with two consecutive prompts: 1. **Perspective-taking**: The first prompt receives a `story` and a `character`. The goal is to understand the situation based on the character's point of view and filter out the rest of the story. 2. **Question-Answering**: The second prompt receives the character's point of view from the previous step and tasks the LLM to answer a question using that context. ![Figure 2 in the paper](./images/simtom.png) ## Outlines implementation To implement SimToM with Outlines, we will need to: 1. Write the prompts with [prompt templates](https://dottxt-ai.github.io/outlines/latest/reference/prompting/). 2. Define the JSON object each prompt will return using Pydantic. 3. Generate responses with a Mistral model using the [transformers integration](https://dottxt-ai.github.io/outlines/latest/reference/models/transformers/). Let's dive into it! ### Using Prompt Templates The authors have shared their code, prompts and data in [this GitHub repository](https://github.com/shawnsihyunlee/simulatedtom). Below, we define in Outlines the prompts they used for the ToMI dataset: ```python from outlines import Template perspective_taking = Template.from_file("prompt_templates/simtom_prospective_taking.txt") simulation = Template.from_file("prompt_templates/simtom_simulation.txt") ``` ### JSON Structured Generation Outlines guarantees that the LLM will return a valid JSON object, which we can specify as a Pydantic model. We will need two Pydantic models for SimToM, one for each prompt: ```python from pydantic import BaseModel, Field from typing import List class PerspectiveTaking(BaseModel): """This is for the first prompt.""" character: str = Field(description="The character we extract the events for.") events: List[str] = Field(description="All events that the character knows about.") class Simulation(BaseModel): """This is for the second prompt.""" answer: str ``` ### Calling an LLM Let's try SimToM with an example from the ToMI dataset: ```python story = """ 1 Aria entered the front_yard. 2 Aiden entered the front_yard. 3 The grapefruit is in the green_bucket. 4 Aria moved the grapefruit to the blue_container. 5 Aiden exited the front_yard. 6 Noah entered the playroom. """ question = "7 Where was the grapefruit at the beginning?" character = "Aria" ``` We load `Mistral-7B-Instruct-v0.3`, create the prompt using the template we defined earlier, and generate a structured response. As a reminder, the goal of the first call is to get all the events a character, `Aria`, knows about. ```python import transformers import outlines # Load an LLM from Hugging Face MODEL_NAME = "mistral-community/Mistral-7B-Instruct-v0.3" model = outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME), transformers.AutoTokenizer.from_pretrained(MODEL_NAME), ) perspective_prompt = perspective_taking(story=story, character=character) # Call Mistral 7B with the first prompt generator = outlines.Generator(model, PerspectiveTaking) perspective = generator(perspective_prompt, max_new_tokens=1024) print(perspective) # {'character': 'Aria', 'events': ['1 Aria entered the front_yard.', '3 The grapefruit is in the green_bucket.', '4 Aria moved the grapefruit to the blue_container.']} ``` Not bad! We will now generate the second prompt with those events. ```python import json sim_prompt = simulation(events=json.loads(perspective)["events"], name=character, question=question) # Call Mistral 7B with the second prompt generator = outlines.Generator(model, Simulation) result = generator(sim_prompt, max_new_tokens=1024) print(result) # {'answer': 'green_bucket'} ``` And this is it! SimToM could be useful in agentic workflows, where agents must act based on what they know, not all available information. One caveat of SimToM is that the perspective-taking step may remove important information, leading to wrong results. As the authors note in their paper, it can feature as a simple and effective baseline for evaluating LLMs on Theory of Mind reasoning tasks. ================================================ FILE: docs/examples/structured_generation_workflow.md ================================================ # Structured Generation Workflow: Generating Synthetic Phone Numbers This is a condensed version of [Coding for Structured Generation with LLMs](https://blog.dottxt.co/coding-for-structured-generation.html). For this example we're going to be building an LLM program to generate **synthetic data** in the form of realistic looking phone numbers for Washington State. Using an LLM for this task *is a bit overkill* since we could just as easily accomplish this with a tool like [Faker](https://fakerjs.dev/), but this example still serves as a useful way to demonstrate a workflow for using structured generation. ## Unstructured approach Before diving into how to use structure generation for this task let's start with an unstructured example. We begin by loading our model: ```python import outlines import transformers model_name = 'microsoft/Phi-3-mini-4k-instruct' model = outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(model_name), transformers.AutoTokenizer.from_pretrained(model_name) ) ``` Next we need a prompt for this model. Since we're focusing on structured generation, we won't be engaging in any form of "prompt hacking" and will be leaving this prompt untouched for the rest of this example. ```python prompt_phone = """ Please generate a realistic phone number for Washington State in the following format (555) 555-5555 """ ``` With our prompt ready we can now generate 10 example phone numbers ```python phone_generator_unstruct = outlines.Generator(model) for _ in range(3): print(phone_generator_unstruct(prompt_phone, max_new_tokens=12)) ``` > I'd be happy to help you generate a realistic phone\ I cannot generate a real phone number as I'm just\ I'm an AI and don't have the ability\ Sure! Here is a randomly generated phone number in the format\ Here's a phone number that fits the format for a\ In Washington State, phone numbers typically have a three-dig\ Here are a few examples of phone numbers that could be considered\ I'd be happy to help generate a realistic phone number\ I'd be happy to help you generate a random phone\ Based on the format you provided, a realistic phone number for\ As we can see, none of these outputs are even phone numbers! Let's see if we can improve this using structured generation. ## The Structured Generation Workflow In order to solve this problem we're going to introduce a *Structured Generation Workflow* outlined in this image: !["Visual of Structured Generation Workflow"](./images/coding_structure_diagram.png) Let's step through this: ### Real example We start with a real example phone number, in this case for the Seattle Public Library, that we can use to verify the structure we are creating. ```python phone_number = "(206) 386-4636" ``` For a simple example like this, we'll just be using a single phone number, for more complex examples it can be helpful to have more examples. ### Draft Structure The next step in the process is for use to define a simple regex that we feel correctly models our real data. ```python from outlines.types import Regex phone_regex_1 = Regex(r'\([0-9]{3}\) [0-9]{3}-[0-9]{4}') ``` Next we need to validate this regex against our real data. ### Validate by matching examples Whenever writing non-trivial code with structured generation it is *essential* that you first validate the code against your real data example(s). We'll start with a simple method of validation: just checking that our regex matches the data. ``` import re re.match(phone_regex_1.pattern, phone_number) # ``` Now that we have a match, we can move on to generating structured output! ### Generate Structure We're ready to see if structured generation can make an improvement over our initial unstructured approach: ```python phone_generator_v1 = outlines.Generator(model, phone_regex_1) for _ in range(3): print(phone_generator_v1(prompt_phone)) ``` > (206) 555-1234\ (206) 555-1234\ (206) 555-1234\ (206) 555-1234\ (206) 555-1234\ (206) 555-1234\ (206) 123-4567\ (206) 555-1234\ (206) 555-1234\ (206) 555-1234 At least we have phone numbers! But I think we can do better! ### Inspect output In this case the model *did* create phone numbers and, impressively, got the area code correct. So using structured generation did improve things. However these numbers are pretty boring. Let's improve that structure! ## Iteration We've walked through the loop once, so we can go quickly now through each iteration. We start by improving our structure: ```python phone_regex_2 = Regex(r'\([0-9]{3}\) [2-46-9]{3}-[02-9]{4}') ``` Before rushing to another round of generation, let's validate this new regex. We'll add just a bit more sophistication over our last check: ```python re.match(phone_regex_2.pattern, phone_number)[0] == phone_number # True ``` Now that we've validated, let's generate with this new regex! ```python phone_generator_v2 = outlines.Generator(model, phone_regex_2) for _ in range(3): print(phone_generator_v2(prompt_phone)) ``` > (206) 867-5309\ (206) 666-7777\ (206) 444-3333\ (206) 444-3333\ (206) 943-2222\ (206) 323-6789\ (206) 444-3333\ (206) 867-5309\ (206) 466-2255\ (206) 222-3333 Better, but I don't like those repeated sequences. Like good software developers, let's iterate again! ## Reiteration - with debugging Here's a fancier regex that should give us more interesting results: ```python phone_regex_3_error = r'\([0-9]{3}\) [2-4][7-9][4-6]-[3-6][2-8][1-4]' ``` This looks good to me, but there's a subtle bug, that's why we *always* need to validate our structure against real data. This time we'll make our validator do a bit more work to verify the correct string is matched: ```python if not re.match(phone_regex_3_error, phone_number): print("Regex fails match") else: matched_string = re.match(phone_regex_3_error, phone_number)[0] if matched_string == phone_number: print("Successful match") else: print(f"Error {matched_string} != {phone_number}") ``` This prints out: > Error (206) 386-463 != (206) 386-4636 Ah! We were missing the last digit, let's fix that and regenerate: ```python phone_regex_3_fixed = Regex(r'\([0-9]{3}\) [2-4][7-9][4-6]-[3-6][2-8][1-4][6-9]') phone_generator_v3 = outlines.Generator(model, phone_regex_3_fixed) for _ in range(3): print(phone_generator_v3(prompt_phone)) ``` >(206) 494-3216\ (206) 374-6218\ (206) 494-3337\ (206) 476-3216\ (206) 484-3548\ (206) 495-3218\ (206) 494-5517\ (206) 375-4636\ (206) 384-6216\ (206) 385-6218 Much better! Now you've seen a quick example of the structured generation workflow that can be used at the basis for building and iteration on much larger structured generation tasks! ================================================ FILE: docs/features/advanced/backends.md ================================================ --- title: Structured Generation Backends --- # Structured Generation Backends Outlines relies on a structured generation backend to control text generation for steerable models such thah they conform to the output type provided. One of those backends is of course `outlines-core`, but you also have access to two other libraries that fulfill the same purpose: `llguidance` and `xgrammar`. ## Overview To select the backend to use for your generation, provide a value for the `backend` argument when calling a model or a generator. For instance: ```python from typing import Literal import outlines from transformers import AutoModelForCausalLM, AutoTokenizer output_type = Literal["Paris", "London", "Rome", "Berlin"] model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) result = model("What is the capital of France?", output_type, backend="llguidance") print(result) # 'Paris' generator = outlines.Generaor(model, output_type) result = generator("What is the capital of France?", backend="xgrammar") print(result) # 'Paris' ``` If you do not provide a value for the `backend` argument, the default value will be used. The default value depends on the type of output type: - JSON schema: `outlines_core` - Regex: `outlines_core` - Context-free grammar: `llguidance` ## Features matrix As mentioned previously, selecting the structured generation backend is only applicable to steerable models, so `Transformers`, `LlmaCpp` and `MLXLM`. Additionaly, some backends do not support some models within those or some output types. | | outlines_core | llguidance | xgrammar | |---|---|---|---| | **Models** | | | | | Transformers | ✅ | ✅ | ✅ | | LlamaCpp | ✅ | ✅ | ❌ | | MLXLM | ✅ | ✅ | ✅ | | **Output Types** | | | | | JSON Schema | ✅ | ✅ | ✅ | | Regex | ✅ | ✅ | ✅ | | Grammar | ❌ | ✅ | ✅ | ================================================ FILE: docs/features/advanced/logits_processors.md ================================================ --- title: Logits Processors --- # Logits Processors Logits processors are objects that control text generation by modifying the probability distribution of possible next tokens. They do this by adjusting the logits (raw model outputs) at each generation step, effectively biasing the model's token selection. Processors can be used to: 1. Generate structured output (e.g., JSON that follows a specific schema) 2. Prevent the model from generating specific words or tokens 3. Implement custom token sampling strategies ## Overview Outlines uses logits processors with steerable models — models that run locally and allow fine-grained control over the generation process. When using such models in Outlines, the output type provided is turned into a logits processor that is then passed to the inference engine. There are three models that support logits processors: - LlamaCpp - MLXLM - Transformers Instead of providing an output type that will be turned into a logits processor, it is possible to directly provide a logits processor. To do so, you must create a `Generator` instance using the `processor` keyword argument. You cannot directly call the model with a logits processor. For instance: ```python import transformers from outlines import Generator, from_transformers from outlines.processors import RegexLogitsProcessor # Create a model model = from_transformers( transformers.AutoModelForCausalLM.from_pretrained("NousResearch/Hermes-2-Pro-Llama-3-8B"), transformers.AutoTokenizer.from_pretrained("NousResearch/Hermes-2-Pro-Llama-3-8B") ) # Create a regex logits processor that only returns hex unicode notations logits_processor = RegexLogitsProcessor(r"U\+[0-9A-Fa-f]{4,6}", model.tokenizer, model.tensor_library_name) # Create a generator with the logits processor and use it to generate text generator = Generator(model, processor=logits_processor) response = generator("What's the unicode for the hugging face emoji") print(response) # U+1F917 ``` ## Creating Custom Logits Processors You can create your own logits processor by subclassing the `OutlinesLogitsProcessor` class. This allows you to implement specific logic to modify logits as needed. Your logits processor needs to implement the `process_logits` method to modify the logits. `process_logits` accepts: - `input_ids`: the ids of the tokens of the existing sequences in a 2D tensor. - `logits`: the logits for the current generation step in a 2D tensor. In the example below, we create a custom logits processor to force the model to provide a response using only binary representation (so only the tokens for 0 and 1 are allowed): ```python from outlines.processors.base_logits_processor import OutlinesLogitsProcessor, TensorType from outlines import Generator, from_transformers import transformers ALLOWED_TOKENS = [15, 16] # token IDs corresponding to '0' and '1' in the model's vocabulary # Subclass OutlinesLogitsProcessor class BinaryLogitsProcessor(OutlinesLogitsProcessor): def process_logits(self, input_ids: TensorType, logits: TensorType) -> TensorType: # Create a mask for all tokens mask = self.tensor_adapter.boolean_ones_like(logits) # Set mask to False for the allowed tokens for token_id in ALLOWED_TOKENS: mask[:, token_id] = False # Set non-allowed tokens to -inf so they are not selected logits[mask] = float("-inf") return logits # Create a regular model tf_tokenizer = transformers.AutoTokenizer.from_pretrained("NousResearch/Hermes-2-Pro-Llama-3-8B") tf_model = transformers.AutoModelForCausalLM.from_pretrained("NousResearch/Hermes-2-Pro-Llama-3-8B") model = from_transformers(tf_model, tf_tokenizer) # Instantiate your custom logits processor logits_processor = BinaryLogitsProcessor(model.tensor_library_name) prompt = "Write the number 47 in binary. For example, 1010 is the binary representation of 10. Answer just with the binary number composed of 0s and 1s." formatted_prompt = tf_tokenizer.apply_chat_template( [{"role": "user", "content": prompt}], tokenize=False ) # Create a generator with the custom logits processor instance and use it to generate text generator = Generator(model, processor=logits_processor) response = generator(formatted_prompt) print(response) # "101111" ``` ================================================ FILE: docs/features/core/generator.md ================================================ --- title: Generator API --- # Generator The `Generator` class is the core component of Outlines v1. `Generator` accepts a [model](../models/index.md) and an optional [output type](../core/output_types.md). If no output type is provided, the `Generator` will return unstructured text. !!! note `Generator` is new as of Outlines v1, and replaces previous generator constructors: - `generate.cfg` - `generate.choice` - `generate.format` - `generate.fsm` - `generate.json` - `generate.regex` - `generate.text` ## Methods Generators implement the same methods as models: - `__call__` - `batch` - `stream` All of them take a single positional argument: the [model input](../core/inputs.md) from which text is generated. Contrarily to the equivalent methods of models, you do not need to provide an output type as it has already been defined when initializing the generator. ## Basic Usage ```python from outlines import Generator, from_transformers import transformers # Initialize a model model_name = "HuggingFaceTB/SmolLM2-135M-Instruct" model = from_transformers( transformers.AutoModelForCausalLM.from_pretrained(model_name), transformers.AutoTokenizer.from_pretrained(model_name), ) # Create a generator for plain text generator = Generator(model) result = generator("Write a short poem about AI.") # Print the result print(result) ``` ## Structured Generation ```python from pydantic import BaseModel from outlines import Generator, from_transformers import transformers # Define a Pydantic model for structured output class BookRecommendation(BaseModel): title: str author: str year: int # Initialize a model model_name = "HuggingFaceTB/SmolLM2-135M-Instruct" model = from_transformers( transformers.AutoModelForCausalLM.from_pretrained(model_name), transformers.AutoTokenizer.from_pretrained(model_name), ) # Create a generator for JSON output generator = Generator(model, BookRecommendation) # Generate a book recommendation result = generator("Recommend a science fiction book.") # Parse the JSON result into a Pydantic model book = BookRecommendation.model_validate_json(result) print(f"{book.title} by {book.author} ({book.year})") ``` ## Parameters - `model`: The language model to use for generation - `output_type`: Optional. The type of output to generate ## Generation Parameters When calling the generator, you can pass additional parameters to control the generation process. These parameters are passed through to the underlying model, so they depend on the specific model being used. Common parameters for most models include: - `max_new_tokens`: Maximum number of tokens to generate - `temperature`: Controls randomness (higher values = more random) - `top_p`: Controls diversity via nucleus sampling - `stop_strings`: String or list of strings at which to stop generation Example: ```python result = generator( "Write a short story.", max_new_tokens=200, temperature=0.7, top_p=0.9, stop_strings=["THE END", "###"] ) ``` ## Return Value The generator always returns a raw string containing the generated text. When generating structured outputs, you need to parse this string into the desired format. Unlike in Outlines v0, where the return type could be a parsed object, in v1 you are responsible for parsing the output when needed: ```python # Outlines v1 approach from pydantic import BaseModel from outlines import Generator class Person(BaseModel): name: str age: int generator = Generator(model, Person) result = generator("Generate a person:") # Parse the result yourself person = Person.model_validate_json(result) ``` ::: outlines.generator.Generator ================================================ FILE: docs/features/core/inputs.md ================================================ --- title: Model Inputs --- # Model Inputs Outlines models accept various types of inputs to generate text. The input format depends on the capabilities of the underlying model and the type of task you want to perform. The most basic type of input is a single string prompt, it's accepted by all models. ## Overview The model input is the first argument of the `__call__`, `stream` and `batch` methods of both models and generators. There are 3 types of model inputs: - **Text prompts** - Simple strings - **Multimodal inputs** - List containning a string prompt along with assets - **Chat inputs** - `Chat` instances containing messages ## Text Prompts The simplest form of input is a plain text string. This works with all models and is suitable for standard text generation tasks. ```python import outlines from transformers import AutoModelForCausalLM, AutoTokenizer # Create a model model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), ) # Simple text prompt response = model("What's the capital of France?", max_new_tokens=20) print(response) # 'Paris' ``` ## Multimodal Inputs (Vision) For models that support them, you can provide a list containing a text prompt and one or more assets. There are 3 types of assets defined in Outlines: - `Image`: contains a PIL Image - `Video`: contains any object (you must choose a format that is supported by your model) - `Audio`: contains any object (you must choose a format that is supported by your model) Among those, `Image` is by far the most important as multiple models support vision inputs. For instance with vision input: ```python import io import requests import PIL import outlines import openai from outlines.inputs import Image # Create the model model = outlines.from_openai( openai.OpenAI(), "gpt-4o" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the prompt containing the text and the image prompt = [ "Describe the image", Image(get_image("https://picsum.photos/id/237/400/300")) ] # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'This is a picture of a black dog.' ``` ## Chat Inputs For conversational models, you can use the `Chat` class to provide a conversation history with multiple messages. A `Chat` instance is instantiated with an optional list of messages. Each message must be a dictionary containing two mandatory keys: - `role`: must be one of `system`, `assistant` or `user` - `content`: must be either a string or a multimodal input (if the model supports it) For instance: ```python import io import requests import PIL from outlines.inputs import Chat, Image # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the chat input prompt = Chat([ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": ["Describe the image", Image(get_image("https://picsum.photos/id/237/400/300"))] }, ]) print(prompt) # {'role': 'system', 'content': 'You are a helpful assistant.'} # {'role': 'user', 'content': ['Describe the image', Image(image=)]} ``` After having created a `Chat` instance, you can add one or several messages thanks to the `append` and `extend` methods. You can also remove the last message of the Chat with the `pop` method. For instance: ```python from outlines.inputs import Chat # Create the chat input prompt = Chat([ {"role": "system", "content": "You are a helpful assistant."}, ]) # Add a message prompt.append({"role": "user", "content": "How are you doing today?"}) print(prompt) # {'role': 'system', 'content': 'You are a helpful assistant.'} # {'role': 'user', 'content': 'How are you doing today?'} # Remove the last messsage last_message = prompt.pop() print(last_message) # {'role': 'user', 'content': 'How are you doing today?'} print(prompt) # {'role': 'system', 'content': 'You are a helpful assistant.'} # RAdd several messages prompt.extend([ {"role": "user", "content": "How are you doing today?"}, {"role": "assistant", "content": "Excellent, thanks!"} ]) print(prompt) # {'role': 'system', 'content': 'You are a helpful assistant.'} # {'role': 'user', 'content': 'How are you doing today?'} # {'role': 'assistant', 'content': 'Excellent, thanks!'} ``` Finally, there are three convenience method to easily add a message: - add_system_message - add_user_message - add_assistant_message As the role is already set, you only need to provide the content. For instance: ```python from outlines.inputs import Chat # Create the chat input prompt = Chat() prompt.add_system_message("You are a helpful assistant.") prompt.add_system_message("How are you doing today?") prompt.add_system_message("Excellent, thanks!") print(prompt) # {'role': 'system', 'content': 'You are a helpful assistant.'} # {'role': 'user', 'content': 'How are you doing today?'} # {'role': 'assistant', 'content': 'Excellent, thanks!'} ``` ## Batching In the case of batching, for models that support it, you just have to provide several instances of the model inputs described above in a list. For instance: ```python import outlines from transformers import AutoModelForCausalLM, AutoTokenizer # Create model model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) # Create a list of prompts that will be used in a single batch prompts = [ "What's the capital of Lithuania?", "What's the capital of Latvia?", "What's the capital of Estonia?" ] # Call it to generate text result = model.batch(prompts, max_new_tokens=20) print(result) # ['Vilnius', 'Riga', 'Tallinn'] ``` ================================================ FILE: docs/features/core/output_types.md ================================================ --- title: Output Types --- # Output Types Outlines provides a simple and intuitive way of defining the output structure of text generation. Possible output formats include basic Python types, multiple-choices, JSON schemas, regular expressions and context-free grammars. ## Overview Outlines models accept a __prompt__ and an __output type__ when they are invoked, as well as additional inference keyword arguments that are forwarded on to the underlying model. Output types can be from the general Python ecosystem, including: - Most native Python types, such as `int` or `str` - Types from the `typing` module, such as `Literal`, `List`, `Dict`, `Enum`, etc - Types from popular third party libraries such as Pydantic or GenSON. Outlines also provides special classes for certain output structures (more details below): - Multiple choices with `Choice` - JSON schemas with `JsonSchema` - Regular expressions with `Regex` - Context-free grammars with `CFG` The general idea is that you should provide as an output type what you would give as the type hint of the return type of a function. Consider the following functions for instance: ```python from datetime import date from typing import Dict, List, Literal, Union from pydantic import BaseModel class Character(BaseModel): name: str birth_date: date skills: Union[Dict, List[str]] def give_int() -> int: ... def pizza_or_burger() -> Literal["pizza", "burger"]: ... def create_character() -> Character: ... ``` With an Outlines model, you can generate text that respects the type hints above by providing those as the output type: ```python model("How many minutes are there in one hour", int) # "60" model("Pizza or burger", Literal["pizza", "burger"]) # "pizza" model("Create a character", Character, max_new_tokens=100) # '{"name": "James", "birth_date": "1980-05-10)", "skills": ["archery", "negotiation"]}' ``` An important difference with function type hints though is that an Outlines generator always returns a string. You have to cast the response into the type you want yourself. For instance: ```python result = model("Create a character", Character, max_new_tokens=100) casted_result = Character.model_validate_json(result) print(result) # '{"name": "Aurora", "birth_date": "1990-06-15", "skills": ["Stealth", "Diplomacy"]}' print(casted_result) # name=Aurora birth_date=datetime.date(1990, 6, 15) skills=['Stealth', 'Diplomacy'] ``` ## Output Type Categories We can group possible output types in several categories based on the use case they correspond to. While most of those types are native python or types coming from well-known third-party libraries, there are three Outlines-specific types: `JsonSchema`, `Regex` and `CFG`. Their use is explained below. ### Basic Python Types The most straightforward form of structured generation is to return an answer that conforms to a given basic type such as an int or a python list. You can use the basic Python types and the types from the `typing` library. For instance: ```python from typing import Dict output_type = float # example of valid value: "0.05" output_type = bool # example of valid value: "True" output_type = Dict[int, str] # example of valid value: "{1: 'hello', 2: 'there'}" ``` You can combine types to create more complex response formats by relying on collection types and types such as `Union` and `Optional`. Let's consider for instance the output type below used to represent semi-structured data: ```python from typing import Dict, List, Optional, Tuple, Union output_type = Dict[str, Union[int, str, List[Tuple[str, Optional[float]]]]] ``` Values created with this output type would be dictionaries with string as keys and values made of either an integer, a string or a list of two elements tuples: a string and either a float or None. Example of a valid response for text generated with this output type (it would be contained in a string): ```json { "name": "Alice", "age": 30, "metrics": [("engagement", 0.85), ("satisfaction", None)] } ``` ### Multiple Choices Outlines supports multiple choice classification by using the `Literal` or `Enum` output types. For instance: ```python from enum import Enum from typing import Literal class PizzaOrBurger(Enum): pizza = "pizza" burger = "burger" # Equivalent multiple-choice output types output_type = Literal["pizza", "burger"] output_type = PizzaOrBurger ``` Additionally, you can use the Outlines-specific type `Choice` that takes a `list` as an argument. This type is useful in situations in which the list of choices is dynamic. For instance: ```python from outlines.types import Choice def get_multiple_choices() -> list: # we could have something complex here return ["pizza", "burger"] output_type = Choice(get_multiple_choices()) ``` ### JSON Schemas Multiple different common Python types are often used to store information equivalent to a JSON schema. The following can be used in Outlines to generate text that respects a JSON schema: - A Pydantic class - A Dataclass - A TypedDict - A [GenSON](https://github.com/wolverdude/GenSON) `SchemaBuilder` - A Callable (the parameters are turned into the keys and the type hinting is used to define the types of the values) For instance: ```python from dataclasses import dataclass @dataclass class Character: name: str age: int output_type = Character def character(name: str, age: int): return None output_type = character ``` There are two other JSON schema formats that require Outlines-specific classes: JSON schema strings and dictionaries. As those are contained in regular Python strings or dictionaries, the associated output format would be ambiguous if they were to be provided directly. As a result, Outlines requires them to be wrapped in a `outlines.types.JsonSchema` object. For instance: ```python from outlines.types import JsonSchema schema_string = '{"type": "object", "properties": {"answer": {"type": "number"}}}' output_type = JsonSchema(schema_string) schema_dict = { "type": "object", "properties": { "answer": {"type": "number"} } } output_type = JsonSchema(schema_dict) ``` `JsonSchema` accepts two optional parameters: - `whitespace_pattern` (defaults to `None`): specifies the pattern to use for JSON syntactic whitespace. If none is provided, the default permissive JSON whitespace rules are used. - `ensure_ascii` (defaults to `True`): defines the value to use for the argument `ensure_ascii` of the `json.dumps` method. If false, non-ASCII characters will be turned into unicodes. ### Regex Patterns Outlines provides support for text generation constrained by regular expressions. Since regular expressions are expressed as simple raw string literals, regex strings must wrapped in an `outlines.types.Regex` object. ```python from outlines.types import Regex regex = r"[0-9]{3}" output_type = Regex(regex) ``` The `outlines.types` module contains a few common regex patterns stored in variables you can import and directly use as output types. Common patterns include a sentence, an email address and an [ISBN reference](https://en.wikipedia.org/wiki/ISBN). For instance: ```python from outlines.types import sentence print(type(sentence)) # outlines.types.dsl.Regex print(sentence.pattern) # [A-Z].*\s*[.!?] ``` To help you create complex regex patterns yourself, you can use the Outlines [regex DSL](../../utility/regex_dsl). ### Context-Free Grammars Outlines allows you to generate text that respects the syntax of a context-free grammar. Context-free grammars are defined using [Lark](https://lark-parser.readthedocs.io/en/latest/index.html), a grammar language. Since grammars are expressed as a string, Large CFG strings should be be wrapped in an `outlines.types.CFG` object. For instance: ```python from outlines.types import CFG grammar_string = """ start: expr expr: "{" expr "}" | "[" expr "]" | """ output_type = CFG(grammar_string) ``` You can find a few Lark grammar examples in the [grammars module](../../api_reference/grammars.md). ## Output type availability The output types presented above are not available for all models as some have only limited support for structured outputs. Please refer to the documentation of the specific model you wish to use to know what output types it supports. ================================================ FILE: docs/features/index.md ================================================ # Features This section presents in details the different features of Outlines. ## Core Concepts - [Models](./models/index.md) - [Model Inputs](./core/inputs.md) - [Output Types](./core/output_types.md) - [Generators](./core/generator.md) ## Utilities - [Applications](./utility/application.md) - [Templates](./utility/templates.md) - [Regex DSL](./utility/regex_dsl.md) ## Advanced - [Logits Processors](./advanced/logits_processors.md) ================================================ FILE: docs/features/models/anthropic.md ================================================ --- title: Anthropic --- # Anthropic !!! Installation You need to install the `anthropic` library to be able to use the Anthropic API in Outlines. Install all optional dependencies of the `Anthropic` model with: `pip install "outlines[anthropic]"`. You also need to have an Anthropic API key. This API key must either be set as an environment variable called `ANTHROPIC_API_KEY` or be provided to the `anthropic.Anthropic` class when instantiating it. ## Model Initialization To create an Anthropic model instance, you can use the `from_anthropic` function. It takes 2 arguments: - `client`: an `anthropic.Anthropic` instance - `model_name`: the name of the model you want to use in subsequent model calls (optional) For instance: ```python from anthropic import Anthropic import outlines # Create the Anthropic client client = Anthropic() # Create the model model = outlines.from_anthropic( client, "claude-3-5-sonnet-latest" ) ``` Check the [Anthropic documentation](https://docs.anthropic.com/en/docs/about-claude/models) for an up-to-date list of available models. ## Text Generation Once you've created your Outlines `Anthropic` model instance, you're all set to generate text with this provider. You can simply call the model with a text prompt. For instance: ```python from anthropic import Anthropic import outlines # Create the model model = outlines.from_anthropic( Anthropic(), "claude-3-5-sonnet-latest" ) # Call it to generate text response = model("What's the capital of Latvia?", max_tokens=20) print(response) # 'Riga' ``` #### Vision Some Anthropic models support vision input. To use this feature, provide a list containing a text prompt and `Image` instances. For instance: ```python import io import requests import PIL from anthropic import Anthropic from outlines import from_anthropic from outlines.inputs import Image # Create the model model = from_anthropic( Anthropic(), "claude-3-5-sonnet-latest" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the prompt containing the text and the image prompt = [ "Describe the image", Image(get_image("https://picsum.photos/id/237/400/300")) ] # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'This is a picture of a black dog.' ``` #### Chat You can also use chat inputs with the `Anthropic` model. To do so, call the model with a `Chat` instance. The content of messsage within the chat can be vision inputs as described above. For instance: ```python import io import requests import PIL from anthropic import Anthropic from outlines import from_anthropic from outlines.inputs import Chat, Image # Create the model model = from_anthropic( Anthropic(), "claude-3-5-sonnet-latest" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the chat input prompt = Chat([ {"role": "user", "content": "You are a helpful assistant that helps me described pictures."}, {"role": "assistant", "content": "I'd be happy to help you describe pictures! Please go ahead and share an image"}, { "role": "user", "content": ["Describe the image", Image(get_image("https://picsum.photos/id/237/400/300"))] }, ]) # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'This is a picture of a black dog.' ``` #### Streaming Finally, the `Anthropic` model supports streaming through the `stream` method. For instance: ```python from anthropic import Anthropic import outlines # Create the model model = outlines.from_anthropic( Anthropic(), "claude-3-5-sonnet-latest" ) # Stream the response for chunk in model.stream("Tell me a short story about a cat.", max_tokens=50): print(chunk) # 'Once...' ``` ## Inference arguments When calling the model or streaming, you can provide keyword arguments that will be passed down to the Anthropic client. Make sure to include all the arguments you need to configure the client's behavior to your expected behavior. Some of the most common arguments include `max_tokens`, `temperature`, `stop_sequences` and `top_k`. See the [Anthropic API documentation](https://docs.anthropic.com/en/api/messages) for the full list of available arguments. !!! Warning You must set a value for `max_tokens` with Anthropic models. ================================================ FILE: docs/features/models/dottxt.md ================================================ --- title: Dottxt --- # Dottxt !!! Installation You need to install the `dottxt` python sdk to be able to use the Dottxt API in Outlines. Install all optional dependencies of the `Dottxt` model with: `pip install "outlines[dottxt]"`. You also need to have a Dottxt API key. This API key must either be set as an environment variable called `DOTTXT_API_KEY` or be provided to the `dottxt.client.Dottxt` class when instantiating it. ## Model Initialization To create an Dottxt model instance, you can use the `from_dottxt` function. It takes 3 arguments: - `client`: a `dottxt.client.Dottxt` instance - `model_name`: the name of the model you want to use in subsequent model calls (optional) - `model_revision`: the name of the revision to use for the model selected (optional) For instance: ```python from dottxt.client import Dottxt import outlines # Create client client = Dottxt(api_key="...") # Create the model model = outlines.from_dottxt( client, "meta-llama/Llama-3.1-8B", "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b" ) ``` Use the `list_models` method of the Dottxt client to get a list of available model names and revisions for your account. ## Text Generation Dottxt only supports constrained generation with JSON schema output types. You must always provide a value for the `output_type` parameter as unconstrained generation is not available. For instance: ```python from typing import List from pydantic import BaseModel from dottxt.client import Dottxt import outlines class Character(BaseModel): name: str age: int skills: List[str] # Create the model model = outlines.from_dottxt( Dottxt(), "meta-llama/Llama-3.1-8B", "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b" ) # Generate structured text result = model("Create a character", Character) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] ``` ## Inference arguments You can provide the same optional parameters you would pass to the `dottxt` sdk's client both during the initialization of the `Dottxt` class and when generating text. Some of the common inference arguments include `max_tokens`, `frequency_penalty`, `presence_penalty` and `temperature`. Consult the [dottxt python sdk GitHub repository](https://github.com/dottxt-ai/dottxt-python) for the full list of parameters. ================================================ FILE: docs/features/models/gemini.md ================================================ # Gemini !!! Installation You need to install the `google.genai` libray to be able to use the Gemini API in Outlines. Install all optional dependencies of the `Gemini` model with: `pip install "outlines[gemini]"`. You also need to have a Gemini API key. This API key must either be set as an environment variable called `GEMINI_API_KEY` or be provided to the `google.genai.Client` class when instantiating it. ## Model Initialization To create a Gemini model instance, you can use the `from_gemini` function. It takes 2 arguments: - `client`: a `google.genai.Client` instance - `model_name`: the name of the model you want to use in subsequent model calls (optional) For instance: ```python import outlines from google import genai # Create the client client = genai.Client() # Create the model model = outlines.from_gemini( client, "gemini-1.5-flash-latest" ) ``` Check the [Gemini documentation](https://github.com/googleapis/python-genai) for an up-to-date list of available models. ## Text Generation Once you've created your Outlines `Gemini` model instance, you're all set to generate text with this provider. You can simply call the model with a prompt. For instance: ```python import outlines from google.genai import Client # Create the model model = outlines.from_gemini( Client(), "gemini-1.5-flash-latest" ) # Call it to generate text result = model("What's the capital of Latvia?", max_output_tokens=20) print(result) # 'Riga' ``` #### Vision Some Gemini models support vision input. To use this feature, provide a list containing a text prompt and `Image` instances. For instance: ```python import io import requests import PIL import outlines from google.genai import Client from outlines.inputs import Image # Create the model model = outlines.from_gemini( Client(), "gemini-1.5-flash-latest" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the prompt containing the text and the image prompt = [ "Describe the image", Image(get_image("https://picsum.photos/id/237/400/300")) ] # Call the model to generate a response response = model(prompt, max_output_tokens=50) print(response) # 'This is a picture of a black dog.' ``` #### Chat You can also use chat inputs with the `Gemini` model. To do so, call the model with a `Chat` instance. The content of messsage within the chat can be vision inputs as described above. For instance: ```python import io import requests import PIL import outlines from google.genai import Client from outlines.inputs import Chat, Image # Create the model model = outlines.from_gemini( Client(), "gemini-1.5-flash-latest" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the chat input prompt = Chat([ {"role": "user", "content": "You are a helpful assistant that helps me described pictures."}, {"role": "assistant", "content": "I'd be happy to help you describe pictures! Please go ahead and share an image"}, { "role": "user", "content": ["Describe the image", Image(get_image("https://picsum.photos/id/237/400/300"))] }, ]) # Call the model to generate a response response = model(prompt, max_output_tokens=50) print(response) # 'This is a picture of a black dog.' ``` #### Streaming Finally, the `Gemini` model supports streaming through the `stream` method. For instance: ```python import outlines from google.genai import Client # Create the model model = outlines.from_gemini( Client(), "gemini-1.5-flash-latest" ) # Stream text for chunk in model.stream("Write a short story about a cat.", max_output_tokens=20): print(chunk) # 'In...' ``` ## Structured Generation Gemini provides supports for some forms of structured output: multiple choice, JSON schema (with caveats) and lists of structured objects. To use it, call the model with an `output_type` on top of your prompt. #### Multiple Choice ```python import outlines from google import genai from enum import Enum class PizzaOrBurger(Enum): pizza = "pizza" burger = "burger" # Create the model model = outlines.from_gemini(genai.Client(), "gemini-1.5-flash-latest") # Call it with the ouput type to generate structured text result = model("Pizza or burger?", PizzaOrBurger, max_output_tokens=20) print(result) # 'pizza' ``` #### JSON Schema Gemini supports only three types of objects used to define a JSON Schema: - Pydantic classes - Dataclasses - TypedDicts ```python from typing import List from pydantic import BaseModel from google import genai import outlines class Character(BaseModel): name: str age: int skills: List[str] # Create the model model = outlines.from_gemini(genai.Client(), "gemini-1.5-flash-latest") # Call it with the ouput type to generate structured text result = model("Create a character", Character) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] ``` #### Lists of Structured Objects A specificity of Gemini is that, despite not supporting regex, it does support a list of structured objects as an output type. To use it, put any of three available types described above in the typing `List` class ```python from dataclasses import dataclass from google import genai import outlines @dataclass class Character: name: str age: int skills: List[str] # Create the model model = outlines.from_gemini(genai.Client(), "gemini-1.5-flash-latest") # Call it with the ouput type to generate structured text result = model("Create a character", list[Character]) print(result) # '[{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}, {["name":...' ``` !!! Attention The structured objects must be in a built-in `list`, not a `List` from the `typing` library ## Inference arguments You can provide the same optional parameters you would pass to the `google.genai.Client` client both during the initialization of the Gemini model and when generating text. Some of the common inference arguments include `max_output_tokens`, `temperature`, and other generation parameters. Consult the [Google Generative AI documentation](https://github.com/googleapis/python-genai) for the full list of parameters. ================================================ FILE: docs/features/models/index.md ================================================ --- title: Models --- # Models ## Overview Outlines models are objects that wrap an inference client or engine. Models provide a standardized interface to generate structured text. All Outlines model classes have an associated loader function to facilitate initializing a model instance. The name of this function is `from_` plus the name of the model in lower-case letters. For instance, Outlines has a `Transformers` model and an associated `from_transformers` loader function. The parameters to load a model are specific to each provider, please consult the documentation of the model you want to use for more information. After having created a model instance, you can either directly call it to generate text or first create a reusable generator that you would then call. The input you must provide to a model to generate text can be a simple text prompt or a vision or chat input for models that support them. See the [model inputs section](../core/inputs.md) for more information on model inputs formats. In all cases, you can provide an `output_type` to constrain the format of the generation output. See the [output types section](../core/output_types.md) for more information on constrained generation. For instance: ```python from outlines import from_transformers, Generator import transformers # Create a model model = from_transformers( transformers.AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), transformers.AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), ) # Call it directly response = model("How many countries are there in the world", max_new_tokens=20) print(response) # 'There are 200 countries in the world.' # Call it directly with an output_type response = model("How many countries are there in the world", int, max_new_tokens=20) print(response) # '200' # Create a generator first and then call it generator = Generator(model, int) response = generator("How many countries are there in the world") print(response) # '200' ``` Some models support streaming through a `stream` method. It takes the same argument as the `__call__` method, but returns an iterator instead of a string. For instance: ```python from outlines import from_openai, Generator import openai # Create the model model = from_openai( openai.OpenAI(), "gpt-4o" ) # Stream the response for chunk in model.stream("Tell a short story about a cat.", max_tokens=50): print(chunk) # 'This...' ``` Additionally, some models support batch processing through a `batch` method. It's similar to the `__call__` method, but takes a list of prompts instead of a single prompt and returns a list of strings. For instance: ```python from outlines import from_transformers, Generator import transformers # Create a model model = from_transformers( transformers.AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), transformers.AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), ) # Call it directly response = model.batch(["What's the capital of Latvia?", "What's the capital of Estonia?"], max_new_tokens=20) print(response) # ['Riga', 'Tallinn'] ``` ## Features Matrix In alphabetical order: | | [Anthropic](../../models/anthropic) | [Dottxt](../../models/dottxt) | [Gemini](../../models/gemini) | [LlamaCpp](../../models/llamacpp) | [MLXLM](../../models/mlxlm) | [Mistral](../../models/mistral) | [Ollama](../../models/ollama) | [OpenAI](../../models/openai) | [SGLang](../../models/sglang) | [TGI](../../models/tgi) | [Transformers](../../models/transformers) | [Transformers MultiModal](../../models/transformers_multimodal) | [VLLM](../../models/vllm) | [VLLMOffline](../../models/vllm_offline) | |---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| | **Output Types** | | | | | | | | | | | | | | | | Simple Types | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | JSON Schema | ❌ | ✅ | 🟠 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Multiple Choice | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Regex | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Grammar | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | 🟠 | ❌ | ✅ | ✅ | ✅ | ✅ | | **Generation Features** | | | | | | | | | | | | | | | | Async | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | | Streaming | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | | Vision | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | Batching | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ✅ | ## Model Types Models can be divided into two categories: local models and server-based models. In the case of local models, the text generation happens within the inference library object used to instantiate the model. This gives Outlines direct access to the generation process (through a logits processor) and means all structured generation output types are available. The local models available are the following: - LlamaCpp - MLXLM - Transformers - TransformersMultiModal - VLLMOffline In the case of server-based models, the model is initialized with a client that sends a request to a server that is in charge of the actual text generation. As a result, we have limited control over text generation and some output types are not supported. The server on which the text generation happens can either be remote (with OpenAI or Anthopic for instance) or local (with SGLang for instance). The server-based models available are the following: - Anthropic - Dottxt - Gemini - Mistral - Ollama - OpenAI - SgLang - TGI - VLLM Some models have an async version. To use them, just pass the async version of the provider object to their loading function. It will then return a `Async` instance with the same methods and features as the regular sync instance. For instance: ```python from outlines import from_tgi from huggingface_hub import AsyncInferenceClient model = from_tgi( AsyncInferenceClient("http://localhost:8000/v1") ) print(type(model)) # outlines.models.tgi.AsyncTGI ``` The models that have an async version are the following: - Mistral - Ollama - OpenAI - SgLang - TGI - VLLM ================================================ FILE: docs/features/models/llamacpp.md ================================================ --- title: llama.cpp --- # llama.cpp Outlines provides an integration with [Llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python library](https://github.com/abetlen/llama-cpp-python). Llamacpp allows to run quantized models on machines with limited compute. !!! Installation You need to install the `llama-cpp-python` library to use the llama.cpp integration. Install all optional dependencies of the `LlamaCpp` model with: `"pip install "outlines[llamacpp]"`. See the [llama-cpp-python Github page](https://github.com/abetlen/llama-cpp-python?tab=readme-ov-file#supported-backends) for instructions on installing with CUDA, Metal, ROCm and other backends. ## Model Initialization To load the model, you can use the `from_llamacpp` function. The first argument of the function is a `Llama` model instance from the `llama_cpp` library. Consult the [Llama class API reference](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama) for detailed information on how to create a model instance and on the various available parameters. You can also pass a `chat_mode` argument to `from_llamacpp`. If `True` (default), the model will regard all `str` inputs as user messages in a chat conversation. If `False`, the model will regard all `str` inputs as plain text prompts. For instance: ```python import outlines from llama_cpp import Llama model = outlines.from_llamacpp( Llama.from_pretrained( repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q5_K_M.gguf", ) ) ``` You can also disable chat mode: ```python import outlines from llama_cpp import Llama model = outlines.from_llamacpp( Llama.from_pretrained( repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q5_K_M.gguf", ), chat_mode=False, ) ``` ## Text Generation To generate text, you can simply call the model with a prompt. For instance: ```python import outlines from llama_cpp import Llama # Create the model model = outlines.from_llamacpp( Llama.from_pretrained( repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q5_K_M.gguf", ) ) # Call it to generate text result = model("What's the capital of Latvia?", max_tokens=20) print(result) # 'Riga' ``` #### Chat You can also use chat inputs with the `LlamaCpp` model. To do so, call the model with a `Chat` instance. For instance: ```python import outlines from llama_cpp import Llama from outlines.inputs import Chat # Create the model model = outlines.from_llamacpp( Llama.from_pretrained( repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q5_K_M.gguf", ) ) # Create the prompt containing the text and the image prompt = Chat([ {"role": "system", "content": "You are a helpful assistant."}, {"role": "assistant", "content": "What's the capital of Latvia?"}, ]) # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'Riga.' ``` #### Streaming The `LlamaCpp` model also supports streaming. For instance: ```python import outlines from llama_cpp import Llama # Create the model model = outlines.from_llamacpp( Llama.from_pretrained( repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q5_K_M.gguf", ) ) # Stream text for chunk in model.stream("Write a short story about a cat.", max_tokens=100): print(chunk) # 'In...' ``` ## Structured Generation The `LlamaCpp` model supports all output types available in Outlines. Simply provide an `output_type` after the prompt when calling the model. ### Basic Type ```python import outlines from llama_cpp import Llama output_type = int model = outlines.from_llamacpp( Llama.from_pretrained( repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q5_K_M.gguf", ) ) result = model("How many countries are there in the world?", output_type) print(result) # '200' ``` ### JSON Schema ```python from typing import List from pydantic import BaseModel import outlines from llama_cpp import Llama class Character(BaseModel): name: str age: int skills: List[str] model = outlines.from_llamacpp( Llama.from_pretrained( repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q5_K_M.gguf", ) ) result = model("Create a character.", output_type=Character, max_tokens=200) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] ``` ### Multiple Choice ```python from typing import Literal import outlines from llama_cpp import Llama output_type = Literal["Paris", "London", "Rome", "Berlin"] model = outlines.from_llamacpp( Llama.from_pretrained( repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q5_K_M.gguf", ) ) result = model("What is the capital of France?", output_type) print(result) # 'Paris' ``` ### Regex ```python from outlines.types import Regex import outlines from llama_cpp import Llama output_type = Regex(r"\d{3}-\d{2}-\d{4}") model = outlines.from_llamacpp( Llama.from_pretrained( repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q5_K_M.gguf", ) ) result = model("Generate a fake social security number.", output_type) print(result) # '782-32-3789' ``` ### Context-free grammar ```python from outlines.types import CFG import outlines from llama_cpp import Llama output_type = CFG(""" root ::= answer answer ::= "yes" | "no" """) model = outlines.from_llamacpp( Llama.from_pretrained( repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q5_K_M.gguf", ) ) result = model("Are you feeling good today?", output_type) print(result) # 'yes' ``` ## Inference Arguments When calling the model, you can provide optional inference parameters on top of the prompt and the output type. These parameters will be passed on to the `__call__` method of the `llama_cpp.Llama` model. Some common inference arguments include `max_tokens`, `temperature`, `frequency_penalty` and `top_p`. See the [llama-cpp-python documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.__call__) for more information on inference parameters. ================================================ FILE: docs/features/models/mistral.md ================================================ # Mistral !!! Installation You need to install the `mistralai` library to be able to use the Mistral API in Outlines. Install all optional dependencies of the `Mistral` model with: `pip install "outlines[mistral]"`. You also need to have an Mistral API key. This API key must either be set as an environment variable called `MISTRAL_API_KEY` or be provided to the `mistralai.Mistral` class when instantiating it. ## Model Initialization To create an `Mistral` or `AsyncMistral` model instance, you can use the `from_mistral` function. It takes 3 arguments: - `client`: a `mistralai.Mistral` instance - `model_name` (optional): the name of the model you want to use - `async_client` (optional): whether it should create a sync or an async model As the `mistralai` library uses a single class to handle both sync and async requests, you must set the `async_client` argument to True to get an `AsyncMistral` model. For instance: ```python import mistralai import outlines # Create the Mistral client client = mistral.Mistral() # Create a sync model model = outlines.from_mistral( client, "mistral-large-latest" ) # Create aa async model model = outlines.from_mistral( client, "mistral-large-latest", True ) ``` The mistralai python SDK provides methods to query the API for a list of [all available models](https://docs.mistral.ai/getting-started/models/models_overview/#api-versioning), including paid endpoints for [premium models](https://docs.mistral.ai/getting-started/models/models_overview/) in addition to open weights. ## Text Generation Once you've created your Outlines `Mistral` model instance, you're all set to generate text with this provider. You can simply call the model with a prompt. For instance: ```python import mistralai import outlines # Create the model model = outlines.from_mistral( mistralai.Mistral(), "mistral-large-latest" ) # Call it to generate text response = model("What's the capital of Latvia?", max_tokens=20) print(response) # 'Riga' ``` #### Vision Some Mistral models support vision input. To use this feature, provide a list containing a text prompt and `Image` instances. For instance: ```python import io import requests import PIL import outlines import mistralai from outlines.inputs import Image # Create the model model = outlines.from_mistral( mistralai.Mistral(), "mistral-large-latest" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the prompt containing the text and the image prompt = [ "Describe the image", Image(get_image("https://picsum.photos/id/237/400/300")) ] # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'This is a picture of a black dog.' ``` #### Chat You can also use chat inputs with the `Mistral` model. To do so, call the model with a `Chat` instance. The content of messsage within the chat can be vision inputs as described above. For instance: ```python import io import requests import PIL import mistralai import outlines from outlines.inputs import Chat, Image # Create the model model = outlines.from_mistral( mistralai.Mistral(), "mistral-large-latest" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the chat input prompt = Chat([ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": ["Describe the image", Image(get_image("https://picsum.photos/id/237/400/300"))] }, ]) # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'This is a picture of a black dog.' ``` #### Streaming Finally, the `Mistral` model supports streaming through the `stream` method. For instance: ```python import mistralai import outlines # Create the model model = outlines.from_mistral( mistralai.Mistral(), "mistral-large-latest" ) # Stream the response for chunk in model.stream("Tell me a short story about a cat.", max_tokens=50): print(chunk) # 'Once...' ``` ## Structured Generation Mistral provides supports for some forms of structured output: JSON schemas and JSON syntax. To use it, call the model with an `output_type` on top of your prompt. #### JSON Schema ```python from typing import List from pydantic import BaseModel import mistralai import outlines class Character(BaseModel): name: str age: int skills: List[str] # Create the model model = outlines.from_mistral( mistralai.Mistral(), "mistral-large-latest" ) # Call it with the output type to generate structured text result = model("Create a character, use the json format.", Character, top_p=0.1) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] ``` #### JSON Syntax What we mean by JSON syntax is what is sometimes called JSON mode, meaning that the model will return a valid JSON, but you do not get to specify its structure. To use this JSON mode, provide the `dict` type as an output type. ```python import mistralai import outlines ## Create the model model = outlines.from_mistral( mistralai.Mistral(), "mistral-large-latest" ) # Call it with the output type to generate structured text result = model("Create a character, use the json format.", dict, temperature=0.5) print(result) # '{"first_name": "Henri", "last_name": "Smith", "height": "170"}' ``` ## Asynchronous Calls All features presented above for the sync model are also available for the async model. For instance: ```python import asyncio import mistralai import outlines from pydantic import BaseModel from typing import List class Character(BaseModel): name: str age: int skills: List[str] # Create the model model = outlines.from_mistral( mistralai.Mistral(), "mistral-large-latest", True ) async def text_generation(): # Regular generation response = await model("What's the capital of Latvia?", max_tokens=20) print(response) # 'Riga' # Streaming async for chunk in model.stream("Tell me a short story about a cat.", max_tokens=50): print(chunk, end="") # 'Once...' # Structured generation result = await model("Create a character, use the json format.", Character, top_p=0.1) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] asyncio.run(text_generation()) ``` ## Inference arguments When calling the model, you can provide keyword arguments that will be passed down to the `chat.complete` method of the Mistral client and its async and streaming equivalents. Some of the most common arguments include `max_tokens`, `temperature`, `stop` and `top_p`. Another keyword argument of interest is `n`. If set with an integer value superior to 1, Mistral will generate several sample responses and you will receive a list of strings as a response to your model call. See the [Mistral API documentation](https://docs.mistral.ai/api/#tag/chat) for the full list of available arguments. ## Troubleshooting - **ImportError: No module named 'mistralai'** → Run `pip install mistralai`. - **Authentication Error** → Verify `MISTRAL_API_KEY` is set and valid. Test with the [Mistral Playground](https://chat.mistral.ai). - **Schema Error (e.g., "Mistral does not support your schema")** → Ensure no `pattern` fields in Pydantic (Outlines sets `additionalProperties: false`); try a simpler schema or a different Outlines model (local models in particular). - **Model Not Found Error** → Confirm the model name (e.g., `"mistral-small-latest"`) and your subscription tier. Check [docs](https://docs.mistral.ai/getting-started/models/). - **Rate Limits or Quotas** → Monitor usage in the Mistral console; upgrade your plan for higher limits. - **Input Validation Errors** → Ensure Chat messages use valid roles (`system`, `user`, `assistant`); list inputs start with strings. *Last updated: October 2, 2025* ================================================ FILE: docs/features/models/mlxlm.md ================================================ --- title: mlx-lm --- # mlx-lm Outlines provides an integration with [mlx-lm](https://github.com/ml-explore/mlx-examples/tree/main/llms), allowing models to be run quickly on Apple Silicon via the [mlx](https://ml-explore.github.io/mlx/build/html/index.html) library. !!! Note "Installation" You need a device that [supports Metal](https://support.apple.com/en-us/102894) to use the mlx-lm integration. You need to install the `mlx` and `mlx-lm` libraries to be able to use mlx in Outlines. Install all optional dependencies of the `MLXLM` model with: `pip install "outlines[mlxlm]"`. ## Model Initialization To create a MLXLM model instance, you can use the `from_mlxlm` function. It takes 2 arguments: - `model`: an `mlx.nn.Module` instance - `tokenizer`: a `transformers.PreTrainedTokenizer` instance However, we recommend you simply pass on the output of the `mlx_lm.load` function (it takes a model name as an argument). For instance: ```python import outlines import mlx_lm # Create the model model = outlines.from_mlxlm( *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit") ) ``` ## Text Generation To generate text, you can simply call the model with a prompt. For instance: ```python import outlines import mlx_lm # Load the model model = outlines.from_mlxlm( *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit") ) # Call it to generate text result = model("What's the capital of Latvia?", max_tokens=20) print(result) # 'Riga' ``` #### Chat You can use chat inputs with the `MLXLM` model. To do so, call the model with a `Chat` instance. For instance: ```python import outlines import mlx_lm from outlines.inputs import Chat # Load the model model = outlines.from_mlxlm( *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit") ) # Create the prompt containing the text and the image prompt = Chat([ {"role": "system", "content": "You are a helpful assistant."}, {"role": "assistant", "content": "What's the capital of Latvia?"}, ]) # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'Riga.' ``` #### Streaming The `MLXLM` model also supports streaming. For instance: ```python import outlines import mlx_lm # Load the model model = outlines.from_mlxlm( *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit") ) # Stream text for chunk in model.stream("Write a short story about a cat.", max_tokens=100): print(chunk) # 'In...' ``` #### Batch Generation The `MLXLM` model supports generating text in batches. To do so, use the `batch` method and provide a list of strings as a model input. However, constrained generation is not supported with batching, so you cannot provide an `output_type`. For instance: ```python import outlines import mlx_lm # Load the model model = outlines.from_mlxlm( *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit") ) # Generate text in batches result = model.batch(["What's the capital of Lithuania?", "What's the capital of Latvia?"], max_tokens=20) print(result) # ['Vilnius', 'Riga'] ``` ## Structured Generation As a local model, `MLXLM` supports all forms of structured generation available in Outlines. #### Basic Type ```python import outlines import mlx_lm output_type = int model = outlines.from_mlxlm( *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit") ) result = model("How many countries are there in the world?", output_type) print(result) # '200' ``` #### JSON Schema ```python from pydantic import BaseModel from typing import List import outlines import mlx_lm class Character(BaseModel): name: str age: int skills: List[str] model = outlines.from_mlxlm( *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit") ) result = model("Create a character.", output_type=Character) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] ``` #### Multiple Choice ```python from typing import Literal import outlines import mlx_lm output_type = Literal["Paris", "London", "Rome", "Berlin"] model = outlines.from_mlxlm( *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit") ) result = model("What is the capital of France?", output_type) print(result) # 'Paris' ``` #### Regex ```python from outlines.types import Regex import outlines import mlx_lm output_type = Regex(r"\d{3}-\d{2}-\d{4}") model = outlines.from_mlxlm( *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit") ) result = model("Generate a fake social security number.", output_type) print(result) # '782-32-3789' ``` #### Context-Free Grammar ```python from outlines.types import CFG import outlines import mlx_lm arithmetic_grammar = """ ?start: sum ?sum: product | sum "+" product -> add | sum "-" product -> sub ?product: atom | product "*" atom -> mul | product "/" atom -> div ?atom: NUMBER -> number | "-" atom -> neg | "(" sum ")" %import common.NUMBER %import common.WS_INLINE %ignore WS_INLINE """ output_type = CFG(arithmetic_grammar) model = outlines.from_mlxlm( *mlx_lm.load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit") ) result = model("Write an addition.", output_type, max_tokens=20) print(result) # '23 + 48' ``` ## Inference Arguments When calling the model, you can provide optional inference parameters on top of the prompt and the output type. These parameters will be passed on to the `mlx_lm.generate` function used to generate text. See the [MLXLM documentation](https://github.com/ml-explore/mlx-lm) for more information on inference parameters. ================================================ FILE: docs/features/models/ollama.md ================================================ --- title: Ollama --- # Ollama !!! Installation To be able to use Ollama in Outlines, you must install both Ollama and the optional dependency libraries of the model. - To download Ollama: https://ollama.com/download - To install the ollama python sdk: `pip install "outlines[ollama]"` Consult the [`ollama` documentation](https://github.com/ollama/ollama-python) for detailed information on installation and client initialization. ## Model Initialization To create an Ollama model instance, you can use the `from_ollama` function. It takes 2 arguments: - `client`: an `ollama.Client` or `ollama.AsyncClient` instance - `model_name`: the name of the model you want to use Based on whether the inference client instance is synchronous or asynchronous, you will receive an `Ollama` or an `AsyncOllama` model instance. For instance: ```python import ollama import outlines # Create the client or async client client = ollama.Client() async_client = ollama.AsyncClient() # Create a sync model model = outlines.from_ollama( client, "qwen2.5vl:3b", ) # Create an async model model = outlines.from_ollama( async_client, "qwen2.5vl:3b", ) ``` You can find the list of available models on the [Ollama library](https://ollama.com/library). ## Text Generation Once you've created your Outlines `Ollama` model instance, you're all set to generate text with this provider. You can simply call the model with a prompt. For instance: ```python import ollama import outlines # Create the model model = outlines.from_ollama(ollama.Client(), "qwen2.5vl:3b") # Call it to generate text response = model("What's the capital of Latvia?") print(response) # 'Riga' ``` #### Vision Some Ollama models support vision input. To use this feature, provide a list containing a text prompt and `Image` instances. ```python import io import requests import PIL import ollama import outlines from outlines.inputs import Image # Create the model model = outlines.from_ollama( ollama.Client(), "qwen2.5vl:3b" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the prompt prompt = [ "Describe the image", Image(get_image("https://picsum.photos/id/237/400/300")) ] # Generate text response = model(prompt) print(response) # The image shows a black puppy with a curious and attentive expression. ``` #### Chat You can also use chat inputs with the `Ollama` model. To do so, call the model with a `Chat` instance. The content of messsage within the chat can be vision inputs as described above. For instance: ```python import io import requests import PIL import ollama import outlines from outlines.inputs import Chat, Image # Create the model model = outlines.from_ollama( ollama.Client(), "qwen2.5vl:3b" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the chat input prompt = Chat([ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": ["Describe the image", Image(get_image("https://picsum.photos/id/237/400/300"))] }, ]) # Call the model to generate a response response = model(prompt) print(response) # 'This is a picture of a black dog.' ``` #### Streaming Finally, the `Anthropic` model supports streaming through the `stream` method. ```python import ollama import outlines # Create the model model = outlines.from_ollama(ollama.Client(), "qwen2.5vl:3b") # Stream text for chunk in model.stream("Write a short story about a cat"): print(chunk) # 'In...' ``` ## Asynchronous Calls Ollama supports asynchronous operations by passing an `AsyncClient` instead of a regular `Client`. This returns an `AsyncOllama` model instance that supports async/await patterns. ### Basic Async Generation ```python import asyncio import outlines import ollama async def generate_text(): # Create an async model async_client = ollama.AsyncClient() async_model = outlines.from_ollama(async_client, "qwen2.5vl:3b") result = await async_model("Write a haiku about Python.") print(result) asyncio.run(generate_text()) ``` ### Async Streaming The async model also supports streaming with async iteration: ```python import asyncio import outlines import ollama async def stream_text(): async_client = ollama.AsyncClient() async_model = outlines.from_ollama(async_client, "qwen2.5vl:3b") async for chunk in async_model.stream("Tell me a story about a robot."): print(chunk, end="") asyncio.run(stream_text()) ``` ### Concurrent Async Requests One of the main benefits of async calls is the ability to make multiple concurrent requests: ```python import asyncio import outlines import ollama async def generate_multiple(): async_client = ollama.AsyncClient() async_model = outlines.from_ollama(async_client, "qwen2.5vl:3b") # Define multiple prompts prompts = [ "Write a tagline for a coffee shop.", "Write a tagline for a bookstore.", "Write a tagline for a gym." ] tasks = [async_model(prompt) for prompt in prompts] results = await asyncio.gather(*tasks) for prompt, result in zip(prompts, results): print(f"{prompt}\n{result}\n") asyncio.run(generate_multiple()) ``` ## Structured Generation Ollama only provides support for structured generation based on a JSON schema. To use it, call the model with a JSON schema object as an `output_type` on top of your prompt. For instance: ```python from typing import List from pydantic import BaseModel import ollama import outlines class Character(BaseModel): name: str age: int skills: List[str] # Create the model model = outlines.from_ollama(ollama.Client(), "tinyllama") # Call it with the output type to generate structured text result = model("Create a character", Character) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] ``` ## Inference arguments When calling the model, you can provide keyword arguments that will be passed down to the `generate` method of the Ollama client. Consult the [Ollama REST API documentation](https://github.com/ollama/ollama/blob/main/docs/api#generate-a-completion) for the full list of inference parameters. ================================================ FILE: docs/features/models/openai.md ================================================ # OpenAI !!! Installation You need to install the `openai` library to be able to use the OpenAI API in Outlines. Install all optional dependencies of the `OpenAI` model with: `pip install "outlines[openai]"`. You also need to have an OpenAI API key. This API key must either be set as an environment variable called `OPENAI_API_KEY` or be provided to the `openai.OpenAI` class when instantiating it. ## Model Initialization To create an OpenAI model instance, you can use the `from_openai` function. It takes 2 arguments: - `client`: an `openai.OpenAI`, `openai.AzureOpenAI`, `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` instance - `model_name`: the name of the model you want to use Based on whether the inference client instance is synchronous or asynchronous, you will receive an `OpenAI` or an `AsyncOpenAI` model instance. For instance: ```python import outlines import openai # Create the client or async client client = openai.OpenAI() async_client = openai.AsyncOpenAI() # Create a sync model model = outlines.from_openai( client, "gpt-4o" ) # Create aa async model model = outlines.from_openai( async_client, "gpt-4o" ) ``` Check the [OpenAI documentation](https://platform.openai.com/docs/models) for an up-to-date list of available models. As shown above, you can use Azure OpenAI in Outlines the same way you would use OpenAI, just provide an `openai.AzureOpenAI` instance to the Outlines model class. ## Text Generation Once you've created your Outlines `OpenAI` model instance, you're all set to generate text with this provider. You can simply call the model with a prompt. For instance: ```python import openai import outlines # Create the model model = outlines.from_openai( openai.OpenAI(), "gpt-4o" ) # Call it to generate text response = model("What's the capital of Latvia?", max_tokens=20) print(response) # 'Riga' ``` #### Vision Some OpenAI models support vision input. To use this feature, provide a list containing a text prompt and `Image` instances. For instance: ```python import io import requests import PIL import outlines import openai from outlines.inputs import Image # Create the model model = outlines.from_openai( openai.OpenAI(), "gpt-4o" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the prompt containing the text and the image prompt = [ "Describe the image", Image(get_image("https://picsum.photos/id/237/400/300")) ] # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'This is a picture of a black dog.' ``` #### Chat You can also use chat inputs with the `OpenAI` model. To do so, call the model with a `Chat` instance. The content of messsage within the chat can be vision inputs as described above. For instance: ```python import io import requests import PIL import openai import outlines from outlines.inputs import Chat, Image # Create the model model = outlines.from_openai( openai.OpenAI(), "gpt-4o" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the chat input prompt = Chat([ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": ["Describe the image", Image(get_image("https://picsum.photos/id/237/400/300"))] }, ]) # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'This is a picture of a black dog.' ``` #### Streaming Finally, the `OpenAI` model supports streaming through the `stream` method. For instance: ```python import openai import outlines # Create the model model = outlines.from_openai( openai.OpenAI(), "gpt-4o" ) # Stream the response for chunk in model.stream("Tell me a short story about a cat.", max_tokens=50): print(chunk) # 'Once...' ``` ## Structured Generation OpenAI provides supports for some forms of structured output: JSON schemas and JSON syntax. To use it, call the model with an `output_type` on top of your prompt. #### JSON Schema ```python from typing import List from pydantic import BaseModel import openai import outlines class Character(BaseModel): name: str age: int skills: List[str] # Create the model model = outlines.from_openai(openai.OpenAI(), "gpt-4o") # Call it with the output type to generate structured text result = model("Create a character, use the json format.", Character, top_p=0.1) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] ``` #### JSON Syntax What we mean by JSON syntax is what is sometimes called JSON mode, meaning that the model will return a valid JSON, but you do not get to specify its structure. To use this JSON mode, provide the `dict` type as an output type. ```python import openai import outlines # Create the model model = outlines.from_openai(openai.OpenAI(), "gpt-4o") # Call it with the output type to generate structured text result = model("Create a character, use the json format.", dict, temperature=0.5) print(result) # '{"first_name": "Henri", "last_name": "Smith", "height": "170"}' ``` ## Asynchronous Calls All features presented above for the sync model are also available for the async model. For instance: ```python import asyncio import openai import outlines from pydantic import BaseModel from typing import List class Character(BaseModel): name: str age: int skills: List[str] # Create the model model = outlines.from_openai( openai.AsyncOpenAI(), "gpt-4o" ) async def text_generation(): # Regular generation response = await model("What's the capital of Latvia?", max_tokens=20) print(response) # 'Riga' # Streaming async for chunk in model.stream("Tell me a short story about a cat.", max_tokens=50): print(chunk, end="") # 'Once...' # Structured generation result = await model("Create a character, use the json format.", Character, top_p=0.1) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] asyncio.run(text_generation()) ``` ## Inference arguments When calling the model, you can provide keyword arguments that will be passed down to the `chat.completions.create` method of the OpenAI client. Some of the most common arguments include `max_tokens`, `temperature`, `stop` and `top_p`. Another keyword argument of interest is `n`. If set with an integer value superior to 1, OpenAI will generate several sample responses and you will receive a list of strings as a response to your model call. See the [OpenAI API documentation](http://platform.openai.com/docs/api-reference/chat/create) for the full list of available arguments. ================================================ FILE: docs/features/models/openai_compatible.md ================================================ # OpenAI-Compatible APIs Many inference providers offer OpenAI-compatible APIs, allowing you to use the familiar OpenAI SDK while connecting to different backends. Outlines allows you can leverage various providers while maintaining consistent code. ## What are OpenAI-Compatible APIs? OpenAI-compatible APIs implement the same REST endpoints and request/response formats as OpenAI's API, but serve different models or run on different infrastructure. This allows you to use the `openai` Python library with any compatible provider by simply changing the `base_url`. !!! Installation You need to install the `openai` library to be able to use the OpenAI-compatible APIs in Outlines. Install all optional dependencies of the `OpenAI` model with: `pip install "outlines[openai]"`. ## General Usage Pattern The standard approach is to use the OpenAI SDK with a custom base URL: ```python import openai import outlines # Point to your OpenAI-compatible endpoint client = openai.OpenAI( base_url="https://your-provider.com/v1", # Custom endpoint api_key="your-api-key" ) # Use with Outlines model = outlines.from_openai(client, "model-name") ``` ## Important: Provider-Specific Parameters !!! Warning "API-Specific Parameters" Some providers require additional parameters in the API request for structured generation to work properly. These are typically passed as extra arguments when calling the model. For example, some providers may need special parameters in the request body to enable guided generation or specify constraints. Always consult your provider's documentation for structured generation requirements. ## Popular OpenAI-Compatible Providers Many providers offer OpenAI-compatible endpoints: - **Groq** - **Together AI** - **Anyscale** - **Fireworks AI** - **Perplexity** - **Local servers** (LocalAI, etc.) ## Configuration Examples ### Basic Setup ```python import openai import outlines # Generic OpenAI-compatible setup client = openai.OpenAI( base_url="https://api.your-provider.com/v1", api_key="your-api-key" ) model = outlines.from_openai(client, "provider-model-name") ``` ### With Authentication Headers ```python import openai import outlines # Some providers need custom headers client = openai.OpenAI( base_url="https://api.your-provider.com/v1", api_key="your-api-key", default_headers={"Custom-Header": "value"} ) model = outlines.from_openai(client, "provider-model-name") ``` ## Related Documentation For specific implementations that use OpenAI-compatible APIs: - [SGLang](sglang.md): Local inference server with OpenAI-compatible endpoints - [vLLM](vllm.md): High-performance inference with OpenAI-compatible API - [OpenAI](openai.md): The original OpenAI API implementation ================================================ FILE: docs/features/models/openrouter.md ================================================ # Openrouter !!! Installation [OpenRouter](https://openrouter.ai/docs/api-reference/overview) uses the same API as OpenAI, so both services are [interoperable](./openai_compatible.md) using the `openai` library. Install all optional dependencies of the `OpenAI` model with: `pip install "outlines[openai]"`. You also need to have an Openrouter API key. This API key must either be set as an environment variable called `OPENAI_API_KEY` or be provided to the `openai.OpenAI` class when instantiating it. ## Model Initialization To create a model instance, you can use the `from_openai` function. It takes 2 arguments: - `client`: an `openai.OpenAI` instance - `model_name`: the name of the model you want to use, defined as `provider/model` For instance: ```python import outlines import openai # Create the client client = openai.OpenAI( base_url="https://openrouter.ai/api/v1", api_key="OPENAI_API_KEY", ) # Create the model model = outlines.from_openai( client, "x-ai/grok-4" ) ``` Leaving an empty string in the model name field will lead OpenRouter to use your default model defined in [settings](https://openrouter.ai/settings/preferences). The [OpenRouter](https://openrouter.ai/models) website lists available models. Keep in mind that some models do not support `json_schema` response formats and may return a 400 error code as a result. ## Related Documentation For specific implementations that use OpenAI-compatible APIs: - [OpenAI](./openai.md): The original OpenAI API implementation - [OpenAI compatible API](./openai_compatible.md): Details on how to use OpenAI-compatible APIs ================================================ FILE: docs/features/models/sglang.md ================================================ --- title: SGLang --- # SGLang ## Prerequisites The Outlines `SGLang` model is intended to be used along with an SGLang instance running on a separate server (can be local or remote). Make sure you have a SGLang server running and accessible before using the `SGLang` model. For instance by running: ```shell pip install "sglang[all]" python -m sglang.launch_server \ --model-path NousResearch/Meta-Llama-3-8B-Instruct \ --host 0.0.0.0 \ --port 30000 ``` Follow the [Installation instructions](https://docs.sglang.ai/start/install.html) for more information on how to set up a SGLang server for your particular setup. As the SGLang client relies on the `openai` python sdk, you need to have the `openai` package installed. Install all optional dependencies of the `SGLang` model with: `pip install "outlines[sglang]"`. When launching your SGLang server, you can specify the backend engine to use for structured generation through the `grammar-backend` cli argument. Add `--grammar-backend outlines` to your command to use Outlines instead of the default engine. ## Model Initialization To load the model, you can use the `from_sglang` function. The argument of the function is either an `OpenAI` or `AsyncOpenAI` instance from the `openai` library. Make sure the value of the `base_url` argument of the `OpenAI` client points to your running SGLang server. Consult the [SGLang documentation](https://docs.sglang.ai/backend/send_request.html) on using an OpenAI client with an SGLang server for more information. Based on whether the `openai` client instance is synchronous or asynchronous, you will receive a `SGLang` or `AsyncSGLang` model instance. For instance: ```python import openai import outlines # Create the OpenAI client sync_openai_client = openai.OpenAI(base_url="http://localhost:11434") async_openai_client = openai.AsyncOpenAI(base_url="http://localhost:11434") # Create a sync model sync_model = outlines.from_sglang(sync_openai_client) print(type(sync_model)) # # Create an async model async_model = outlines.from_sglang(async_openai_client) print(type(async_model)) # ``` ## Text Generation To generate text, you can simply call the model with a prompt. For instance: ```python import openai import outlines # Create the model model = outlines.from_openai(openai.OpenAI(base_url="http://localhost:11434")) # Call it to generate text response = model("What's the capital of Latvia?", max_tokens=20) print(response) # 'Riga' ``` #### Vision Some models you can run with SGLang support vision input. To use this feature, provide a list containing a text prompt and `Image` instances. For instance: ```python import io import requests import PIL import outlines import openai from outlines.inputs import Image # Create the model model = outlines.from_openai(openai.OpenAI(base_url="http://localhost:11434")) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the prompt containing the text and the image prompt = [ "Describe the image", Image(get_image("https://picsum.photos/id/237/400/300")) ] # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'This is a picture of a black dog.' ``` #### Chat You can also use chat inputs with the `SGLang` model. To do so, call the model with a `Chat` instance. The content of messsage within the chat can be vision inputs as described above. For instance: ```python import io import requests import PIL import openai import outlines from outlines.inputs import Chat, Image # Create the model model = outlines.from_openai(openai.OpenAI(base_url="http://localhost:11434")) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the chat input prompt = Chat([ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": ["Describe the image", Image(get_image("https://picsum.photos/id/237/400/300"))] }, ]) # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'This is a picture of a black dog.' ``` #### Streaming Finally, the `SGLang` model supports streaming through the `stream` method. For instance: ```python import openai import outlines # Create the model model = outlines.from_openai(openai.OpenAI(base_url="http://localhost:11434")) # Stream the response for chunk in model.stream("Tell me a short story about a cat.", max_tokens=50): print(chunk) # 'Once...' ``` ## Structured Generation SGLang supports all output types available in Outlines (context-free grammars with caveats though, see the subsection below for more details). Simply provide an `output_type` after the prompt when calling the model. All structured generation features work with both synchronous and asynchronous models. ### Simple Type ```python import openai import outlines output_type = int openai_client = openai.OpenAI(base_url="http://localhost:11434") model = outlines.from_sglang(openai_client) result = model("How many countries are there in the world?", output_type) print(result) # '200' ``` ### JSON Schema ```python import openai import outlines from pydantic import BaseModel class Character(BaseModel): name: str age: int skills: List[str] openai_client = openai.OpenAI(base_url="http://localhost:11434") model = outlines.from_sglang(openai_client) result = model("Create a character.", Character, frequency_penalty=1.5) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] ``` ### Multiple Choice ```python from typing import Literal import openai import outlines output_type = Literal["Paris", "London", "Rome", "Berlin"] openai_client = openai.OpenAI(base_url="http://localhost:11434") model = outlines.from_sglang(openai_client) result = model("What is the capital of France?", output_type, temperature=0) print(result) # 'Paris' ``` ### Regex ```python import openai import outlines from outlines.types import Regex output_type = Regex(r"\d{3}-\d{2}-\d{4}") openai_client = openai.OpenAI(base_url="http://localhost:11434") model = outlines.from_sglang(openai_client) result = model("Generate a fake social security number.", output_type, top_p=0.1) print(result) # '782-32-3789' ``` ### Context-Free Grammar SGLang supports grammars, but expects an EBNF format instead of the Lark format Outlines uses. Thus, to use a context-free grammar with SGLang, provide a string using the EBNF syntax to the Outlines `CFG` object. ```python import openai import outlines from outlines.types import CFG ebnf_grammar = """ root ::= answer answer ::= "yes" | "no" """ output_type = CFG(ebnf_grammar) openai_client = openai.OpenAI(base_url="http://localhost:11434") model = outlines.from_sglang(openai_client) result = model("Is the weather good today?", output_type) print(result) # 'yes' ``` ### Async Structured Generation All structured generation features work seamlessly with async models: ```python import asyncio import openai import outlines from typing import List from pydantic import BaseModel class User(BaseModel): name: str email: str age: int async def generate_user(): async_client = openai.AsyncOpenAI(base_url="http://localhost:11434") async_model = outlines.from_sglang(async_client) result = await async_model("Generate a random user profile.", output_type=User) user = User.model_validate_json(result) print(f"Name: {user.name}, Email: {user.email}, Age: {user.age}") asyncio.run(generate_user()) ``` ## Inference Arguments When calling the model, you can provide optional parameters on top of the prompt and the output type. Those will be passed on to the `chat.completions.create` method of the OpenAI client. An optional parameter of particular interest is `extra_body`, which is a dictionary containing arguments that are specific to SGLang and are not part of the standard `openai` interface. See the [SGLang documentation](https://docs.sglang.ai/backend/openai_api_completions.html) on parameters for the OpenAI-compatible server for more information on inference parameters. ================================================ FILE: docs/features/models/tgi.md ================================================ --- title: TGI --- # TGI ## Prerequisites The Outlines `TGI` model is intended to be used along with a HuggingFace `Text Generation Inference` server (running locally or remotely). Make sure you have a TGI server running before using the `TGI` model. For instance running: ```shell docker run \ --gpus all \ --shm-size 1g \ -p 8080:80 \ ghcr.io/huggingface/text-generation-inference:3.3.4 \ --model-id NousResearch/Meta-Llama-3-8B-Instruct ``` Please consult the [installation guide](https://huggingface.co/docs/text-generation-inference/en/quicktour) for more information about how to run TGI with your particular setup. As the TGI client relies on the `huggingface_hub` python package, you need to have it installed. Install all optional dependencoes of the `TGI` model with: `pip install "outlines[tgi]"` ## Model Initialization To load the model, you can use the `from_tgi` function. The argument of the function is either an `InferenceClient` or `AsyncInferenceClient` instance from the `huggingface_hub` library. Consult the [HuggingFace documentation](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client) for more information on their inference client. Based on whether the inference client instance is synchronous or asynchronous, you will receive a `TGI` or an `AsyncTGI` model instance. For instance: ```python import outlines import huggingface_hub # Create the inference client client = huggingface_hub.InferenceClient("http://localhost:11434") async_client = huggingface_hub.AsyncInferenceClient("http://localhost:11434") # Create a sync model sync_model = outlines.from_tgi(client) print(type(sync_model)) # # Create an async model async_model = outlines.from_tgi(async_client) print(type(async_model)) # ``` ## Text Generation To generate text, you can simply call the model with a prompt. For instance: ```python import outlines import huggingface_hub # Create the model client = huggingface_hub.InferenceClient("http://localhost:11434") model = outlines.from_tgi(client) # Call it to generate text result = model("Write a short story about a cat.", stop_sequences=["."]) print(result) # 'In a quiet village where the cobblestones hummed softly beneath the morning mist...' ``` The `TGI` model supports streaming. For instance: ```python import outlines import huggingface_hub # Create the model client = huggingface_hub.InferenceClient("http://localhost:11434") model = outlines.from_tgi(client) # Stream text for chunk in model.stream("Write a short story about a cat.", stop_sequences=["."]): print(chunk) # 'In ...' ``` ## Asynchronous Calls TGI supports asynchronous operations by passing an `AsyncInferenceClient` instead of a regular `InferenceClient`. This returns an `AsyncTGI` model instance that supports async/await patterns. ### Basic Async Generation ```python import asyncio import outlines import huggingface_hub async def generate_text(): # Create an async model async_client = huggingface_hub.AsyncInferenceClient("http://localhost:11434") async_model = outlines.from_tgi(async_client) result = await async_model("Write a haiku about Python.", max_new_tokens=50) print(result) asyncio.run(generate_text()) ``` ### Async Streaming The async model also supports streaming with async iteration: ```python import asyncio import outlines import huggingface_hub async def stream_text(): async_client = huggingface_hub.AsyncInferenceClient("http://localhost:11434") async_model = outlines.from_tgi(async_client) async for chunk in async_model.stream("Tell me a story about a robot.", max_new_tokens=100): print(chunk, end="") asyncio.run(stream_text()) ``` ### Concurrent Async Requests One of the main benefits of async calls is the ability to make multiple concurrent requests: ```python import asyncio import outlines import huggingface_hub async def generate_multiple(): async_client = huggingface_hub.AsyncInferenceClient("http://localhost:11434") async_model = outlines.from_tgi(async_client) # Define multiple prompts prompts = [ "Write a tagline for a coffee shop.", "Write a tagline for a bookstore.", "Write a tagline for a gym." ] tasks = [async_model(prompt, max_new_tokens=30) for prompt in prompts] results = await asyncio.gather(*tasks) for prompt, result in zip(prompts, results): print(f"{prompt}\n{result}\n") asyncio.run(generate_multiple()) ``` ## Structured Generation TGI supports all output types available in Outlines except for context-free grammars. Simply provide an `output_type` after the prompt when calling the model. All structured generation features work with both synchronous and asynchronous models. ### Simple Type ```python import outlines import huggingface_hub output_type = int tgi_client = huggingface_hub.InferenceClient("http://localhost:8080") model = outlines.from_tgi(tgi_client) result = model("How many countries are there in the world?", output_type) print(result) # '200' ```### JSON Schema ```python import outlines import huggingface_hub from typing import List from pydantic import BaseModel class Character(BaseModel): name: str age: int skills: List[str] tgi_client = huggingface_hub.InferenceClient("http://localhost:8080") model = outlines.from_tgi(tgi_client) result = model("Create a character.", output_type=Character, frequency_penalty=1.5) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] ```### Multiple Choice ```python import outlines import huggingface_hub from typing import Literal output_type = Literal["Paris", "London", "Rome", "Berlin"] tgi_client = huggingface_hub.InferenceClient("http://localhost:8080") model = outlines.from_tgi(tgi_client) result = model("What is the capital of France?", output_type, temperature=0) print(result) # 'Paris' ```### Regex ```python import outlines import huggingface_hub from outlines.types import Regex output_type = Regex(r"\d{3}-\d{2}-\d{4}") tgi_client = huggingface_hub.InferenceClient("http://localhost:8080") model = outlines.from_tgi(tgi_client) result = model("Generate a fake social security number.", output_type, top_p=0.1) print(result) # '782-32-3789' ``` ### Async Structured Generation All structured generation features work seamlessly with async models: ```python import asyncio import outlines import huggingface_hub from pydantic import BaseModel class User(BaseModel): name: str email: str age: int async def generate_user(): async_client = huggingface_hub.AsyncInferenceClient("http://localhost:11434") async_model = outlines.from_tgi(async_client) result = await async_model("Generate a random user profile.", output_type=User) user = User.model_validate_json(result) print(f"Name: {user.name}, Email: {user.email}, Age: {user.age}") asyncio.run(generate_user()) ``` ## Inference parameters When calling the model, you can provide optional parameters on top of the prompt and the output type. Those will be passed on to the `text_generation` method of the TGI client. Common parameters include `max_new_tokens`, `stop_sequences`, `temperature`, `top_k`, `top_p`, and others as specified in the [TGI inference client documentation](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient). ================================================ FILE: docs/features/models/transformers.md ================================================ --- title: Transformers --- # Transformers !!! Installation You need to install the `transformers` library to be able to use the Transformers in Outlines. Install all optional dependencies of the `Transformers` model with: `pip install "outlines[transformers]"`. See the [HuggingFace documentation](https://huggingface.co/docs/transformers/en/installation) for more information on installing `transformers` with CPU, GPU... ## Model Initialization To load the model, you can use the `from_transformers` function. It takes 3 arguments: - `model`: a `transformers` model (created with `AutoModelForCausalLM` for instance) - `tokenizer_or_processor`: a `transformers` tokenizer (created with `AutoTokenizer` for instance, it must be an instance of either `PreTrainedTokenizer` or `PreTrainedTokenizerFast`) - `device_dtype` (optional): the tensor dtype to use for inference. If not provided, the model will use the default dtype. For instance: ```python import outlines from transformers import AutoModelForCausalLM, AutoTokenizer # Create the transformers model and tokenizer hf_model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct") hf_tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") # Create the Outlines model model = outlines.from_transformers(hf_model, hf_tokenizer) ``` If you provide a processor instead of a tokenizer for the second argument of the `from_transformers` function, you would get a `TransformersMultiModal` instance. See the [TransformersMultiModal model documentation](./transformers_multimodal.md) for more information on using multimodal models in Outlines. ## Text Generation To generate text, you can simply call the model with a prompt. For instance: ```python import outlines from transformers import AutoModelForCausalLM, AutoTokenizer # Create model model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) # Call it to generate text result = model("What's the capital of Latvia?", max_new_tokens=20) print(result) # 'Riga' ``` #### Chat You can also use chat inputs with the `Transformers` model. To do so, call the model with a `Chat` instance. The content of messsage within the chat can be vision inputs as described above. For instance: ```python import outlines from outlines.inputs import Chat from transformers import AutoModelForCausalLM, AutoTokenizer # Create the model model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) # Create the chat input prompt = Chat([ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What's the capital of Latvia?"}, ]) # Call the model to generate a response response = model(prompt, max_new_tokens=50) print(response) # 'This is a picture of a black dog.' ``` #### Batching Finally, the `Transformers` model supports batching through the `batch` method. To use it, provide a list of prompts (using the formats described above) to the `batch` method. You will receive as a result a list of completions. For instance: ```python import outlines from transformers import AutoModelForCausalLM, AutoTokenizer # Create model model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) # Create a list of prompts that will be used in a single batch prompts = [ "What's the capital of Lithuania?", "What's the capital of Latvia?", "What's the capital of Estonia?" ] # Call it to generate text result = model.batch(prompts, max_new_tokens=20) print(result) # ['Vilnius', 'Riga', 'Tallinn'] ``` ## Structured Generation As a local model, `Transformers` supports all output types available in Outlines. Simply provide an `output_type` after the prompt when calling the model. ### Simple Type ```python import outlines from transformers import AutoModelForCausalLM, AutoTokenizer output_type = int model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) result = model("How many countries are there in the world?", output_type, max_new_tokens=5) print(result) # '200' ``` ### JSON Schema ```python import outlines from transformers import AutoModelForCausalLM, AutoTokenizer from pydantic import BaseModel from typing import List class Character(BaseModel): name: str age: int skills: List[str] model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) result = model("Create a character.", output_type=Character, max_new_tokens=200, repetition_penalty=0.5) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] ``` ### Multiple Choice ```python from typing import Literal import outlines from transformers import AutoModelForCausalLM, AutoTokenizer output_type = Literal["Paris", "London", "Rome", "Berlin"] model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) result = model("What is the capital of France?", output_type, max_new_tokens=10, temperature=0) print(result) # 'Paris' ``` ### Regex ```python import outlines from transformers import AutoModelForCausalLM, AutoTokenizer from outlines.types import Regex output_type = Regex(r"\d{3}-\d{2}-\d{4}") model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) result = model("Generate a fake social security number.", output_type, max_new_tokens=20, top_p=0.5) print(result) # '782-32-3789' ``` ### Context-Free Grammar ```python import outlines from transformers import AutoModelForCausalLM, AutoTokenizer from outlines.types import CFG arithmetic_grammar = """ ?start: sum ?sum: product | sum "+" product -> add | sum "-" product -> sub ?product: atom | product "*" atom -> mul | product "/" atom -> div ?atom: NUMBER -> number | "-" atom -> neg | "(" sum ")" %import common.NUMBER %import common.WS_INLINE %ignore WS_INLINE """ output_type = CFG(arithmetic_grammar) model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) result = model("Write an addition.", output_type, max_new_tokens=100) print(result) # '23 + 48' ``` ## Inference Arguments When calling the model, you can provide optional inference parameters on top of the prompt and the output type. These parameters will be passed on to the `generate` method of the `transformers` model. Some common inference arguments include `max_new_tokens`, `temperature`, `repetition_penalty` and `top_p`. See the [transformers documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation) for more information on inference parameters. !!! Warning The `max_new_tokens` inference parameter has a default value of 20. This is insufficient for most tasks and will result in the generation output not respecting the output type (because the response is truncated). We recommend you always provide a value for this argument. ================================================ FILE: docs/features/models/transformers_multimodal.md ================================================ --- title: Transformers MultiModal --- # Transformers MultiModal The Outlines `TransformersMultiModal` model inherits from `Transformers` and shares most of its interface. Please start by reading the [Transformers documentation](./transformers.md) as this document only focuses on the specificities of `TransformersMultiModal` compared to `Transformers`. ## Model Initialization To load the model, you can use the `from_transformers` function. It takes 2 arguments: - `model`: a `transformers` model (created with `AutoModelForImageTextToText` for instance) - `tokenizer_or_processor`: a `transformers` processor (created with `AutoProcessor` for instance, it must be an instance of `ProcessorMixin`) - `device_dtype` (optional): the tensor dtype to use for inference. If not provided, the model will use the default dtype. For instance: ```python import outlines from transformers import AutoModelForImageTextToText, AutoProcessor # Create the transformers model and processor hf_model = AutoModelForImageTextToText.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct") hf_processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct") # Create the Outlines model model = outlines.from_transformers(hf_model, hf_processor) ``` ## Model Input As with other multimodal models, you should provide a list containing a text prompt and assets (`Image`, `Audio` or `Video` instances) as the model input. The type of asset to provide depends on the capabilities of the `transformers` model you are running. Here's an example of using a vision multimodal model: ```python from io import BytesIO from urllib.request import urlopen from PIL import Image as PILImage from pydantic import BaseModel from transformers import ( LlavaForConditionalGeneration, AutoProcessor, ) import outlines from outlines.inputs import Image TEST_MODEL = "trl-internal-testing/tiny-LlavaForConditionalGeneration" IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg" class Animal(BaseModel): specie: str color: str weight: int def get_image_from_url(image_url): img_byte_stream = BytesIO(urlopen(image_url).read()) image = PILImage.open(img_byte_stream).convert("RGB") image.format = "PNG" return image # Create a model model = outlines.from_transformers( LlavaForConditionalGeneration.from_pretrained(TEST_MODEL), AutoProcessor.from_pretrained(TEST_MODEL), ) # Call it with a model input dict containing a text prompt and an image + an output type result = model( ["Describe this animal.", Image(get_image_from_url(IMAGE_URL))], Animal, max_new_tokens=100 ) print(result) # '{"specie": "cat", "color": "white", "weight": 4}' print(Animal.model_validate_json(result)) # specie=cat, color=white, weight=4 ``` !!! Warning Make sure your prompt contains the tags expected by your processor to correctly inject the assets in the prompt. For some vision multimodal models for instance, you need to add as many `` tags in your prompt as there are image assets included in your model input. `Chat` method, instead, does not require this step. ### Chat The `Chat` interface offers a more convenient way to work with multimodal inputs. You don't need to manually add asset tags like ``. The model's HF processor handles the chat templating and asset placement for you automatically. To do so, call the model with a `Chat` instance using a multimodal chat format. Assets must be pre-processed as `outlines.inputs.{Image, Audio, Video}` format, and only `image`, `video`, and `audio` types are supported. For instance: ```python import outlines from outlines.inputs import Chat, Image from transformers import AutoModelForImageTextToText, AutoProcessor from PIL import Image as PILImage from io import BytesIO from urllib.request import urlopen import torch model_kwargs = { "torch_dtype": torch.bfloat16, "attn_implementation": "flash_attention_2", "device_map": "auto", } def get_image_from_url(image_url): img_byte_stream = BytesIO(urlopen(image_url).read()) image = PILImage.open(img_byte_stream).convert("RGB") image.format = "PNG" return image # Create the model model = outlines.from_transformers( AutoModelForImageTextToText.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct", **model_kwargs), AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct", **model_kwargs) ) IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg" # Create the chat mutimodal input prompt = Chat([ { "role": "user", "content": [ {"type": "image", "image": Image(get_image_from_url(IMAGE_URL))}, {"type": "text", "text": "Describe the image in few words."} ], } ]) # Call the model to generate a response response = model(prompt, max_new_tokens=50) print(response) # 'A Siamese cat with blue eyes is sitting on a cat tree, looking alert and curious.' ``` Or using a list containing text and assets: ```python import outlines from outlines.inputs import Chat, Image from transformers import AutoModelForImageTextToText, AutoProcessor from PIL import Image as PILImage from io import BytesIO import requests import torch TEST_MODEL = "Qwen/Qwen2.5-VL-7B-Instruct" # Function to get an image def get_image(url): headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' } r = requests.get(url, headers=headers) image = PILImage.open(BytesIO(r.content)).convert("RGB") image.format = "PNG" return image model_kwargs = { "torch_dtype": torch.bfloat16, # "attn_implementation": "flash_attention_2", "device_map": "auto", } # Create a model model = outlines.from_transformers( AutoModelForImageTextToText.from_pretrained(TEST_MODEL, **model_kwargs), AutoProcessor.from_pretrained(TEST_MODEL, **model_kwargs), ) # Create the chat input prompt = Chat([ {"role": "user", "content": "You are a helpful assistant that helps me described pictures."}, {"role": "assistant", "content": "I'd be happy to help you describe pictures! Please go ahead and share an image"}, { "role": "user", "content": ["Describe briefly the image", Image(get_image("https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg"))] }, ]) # Call the model to generate a response response = model(prompt, max_new_tokens=50) print(response) # 'The image shows a light-colored cat with a white chest...' ``` ### Batching The `TransformersMultiModal` model supports batching through the `batch` method. To use it, provide a list of prompts (using the formats described above) to the `batch` method. You will receive as a result a list of completions. An example using the Chat format: ```python import outlines from outlines.inputs import Chat, Image from transformers import AutoModelForImageTextToText, AutoProcessor from PIL import Image as PILImage from io import BytesIO from urllib.request import urlopen import torch from pydantic import BaseModel model_kwargs = { "torch_dtype": torch.bfloat16, "attn_implementation": "flash_attention_2", "device_map": "auto", } class Animal(BaseModel): animal: str color: str def get_image_from_url(image_url): img_byte_stream = BytesIO(urlopen(image_url).read()) image = PILImage.open(img_byte_stream).convert("RGB") image.format = "PNG" return image # Create the model model = outlines.from_transformers( AutoModelForImageTextToText.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct", **model_kwargs), AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct", **model_kwargs) ) IMAGE_URL_1 = "https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg" IMAGE_URL_2 = "https://upload.wikimedia.org/wikipedia/commons/a/af/Golden_retriever_eating_pigs_foot.jpg" # Create the chat mutimodal messages messages = [ { "role": "user", "content": [ {"type": "text", "text": "Describe the image in few words."}, {"type": "image", "image": Image(get_image_from_url(IMAGE_URL_1))}, ], }, ] messages_2 = [ { "role": "user", "content": [ {"type": "text", "text": "Describe the image in few words."}, {"type": "image", "image": Image(get_image_from_url(IMAGE_URL_2))}, ], }, ] prompts = [Chat(messages), Chat(messages_2)] # Call the model to generate a response responses = model.batch(prompts, output_type=Animal, max_new_tokens=100) print(responses) # ['{ "animal": "cat", "color": "white and gray" }', '{ "animal": "dog", "color": "white" }'] print([Animal.model_validate_json(i) for i in responses]) # [Animal(animal='cat', color='white and gray'), Animal(animal='dog', color='white')] ``` An example using a list of lists with tag assets: ```python from io import BytesIO from urllib.request import urlopen from PIL import Image as PILImage from transformers import ( LlavaForConditionalGeneration, AutoProcessor, ) import outlines from outlines.inputs import Image TEST_MODEL = "trl-internal-testing/tiny-LlavaForConditionalGeneration" IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg" IMAGE_URL_2 ="https://upload.wikimedia.org/wikipedia/commons/9/98/Aldrin_Apollo_11_original.jpg" def get_image_from_url(image_url): img_byte_stream = BytesIO(urlopen(image_url).read()) image = PILImage.open(img_byte_stream).convert("RGB") image.format = "PNG" return image # Create a model model = outlines.from_transformers( LlavaForConditionalGeneration.from_pretrained(TEST_MODEL), AutoProcessor.from_pretrained(TEST_MODEL), ) # Call the batch method with a list of model input dicts result = model.batch( [ ["Describe the image.", Image(get_image_from_url(IMAGE_URL))], ["Describe the image.", Image(get_image_from_url(IMAGE_URL_2))], ] ) print(result) # ['The image shows a cat', 'The image shows an astronaut'] ``` ================================================ FILE: docs/features/models/vllm.md ================================================ --- title: vLLM --- # vLLM ## Prerequisites The Outlines `VLLM` model is intended to be used along with a vLLM instance running on a separate server (can be local or remote). Make sure you have a vLLM server running and accessible before using the `VLLM` model. For instance by running: ```shell pip install vllm vllm serve microsoft/Phi-3-mini-4k-instruct \ --dtype auto \ --api-key token-abc123 ``` Follow the [Installation instructions](https://docs.vllm.ai/en/latest/getting_started/installation/index.html) for more information on how to set up a vLLM server for your particular setup. As the vLLM client relies on the `openai` python sdk, you need to have the `openai` package installed. Install all optional dependencies for the `VLLM` model with: `pip install openai`. If you want to use the vllm offline inference mode instead of the server mode, please refer to the [VLLMOffline](./vllm_offline.md) model documentation. ## Model Initialization To load the model, you can use the `from_vllm` function. The argument of the function is either an `OpenAI` or `AsyncOpenAI` instance from the `openai` library. Make sure the value of the `base_url` argument of the `OpenAI` client points to your running vLLM server. Consult the [vLLM documentation](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html) on using an OpenAI client with a vLLM server for more information. Based on whether the `openai` client instance is synchronous or asynchronous, you will receive a `VLLM` or `AsyncVLLM` model instance. For instance: ```python import openai import outlines # Create the OpenAI client sync_openai_client = openai.OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123") async_openai_client = openai.AsyncOpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123") # Create a sync model sync_model = outlines.from_vllm(sync_openai_client, "microsoft/Phi-3-mini-4k-instruct") print(type(sync_model)) # # Create an async model async_model = outlines.from_vllm(async_openai_client, "microsoft/Phi-3-mini-4k-instruct") print(type(async_model)) # ``` ## Text Generation To generate text, you can simply call the model with a prompt. For instance: ```python import openai import outlines # Create the model model = outlines.from_vllm(openai.OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123"), "microsoft/Phi-3-mini-4k-instruct") # Call it to generate text response = model("What's the capital of Latvia?", max_tokens=20) print(response) # 'The capital of Latvia is Riga.' ``` #### Vision Some models you can run with VLLM support vision input. To use this feature, provide a list containing a text prompt and `Image` instances. For instance: ```python import io import requests import PIL import outlines import openai from outlines.inputs import Image # Create the model model = outlines.from_vllm( openai.OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123"), "Qwen/Qwen2.5-VL-3B-Instruct" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the prompt containing the text and the image prompt = [ "Describe the image", Image(get_image("https://picsum.photos/id/237/400/300")) ] # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'The image shows a black puppy lying on a wooden surface...' ``` #### Chat You can also use chat inputs with the `VLLM` model. To do so, call the model with a `Chat` instance. The content of messsage within the chat can be vision inputs as described above. For instance: ```python import io import requests import PIL import openai import outlines from outlines.inputs import Chat, Image # Create the model model = outlines.from_vllm( openai.OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123"), "Qwen/Qwen2.5-VL-3B-Instruct" ) # Function to get an image def get_image(url): r = requests.get(url) return PIL.Image.open(io.BytesIO(r.content)) # Create the chat input prompt = Chat([ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": ["Describe the image", Image(get_image("https://picsum.photos/id/237/400/300"))] }, ]) # Call the model to generate a response response = model(prompt, max_tokens=50) print(response) # 'The image shows a black puppy lying on a wooden surface...' ``` #### Streaming Finally, the `VLLM` model supports streaming through the `stream` method. For instance: ```python import openai import outlines # Create the model model = outlines.from_vllm( openai.OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123"), "microsoft/Phi-3-mini-4k-instruct" ) # Stream the response for chunk in model.stream("Tell me a short story about a cat.", max_tokens=50): print(chunk, end="") # 'Once upon a time...' print() ``` ## Asynchronous Calls vLLM supports asynchronous operations by passing an `AsyncOpenAI` client instead of a regular `OpenAI` client. This returns an `AsyncVLLM` model instance that supports async/await patterns. ### Basic Async Generation ```python import asyncio import openai import outlines async def generate_text(): async_client = openai.AsyncOpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123") async_model = outlines.from_vllm(async_client, "microsoft/Phi-3-mini-4k-instruct") result = await async_model("Write a haiku about Python.", max_tokens=50) print(result) asyncio.run(generate_text()) ``` ### Async Streaming The async model also supports streaming with async iteration: ```python import asyncio import openai import outlines async def stream_text(): async_client = openai.AsyncOpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123") async_model = outlines.from_vllm(async_client, "microsoft/Phi-3-mini-4k-instruct") async for chunk in async_model.stream("Tell me a story about a robot.", max_tokens=100): print(chunk, end="") asyncio.run(stream_text()) ``` ### Concurrent Async Requests One of the main benefits of async calls is the ability to make multiple concurrent requests: ```python import asyncio import openai import outlines async def generate_multiple(): async_client = openai.AsyncOpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123") async_model = outlines.from_vllm(async_client, "microsoft/Phi-3-mini-4k-instruct") prompts = [ "Write a tagline for a coffee shop.", "Write a tagline for a bookstore.", "Write a tagline for a gym." ] tasks = [async_model(prompt, max_tokens=30) for prompt in prompts] results = await asyncio.gather(*tasks) for prompt, result in zip(prompts, results): print(f"{prompt}\n{result}\n") asyncio.run(generate_multiple()) ``` ## Structured Generation vLLM supports all output types available in Outlines. Simply provide an `output_type` after the prompt when calling the model. All structured generation features work with both synchronous and asynchronous models. ### Simple Type ```python import openai import outlines output_type = int openai_client = openai.OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123") model = outlines.from_vllm(openai_client, "microsoft/Phi-3-mini-4k-instruct") result = model("How many countries are there in the world?", output_type) print(result) # '200' ``` ### JSON Schema ```python import openai import outlines from typing import List from pydantic import BaseModel class Character(BaseModel): name: str age: int skills: List[str] openai_client = openai.OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123") model = outlines.from_vllm(openai_client, "microsoft/Phi-3-mini-4k-instruct") result = model("Create a character.", output_type=Character, frequency_penalty=1.5) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] ``` ### Multiple Choice ```python from typing import Literal import openai import outlines output_type = Literal["Paris", "London", "Rome", "Berlin"] openai_client = openai.OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123") model = outlines.from_vllm(openai_client, "microsoft/Phi-3-mini-4k-instruct") result = model("What is the capital of France?", output_type, temperature=0) print(result) # 'Paris' ``` ### Regex ```python import openai import outlines from outlines.types import Regex output_type = Regex(r"\d{3}-\d{2}-\d{4}") openai_client = openai.OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123") model = outlines.from_vllm(openai_client, "microsoft/Phi-3-mini-4k-instruct") result = model("Generate a fake social security number.", output_type, top_p=0.1) print(result) # '782-32-3789' ``` ### Context-Free Grammar ```python import openai import outlines from outlines.types import CFG arithmetic_grammar = """ ?start: sum ?sum: product | sum "+" product -> add | sum "-" product -> sub ?product: atom | product "*" atom -> mul | product "/" atom -> div ?atom: NUMBER -> number | "-" atom -> neg | "(" sum ")" %import common.NUMBER %import common.WS_INLINE %ignore WS_INLINE """ output_type = CFG(arithmetic_grammar) openai_client = openai.OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123") model = outlines.from_vllm(openai_client, "microsoft/Phi-3-mini-4k-instruct") result = model("Write an addition.", output_type, extra_body={"guided_decoding_backend": "outlines"}) print(result) # '23 + 48' ``` ### Async Structured Generation All structured generation features work seamlessly with async models: ```python import asyncio import openai import outlines from pydantic import BaseModel class User(BaseModel): name: str email: str age: int async def generate_user(): async_client = openai.AsyncOpenAI(base_url="http://0.0.0.0:8000/v1", api_key="token-abc123") async_model = outlines.from_vllm(async_client, "microsoft/Phi-3-mini-4k-instruct") result = await async_model("Generate a random user profile.", output_type=User) user = User.model_validate_json(result) print(f"Name: {user.name}, Email: {user.email}, Age: {user.age}") asyncio.run(generate_user()) ``` ## Inference Arguments When calling the model, you can provide optional parameters on top of the prompt and the output type. Those will be passed on to the `chat.completions.create` method of the OpenAI client. An optional parameter of particular interest is `extra_body`, which is a dictionary containing arguments that are specific to vLLM and are not part of the standard `openai` interface. Among those, `guided_decoding_backend` allows you to select the library used by the vLLM server to control structured generation. You can use the value `outlines` to generated structured text with Outlines. See the [vLLM documentation](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#extra-parameters) on extra parameters for the OpenAI-compatible server for more information on inference parameters. ================================================ FILE: docs/features/models/vllm_offline.md ================================================ --- title: vLLM Offline --- # vLLM Offline Outlines provides an integration with [vLLM](https://docs.vllm.ai/en/latest/) using the [vllm library](https://github.com/vllm-project/vllm). This model allows you to use vLLM in the "Offline Inference" mode, meaning that text generation happens within the model, there is no separate server. If you want to use vLLM with a server, see the [VLLM model documentation](./vllm.md). !!! Note "Installation" You need to install the `vllm` library to be able to use the `VLLMOffline` model: `pip install vllm`. Due to a library version conflict between outlines and vllm, you MUST install `vllm` before installing `outlines`. When installing `outlines` (after having first installed `vllm`), you may encounter the following error: `ERROR: pip's dependency resolver does not currently take into account all the packages that are installed`. You can safely ignore it. See the [vLLM documentation](https://docs.vllm.ai/en/latest/getting_started/installation/index.html) for instructions on how to install vLLM for CPU, ROCm... ## Model Initialization To load the model, you can use the `from_vllm_offline` function. The single argument of the function is a `LLM` model instance from the `vllm` library. You will then receive a `VLLMOffline` model instance you can use to generate text. Consult the [LLM class API reference](https://docs.vllm.ai/en/latest/api/vllm/index.html#vllm.LLM) for detailed information on how to create an `LLM` instance and on the various available parameters. For instance: ```python import outlines from vllm import LLM # Create the model model = outlines.from_vllm_offline( LLM("microsoft/Phi-3-mini-4k-instruct") ) ``` !!! Note When initializing the `vllm.LLM` object, you can specify a `guided_decoding_backend` to choose what library will be used by vLLM to constrain the generation. Consult the [vLLM documentation](https://docs.vllm.ai/en/v0.8.2/features/structured_outputs.html) on structured output for the list of possible values. ## Text Generation Once you've created your Outlines `VLLMOffline` model instance, you're all set to generate text with this provider. You can simply call the model with a prompt. For instance: ```python import outlines from vllm import LLM, SamplingParams # Create the model model = outlines.from_vllm_offline( LLM("microsoft/Phi-3-mini-4k-instruct") ) # Call it to generate text response = model("What's the capital of Latvia?", sampling_params=SamplingParams(max_tokens=20)) print(response) # 'Riga' ``` #### Chat You can also use chat inputs with the `VLLMOffline` model. To do so, call the model with a `Chat` instance. The content of messsage within the chat can be vision inputs as described above. For instance: ```python import outlines from vllm import LLM, SamplingParams from outlines.inputs import Chat # Create the model model = outlines.from_vllm_offline( LLM("microsoft/Phi-3-mini-4k-instruct") ) # Create the chat prompt prompt = Chat([ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What's the capital of Latvia?"}, ]) # Call the model to generate a response response = model(prompt, sampling_params=SamplingParams(max_tokens=50)) print(response) # 'Riga' ``` #### Streaming The `VLLMOffline` model supports streaming through the `stream` method. For instance: ```python import outlines from vllm import LLM, SamplingParams # Create the model model = outlines.from_vllm_offline( LLM("microsoft/Phi-3-mini-4k-instruct") ) # Stream the response for chunk in model.stream("Tell me a short story about a cat.", sampling_params=SamplingParams(max_tokens=50)): print(chunk) # 'Once...' ``` #### Batching Finally, the `VLLMOffline` model also supports batching through the `batch` method. To use it, provide a list of prompts (using the formats described above) to the `batch` method. You will receive as a result a list of completions. For instance: ```python import outlines from vllm import LLM # Create the model model = outlines.from_vllm_offline( LLM("microsoft/Phi-3-mini-4k-instruct") ) # Create a list of prompts that will be used in a single batch prompts = [ "What's the capital of Lithuania?", "What's the capital of Latvia?", "What's the capital of Estonia?" ] # Call it to generate text result = model.batch(prompts, max_new_tokens=20) print(result) # ['Vilnius', 'Riga', 'Tallinn'] ``` ## Structured Generation The `VLLMOffline` model supports all output types available in Outlines. Simply provide an `output_type` after the prompt when calling the model. ### Simple Type ```python import outlines from vllm import LLM output_type = int model = outlines.from_vllm_offline( LLM("microsoft/Phi-3-mini-4k-instruct") ) result = model("How many countries are there in the world?", output_type) print(result) # '200' ``` ### JSON Schema ```python import outlines from vllm import LLM, SamplingParams from typing import List from pydantic import BaseModel class Character(BaseModel): name: str age: int skills: List[str] model = outlines.from_vllm_offline( LLM("microsoft/Phi-3-mini-4k-instruct") ) result = model("Create a character.", output_type=Character, sampling_params=SamplingParams(frequency_penalty=1.5, max_tokens=200)) print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}' print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy'] ``` ### Multiple Choice ```python from typing import Literal import outlines from vllm import LLM, SamplingParams output_type = Literal["Paris", "London", "Rome", "Berlin"] model = outlines.from_vllm_offline( LLM("microsoft/Phi-3-mini-4k-instruct") ) result = model("What is the capital of France?", output_type, sampling_params=SamplingParams(temperature=0)) print(result) # 'Paris' ``` ### Regex ```python import outlines from vllm import LLM, SamplingParams from outlines.types import Regex output_type = Regex(r"\d{3}-\d{2}-\d{4}") model = outlines.from_vllm_offline( LLM("microsoft/Phi-3-mini-4k-instruct") ) result = model("Generate a fake social security number.", output_type, sampling_params=SamplingParams(top_p=0.1)) print(result) # '782-32-3789' ``` ### Context-Free Grammar ```python import outlines from vllm import LLM, SamplingParams from outlines.types import CFG arithmetic_grammar = """ ?start: sum ?sum: product | sum "+" product -> add | sum "-" product -> sub ?product: atom | product "*" atom -> mul | product "/" atom -> div ?atom: NUMBER -> number | "-" atom -> neg | "(" sum ")" %import common.NUMBER %import common.WS_INLINE %ignore WS_INLINE """ output_type = CFG(arithmetic_grammar) model = outlines.from_vllm_offline( LLM("microsoft/Phi-3-mini-4k-instruct") ) result = model("Write an addition.", output_type) print(result) # '23 + 48' ``` ## Inference Arguments When calling the model, you can provide optional parameters on top of the prompt and the output type. Those will be passed on to the `generate` method of the `LLM` model instance. An argument of particular interest is `sampling_params`. It takes as a value a `vllm.SamplingParams` instance containing parameters such as max_tokens or temperature. See the [vLLM documentation](https://docs.vllm.ai/en/latest/api/vllm/sampling_params.html#vllm.sampling_params.SamplingParams) on sampling parameters for more information on inference parameters. ================================================ FILE: docs/features/utility/application.md ================================================ --- title: Application --- # Application The `Application` class enables you to encapsulate a prompt template and an output type into a reusable component. ## Overview An `Application` combines a prompt template with an output type, creating a reusable component that can be applied to different models. Applications are useful for simplifying repeated tasks where you have a well-defined `Template` and a fixed output type, such as classification tasks or data extraction. To create an `Application` instance, initialize the class with a prompt template and an output type. You can then call the application with a model and the variables defined in your template in a dictionary. For instance: ```python from typing import Literal import transformers from outlines import Application, Template, from_transformers # Create a template template_str = "Is {{ name }} a boy or a girl name?" template = Template.from_string(template_str) # Create a model model = from_transformers( transformers.AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), transformers.AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) # Create the application and call it to generate text application = Application(template, Literal["boy", "girl"]) response = application(model, {"name": "Alice"}, max_new_tokens=10) print(response) # "girl" ``` Instead of providing an Outlines `Template` instance, you can provide a `Callable` that returns a string. The parameters of the callable are used as the variables of the template such that you must provide values for them in the dictionary when calling the application. For instance, we can create the same example as above using a a function instead of a template: ```python from typing import Literal import transformers from outlines import Application, from_transformers # Create a function that will be used as a template def template_func(name: str) -> str: return f"Is {name} a boy or a girl name?" # Create a model model = from_transformers( transformers.AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), transformers.AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) # Create the application with the function template and call it to generate text application = Application(template_func, Literal["boy", "girl"]) response = application(model, {"name": "Alice"}, max_new_tokens=10) print(response) # "girl" ``` ================================================ FILE: docs/features/utility/regex_dsl.md ================================================ --- title: Regex DSL --- # Regex DSL This library provides a Domain-Specific Language (DSL) to construct regular expressions in a more intuitive and modular way. It allows you to create complex regexes using simple building blocks that represent literal strings, patterns, and various quantifiers. Additionally, these custom regex types can be used directly as types in [Pydantic](https://pydantic-docs.helpmanual.io/) schemas to enforce pattern constraints during text generation. --- ## Why Use This DSL? 1. **Modularity & Readability**: Instead of writing cryptic regular expression strings, you compose a regex as a tree of objects. 2. **Enhanced Debugging**: Each expression can be visualized as an ASCII tree, making it easier to understand and debug complex regexes. 3. **Pydantic Integration**: Use your DSL-defined regex as types in Pydantic models. The DSL seamlessly converts to JSON Schema with proper pattern constraints. 4. **Extensibility**: Easily add or modify quantifiers and other regex components by extending the provided classes. --- ## Building Blocks Every regex component in this DSL is a **Term**. Here are two primary types: - **`String`**: Represents a literal string. It escapes the characters that have a special meaning in regular expressions. - **`Regex`**: Represents an existing regex pattern string. ```python from outlines.types import String, Regex # A literal string "hello" literal = String("hello") # Internally represents "hello" # A regex pattern to match one or more digits digit = Regex(r"[0-9]+") # Internally represents the pattern [0-9]+ # Converting to standard regex strings: from outlines.types.dsl import to_regex print(to_regex(literal)) # Output: hello print(to_regex(digit)) # Output: [0-9]+ ``` --- ## Early Introduction to Quantifiers & Combining Terms The DSL supports common regex quantifiers as methods on every `Term`. These methods allow you to specify how many times a pattern should be matched. They include: - **`exactly(count)`**: Matches the term exactly `count` times. - **`optional()`**: Matches the term zero or one time. - **`one_or_more()`**: Matches the term one or more times (Kleene Plus). - **`zero_or_more()`**: Matches the term zero or more times (Kleene Star). - **`between(min_count, max_count)`**: Matches the term between `min_count` and `max_count` times (inclusive). - **`at_least(count)`**: Matches the term at least `count` times. - **`at_most(count)`**: Matches the term up to `count` times. These quantifiers can also be used as functions that take the `Term` as an argument. If the term is a plain string, it will be automatically converted to a `String` object. Thus `String("foo").optional()` is equivalent to `optional("foo")`. Let's see these quantifiers side by side with examples. ### Quantifiers in Action #### `exactly(count)` This method restricts the term to appear exactly `count` times. ```python # Example: exactly 5 digits five_digits = Regex(r"\d").exactly(5) print(to_regex(five_digits)) # Output: (\d){5} ``` You can also use the `exactly` function: ```python from outlines.types import exactly # Example: exactly 5 digits five_digits = exactly(Regex(r"\d"), 5) print(to_regex(five_digits)) # Output: (\d){5} ``` #### `optional()` This method makes a term optional, meaning it may occur zero or one time. ```python # Example: an optional "s" at the end of a word maybe_s = String("s").optional() print(to_regex(maybe_s)) # Output: (s)? ``` You can also use the `optional` function: ```python from outlines.types import optional # Example: an optional "s" at the end of a word maybe_s = optional("s") print(to_regex(maybe_s)) # Output: (s)? ``` #### `one_or_more()` This method indicates that the term must appear at least once. ```python # Example: one or more alphabetic characters letters = Regex(r"[A-Za-z]").one_or_more() print(to_regex(letters)) # Output: ([A-Za-z])+ ``` You can also use the `one_or_more` function: ```python from outlines.types import one_or_more # Example: one or more alphabetic characters letters = one_or_more(Regex(r"[A-Za-z]")) print(to_regex(letters)) # Output: ([A-Za-z])+ ``` #### `zero_or_more()` This method indicates that the term can occur zero or more times. ```python # Example: zero or more spaces spaces = String(" ").zero_or_more() print(to_regex(spaces)) # Output: ( )* ``` You can also use the `zero_or_more` function: ```python from outlines.types import zero_or_more # Example: zero or more spaces spaces = zero_or_more(" ") print(to_regex(spaces)) # Output: ( )* ``` #### `between(min_count, max_count)` This method indicates that the term can appear any number of times between `min_count` and `max_count` (inclusive). ```python # Example: Between 2 and 4 word characters word_chars = Regex(r"\w").between(2, 4) print(to_regex(word_chars)) # Output: (\w){2,4} ``` You can also use the `between` function: ```python from outlines.types import between # Example: Between 2 and 4 word characters word_chars = between(Regex(r"\w"), 2, 4) print(to_regex(word_chars)) # Output: (\w){2,4} ``` #### `at_least(count)` This method indicates that the term must appear at least `count` times. ```python # Example: At least 3 digits at_least_three = Regex(r"\d").at_least(3) print(to_regex(at_least_three)) # Output: (\d){3,} ``` You can also use the `at_least` function: ```python from outlines.types import at_least # Example: At least 3 digits at_least_three = at_least(Regex(r"\d"), 3) print(to_regex(at_least_three)) # Output: (\d){3,} ``` #### `at_most(count)` This method indicates that the term can appear at most `count` times. ```python # Example: At most 3 digits up_to_three = Regex(r"\d").at_most(3) print(to_regex(up_to_three)) # Output: (\d){0,3} ``` You can also use the `at_most` function: ```python from outlines.types import at_most # Example: At most 3 digits up_to_three = at_most(Regex(r"\d"), 3) print(to_regex(up_to_three)) # Output: (\d){0,3} ``` --- ## Combining Terms The DSL allows you to combine basic terms into more complex patterns using concatenation and alternation. ### Concatenation (`+`) The `+` operator (and its reflected variant) concatenates terms, meaning that the terms are matched in sequence. ```python # Example: Match "hello world" pattern = String("hello") + " " + Regex(r"\w+") print(to_regex(pattern)) # Output: hello\ (\w+) ``` ### Alternation (`either()`) The `either()` function creates alternatives, allowing a match for one of several patterns. You can provide as many terms as you want. ```python # Example: Match either "cat" or "dog" or "mouse" animal = either(String("cat"), "dog", "mouse") print(to_regex(animal)) # Output: (cat|dog|mouse) ``` *Note:* When using `either()` with plain strings (such as `"dog"`), the DSL automatically wraps them in a `String` object that escapes the characters that have a special meaning in regular expressions, just like with quantifier functions. --- ## Custom types The DSL comes "batteries included" with types that represent common text constructs: - `integer` represents an integer number as recognized by `int` - `boolean` represents a boolean, "True" or "False" as recognized by `bool` - `number` represents a floating-point number recognize by Python's `float` - `date` represents a date as understood by `datetime.date` - `time` represents a time as understood by `datetime.time` - `datetime` represents a time as understood by `datetime.datetime` - `digit` represents a single digit - `char` represents a single character - `newline` represents a new line character - `whitespace` represents a white space - `hex_str` represents a hexadecimal string, optionally prefixed with "0x" - `uuid4` represents a UUID version 4 string in the format "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx" - `ipv4` represents an IPv4 address in the format "xxx.xxx.xxx.xxx" where each octet is between 0 and 255 - `sentence` represents a sentence - `paragraph` represents a paragraph (one or more sentences separated by one or more line breaks) For instance you can describe the answers in the GSM8K dataset using the following pattern: ```python from outlines.types import sentence, digit answer = "A: " + sentence.between(2,4) + " So the answer is: " + digit.between(1,4) ``` --- ## Practical Examples ### Example 1: Matching a Custom ID Format Suppose you want to create a regex that matches an ID format like "ID-12345", where: - The literal "ID-" must be at the start. - Followed by exactly 5 digits. ```python id_pattern = "ID-" + Regex(r"\d").exactly(5) print(to_regex(id_pattern)) # Output: ID-(\d){5} ``` ### Example 2: Email Validation with Pydantic You can define a regex for email validation and use it as a type in a Pydantic model. ```python from pydantic import BaseModel, ValidationError # Define an email regex term (this is a simplified version) email_regex = Regex(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+") class User(BaseModel): name: str email: email_regex # Use our DSL regex as a field type # Valid input user = User(name="Alice", email="alice@example.com") print(user) # Invalid input (raises a ValidationError) try: User(name="Bob", email="not-an-email") except ValidationError as e: print(e) ``` When used in a Pydantic model, the email field is automatically validated against the regex pattern and its JSON Schema includes the `pattern` constraint. ### Example 3: Building a Complex Pattern Consider a pattern to match a simple date format: `YYYY-MM-DD`. ```python year = Regex(r"\d").exactly(4) # Four digits for the year month = Regex(r"\d").exactly(2) # Two digits for the month day = Regex(r"\d").exactly(2) # Two digits for the day # Combine with literal hyphens date_pattern = year + "-" + month + "-" + day print(to_regex(date_pattern)) # Output: (\d){4}\-(\d){2}\-(\d){2} ``` --- ## Visualizing Your Pattern One of the unique features of this DSL is that each term can print its underlying structure as an ASCII tree. This visualization can be particularly helpful when dealing with complex expressions. ```python # A composite pattern using concatenation and quantifiers pattern = "a" + String("b").one_or_more() + "c" print(pattern) ``` *Expected Output:* ```ascii └── Sequence ├── String('a') ├── KleenePlus(+) │ └── String('b') └── String('c') ``` This tree representation makes it easy to see the hierarchy and order of operations in your regular expression. --- ## Final Words This DSL is designed to simplify the creation and management of regular expressions—whether you're validating inputs in a web API, constraining the output of an LLM, or just experimenting with regex patterns. With intuitive methods for common quantifiers and operators, clear visual feedback, and built-in integration with Pydantic, you can build robust and maintainable regex-based validations with ease. Feel free to explore the library further and adapt the examples to your use cases. Happy regexing! ================================================ FILE: docs/features/utility/template.md ================================================ --- title: Template --- # Template Outlines templates provide a way of creating reusable prompt structures with placeholders for dynamic content. ## Overview To create a `Template` instance, you can use two class methods: - `from_string`: Creates a template from a string containing a Jinja2 template - `from_file`: Creates a template from a file containing a Jinja2 template After creating a template, you can call it with the variables required by the template as keyword arguments. For instance: ```python from outlines import Template # Create a template from a string template_str = """ Hello, {{ name }}! The weather today is {{ weather }}. """ template = Template.from_string(template_str) # Create a template from a file, assuming the content of template_str is put into a file template = Template.from_file("path_to/my_file.txt") # Call the template to render the prompt prompt: str = template(name="Alice", weather="sunny") print(prompt) # "Hello, Alice!\nThe weather today is sunny." ``` ## Composite Templates Templates can be nested and composed to create complex prompt structures: ```python from outlines import Template # Create component templates user_template = Template.from_string("User: {{ query }}") system_template = Template.from_string("System: {{ instruction }}") # Create a composite template chat_template = Template.from_string(""" {{ system }} {{ user }} """) # Fill in nested templates prompt = chat_template( system=system_template(instruction="You are a helpful assistant."), user=user_template(query="What is machine learning?") ) print(prompt) # System: You are a helpful assistant. # # User: What is machine learning? ``` ## Custom Filters You can add custom filters to your Outlines template to extend the templating functionality. To do so, provide as second argument a dictionary with filter names as keys and filter functions as values. The filter can then be used in your jinja2 template following the regular syntax. When rendering a prompt, the function will be applied to the associated variable. For instance: ```python from outlines import Template def uppercase(text: str) -> str: return text.upper() # Add custom filter when creating template template = Template.from_string( "Hello {{ name | uppercase }}!", filters={"uppercase": uppercase} ) prompt = template(name="alice") print(prompt) # "Hello ALICE!" ``` ================================================ FILE: docs/guide/architecture.md ================================================ # Architecture Overview This guide explains how Outlines is organized so you can navigate the codebase, debug issues, and extend the library. ## How Structured Generation Works When you ask an LLM to output JSON or follow a specific format, traditional approaches generate text freely and hope it matches. Outlines takes a different approach: it constrains the model at generation time by masking invalid tokens, making it impossible for the model to produce invalid output. ## Core Abstractions Outlines has three main abstractions: **Model**, **Generator**, and **Type System**. ### Model and ModelTypeAdapter The `Model` class (`outlines/models/base.py`) is the abstract base class for all LLM integrations. There are two categories based on how structured generation is implemented: **Steerable models** (`SteerableModel`): Models where Outlines directly applies a logits processor during generation. This includes: - `LlamaCpp` - llama.cpp bindings - `MLXLM` - Apple MLX models - `Transformers` - HuggingFace Transformers **Black-box models** (`BlackBoxModel`): Models where Outlines delegates structured generation to the provider's API rather than applying logits processors directly. This includes: - `OpenAI`, `Anthropic`, `Gemini`, `Mistral` - Cloud API providers - `VLLM`, `VLLMOffline`, `SGLang`, `TGI`, `Ollama` - Inference servers with built-in structured generation - `Dottxt` - Dottxt API Note: Some black-box models (like vLLM or Ollama) could technically expose logits, but they implement structured generation server-side, so Outlines delegates to their APIs instead of building processors locally. **The Model interface:** Every model subclass must implement these methods: | Method | Purpose | |--------|---------| | `generate(model_input, output_type, **kwargs)` | Generate a single response (internal, receives logits processor or output type) | | `generate_batch(model_input, output_type, **kwargs)` | Generate responses for multiple prompts | | `generate_stream(model_input, output_type, **kwargs)` | Stream a response token by token | The base `Model` class provides these convenience methods that create a `Generator` internally: | Method | Purpose | |--------|---------| | `__call__(model_input, output_type, backend, **kwargs)` | Generate a single response | | `batch(model_input, output_type, backend, **kwargs)` | Generate batch responses | | `stream(model_input, output_type, backend, **kwargs)` | Stream a response | **ModelTypeAdapter - Bridging formats:** Each model has a `type_adapter` attribute that handles format conversion between Outlines and the specific model provider: ```python class ModelTypeAdapter(ABC): @abstractmethod def format_input(self, model_input) -> Any: """Convert user input to model-specific format. For API models: creates the `messages` argument For local models: may apply chat templates, convert str to list, etc. """ ... @abstractmethod def format_output_type(self, output_type) -> Any: """Convert output type to model-specific format. For black-box models: creates `response_format` argument For steerable models: formats the logits processor for the model """ ... ``` ### Generator - Unifying the Generation Interface The `Generator` (`outlines/generator.py`) is a factory function that returns the appropriate generator class based on the model type. **Why Generator exists:** Without Generator, users would need different code for different model types: ```python # Without Generator - user needs to know model internals if isinstance(model, SteerableModel): processor = build_logits_processor(output_type) result = model.generate(prompt, processor) else: result = model.generate(prompt, output_type) ``` With Generator, the complexity is hidden: ```python # With Generator - same code works for any model generator = Generator(model, output_type) result = generator(prompt) ``` **Generator classes:** | Class | Used For | How It Works | |-------|----------|--------------| | `SteerableGenerator` | Local models (`LlamaCpp`, `MLXLM`, `Transformers`) | Builds and caches a logits processor from the output type, resets and passes it to the model on each call | | `BlackBoxGenerator` | Sync API models | Passes output type directly to model's generate method | | `AsyncBlackBoxGenerator` | Async API models | Async version of BlackBoxGenerator | **SteerableGenerator internals:** When you create a `SteerableGenerator` with an output type, it: 1. Converts the Python type to a `Term` using `python_types_to_terms()` 2. Based on the Term type, builds the appropriate logits processor: - `CFG` → calls `get_cfg_logits_processor()` - `JsonSchema` → calls `get_json_schema_logits_processor()` - Other terms → converts to regex via `to_regex()`, then calls `get_regex_logits_processor()` 3. Caches the processor for reuse 4. On each call, resets processor state and passes it to the model ### Type System - From Python Types to Constraints The type system (`outlines/types/dsl.py`) converts Python types into constraints that can be enforced during generation. **The conversion pipeline:** ``` Python Type → Term (via python_types_to_terms) ↓ ┌───────┴───────┐ ↓ ↓ CFG or JsonSchema Other Terms ↓ ↓ Direct to backend to_regex() → Regex string ↓ ↓ └───────┬───────┘ ↓ Logits Processor (via backend) ``` **Term classes:** `Term` is the base class for Outlines' constraint DSL. Key subclasses: | Term | Purpose | Example | |------|---------|---------| | `Regex` | Match a regex pattern | `Regex("[0-9]+")` | | `JsonSchema` | Match valid JSON for a schema | `JsonSchema(MyPydanticModel)` | | `CFG` | Match a context-free grammar | `CFG(grammar_string)` | | `String` | Match a literal string | `String("hello")` | | `Sequence` | Concatenate terms | `String("[") + item + String("]")` | | `Alternatives` | Match any of several terms | `term1 \| term2` | | `KleeneStar` | Zero or more repetitions | `zero_or_more(term)` | | `KleenePlus` | One or more repetitions | `one_or_more(term)` | | `Optional` | Zero or one occurrence | `optional(term)` | **python_types_to_terms:** This function converts Python types to Term instances: ```python def python_types_to_terms(ptype) -> Term: # Already a Term - return as-is if isinstance(ptype, Term): return ptype # Basic types - return predefined regex patterns if is_int(ptype): return types.integer if is_float(ptype): return types.number if is_str(ptype): return types.string if is_bool(ptype): return types.boolean # Structured types - convert to JsonSchema if is_pydantic_model(ptype) or is_dataclass(ptype) or is_typed_dict(ptype): return JsonSchema(ptype) # Enum - create alternatives from members if is_enum(ptype): return Alternatives([...]) # Union, Literal, List, Tuple, Dict - handle recursively ... ``` ## Data Flow Here's how a structured generation request flows through the system: ``` 1. User calls: model("What is 2+2?", int) 2. Model.__call__ creates Generator: Generator(model, int) 3. Generator factory checks model type: - SteerableModel → SteerableGenerator - BlackBoxModel → BlackBoxGenerator 4. For SteerableGenerator: a. python_types_to_terms(int) → Regex("-?[0-9]+") b. to_regex(term) → regex string c. get_regex_logits_processor(backend, model, regex) → LogitsProcessor 5. Generator.__call__(prompt): a. processor.reset() # Reset state for new generation b. model.generate(prompt, processor) 6. During generation (steerable models only): - Model computes logits for all tokens - LogitsProcessor masks invalid tokens (set to -inf) - Model samples from remaining valid tokens 7. Result returned to user ``` ## File Organization ``` outlines/ ├── __init__.py # Public API exports ├── generator.py # Generator factory and classes ├── models/ │ ├── base.py # Model, AsyncModel, ModelTypeAdapter base classes │ ├── transformers.py # HuggingFace Transformers │ ├── llamacpp.py # llama.cpp bindings │ ├── mlxlm.py # Apple MLX models │ ├── openai.py # OpenAI API │ ├── anthropic.py # Anthropic API │ ├── vllm.py # vLLM server │ ├── vllm_offline.py # vLLM offline mode │ └── ... # Other providers ├── types/ │ ├── __init__.py # Predefined types: integer, number, date, etc. │ ├── dsl.py # Term classes, python_types_to_terms, to_regex │ └── utils.py # Type checking utilities ├── backends/ │ ├── __init__.py # get_*_logits_processor functions │ ├── base.py # LogitsProcessorType protocol │ ├── outlines_core.py # Default backend using outlines-core │ ├── llguidance.py # Microsoft llguidance backend │ └── xgrammar.py # xgrammar backend ├── processors/ │ ├── base_logits_processor.py # Base processor implementation │ └── tensor_adapters/ # Tensor library adapters ├── grammars/ # Predefined grammar files └── templates.py # Prompt template utilities ``` ## Backends Backends are responsible for converting constraints (regex, JSON schema, CFG) into logits processors that can be applied during generation. They only apply to steerable models. **Available backends:** | Backend | Default For | Description | |---------|-------------|-------------| | `outlines_core` | Regex, JSON Schema | The default backend, built on the `outlines-core` Rust library. Compiles constraints into finite state machines. | | `llguidance` | CFG | Microsoft's llguidance library. Supports context-free grammars and is the only backend that handles CFG constraints. | | `xgrammar` | - | Alternative backend using the xgrammar library. | **How backends are selected:** 1. If the user specifies a backend via the `backend` parameter, that backend is used 2. Otherwise, the default backend for the constraint type is used: - Regex → `outlines_core` - JSON Schema → `outlines_core` - CFG → `llguidance` **Backend interface:** All backends inherit from `BaseBackend` and implement three methods: ```python class BaseBackend(ABC): @abstractmethod def get_json_schema_logits_processor(self, json_schema: str) -> LogitsProcessorType: ... @abstractmethod def get_regex_logits_processor(self, regex: str) -> LogitsProcessorType: ... @abstractmethod def get_cfg_logits_processor(self, grammar: str) -> LogitsProcessorType: ... ``` **Specifying a backend:** ```python from outlines import from_transformers, Generator model = from_transformers("microsoft/Phi-3-mini-4k-instruct") # Use xgrammar instead of the default outlines_core generator = Generator(model, int, backend="xgrammar") ``` ## Extension Points ### Adding a New Model Provider 1. Create a new file in `outlines/models/` (e.g., `mymodel.py`) 2. Implement a `ModelTypeAdapter` subclass with `format_input()` and `format_output_type()` 3. Implement a `Model` subclass with `generate()`, `generate_batch()`, and `generate_stream()` 4. Add a factory function (e.g., `from_mymodel()`) 5. Export from `outlines/models/__init__.py` 6. Add to `SteerableModel` or `BlackBoxModel` type alias as appropriate ================================================ FILE: docs/guide/chat_templating.md ================================================ # Chat templating Instruction-tuned language models use "special tokens" to indicate different parts of text, such as the system prompt, the user prompt, any images, and the assistant's response. A [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating) is how different types of input are composited together into a single, machine-readable string. Outlines supports chat templating throught the `Chat` model input class. It contains a list of messages similar in format to the chat history you would use with API models such as OpenAI or Anthropic and to the expected arguments of the `apply_chat_template` method of transformers tokenizers. You can find detailed information on the interface of this object in the [model inputs documentation](../features/core/inputs.md). ================================================ FILE: docs/guide/core_concepts.md ================================================ --- title: Core concepts --- # Core concepts Coming soon. This will document various concepts at a high level, so users can understand Outlines before diving into specific implementations. 1. Constrained decoding, tokens, and the basics of logit biasing 2. Different ways to define output structure (regex, JSON schema, Pydantic models, context-free grammars) 3. How finite state machines are used to guarantee output structure 4. `Generator`, `Application`, `Template`, 5. Prompt engineering vs. structured generation ================================================ FILE: docs/guide/fastapi_vllm_deployment.md ================================================ --- title: Deploying with FastAPI --- # Deploying with FastAPI This guide demonstrates how to build a FastAPI application that leverages Outlines' async integration with vLLM. We create a customer support API that can intelligently categorize tickets and generate structured responses. ## Prerequisites Before starting, ensure you have a vLLM server running (locally or remotely) and the following packages installed: ```shell pip install fastapi uvicorn outlines openai pydantic ``` ## Building the Application ### Step 1: Define Data Models First, let's define our Pydantic models for structured outputs: ```python # models.py from enum import Enum from typing import List from pydantic import BaseModel, Field class TicketCategory(str, Enum): BILLING = "billing" TECHNICAL = "technical" ACCOUNT = "account" PRODUCT = "product" OTHER = "other" class TicketPriority(str, Enum): LOW = "low" MEDIUM = "medium" HIGH = "high" URGENT = "urgent" class TicketAnalysis(BaseModel): category: TicketCategory priority: TicketPriority summary: str = Field(description="Brief summary of the issue") customer_sentiment: str = Field(description="Customer emotional state") key_issues: List[str] = Field(description="List of main problems") requires_human: bool = Field(description="Whether this needs human intervention") class SupportResponse(BaseModel): greeting: str acknowledgment: str = Field(description="Acknowledge the customer's issue") solution_steps: List[str] = Field(description="Steps to resolve the issue") closing: str ``` ### Step 2: Define the prompts Let us now write the prompts that we will be using in our application, using Jinja 2's templating language. We separate them from the application implementation so they are easier to modify and version. ```ascii {# prompts/categorize.txt #} Analyze this customer support ticket: Customer ID: {{ customer_id }} Message: {{ message }} Extract the category, priority, and other relevant information. ``` ```ascii {# prompts/respond.txt #} Generate a professional customer support response. Customer Message: {{ message }} Category: {{ category }} Priority: {{ priority }} Customer Sentiment: {{ customer_sentiment }} Create a helpful, empathetic response that addresses their concerns. ``` ### Step 3: Create the FastAPI Application Now let's create our FastAPI application with async vLLM integration: ```python # main.py import asyncio from contextlib import asynccontextmanager from typing import Optional import openai from outlines import models, Template from fastapi import FastAPI, HTTPException from pydantic import BaseModel from models import TicketAnalysis, SupportResponse # Request model class TicketRequest(BaseModel): customer_id: str message: str # Global model instance async_model = None # The lifespan function is a FastAPI construct # used to define startup and shutdown logic for the API. @asynccontextmanager async def lifespan(app: FastAPI): """Initialize the async vLLM model on startup.""" global async_model client = openai.AsyncOpenAI( base_url="http://localhost:8000/v1", # Adjust to your vLLM server URL api_key="dummy" # vLLM doesn't require a real API key ) async_model = models.from_vllm(client, "Qwen/Qwen2.5-VL-7B-Instruct") yield async_model = None # Cleanup # Create FastAPI app app = FastAPI( title="Customer Support Assistant API", description="AI-powered customer support with structured outputs", version="1.0.0", lifespan=lifespan ) @app.post("/analyze-ticket", response_model=TicketAnalysis) async def analyze_ticket(request: TicketRequest): """Analyze a customer support ticket and extract structured information.""" if async_model is None: raise HTTPException(status_code=503, detail="Model not initialized") template = Template.from_file("prompts/categorize.txt") prompt = template( customer_id=request.customer_id, message=request.message ) try: # Generate and parse a structured response result = await async_model(prompt, TicketAnalysis, max_tokens=5000) analysis = TicketAnalysis.model_validate_json(result) return analysis except Exception as e: raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}") @app.post("/generate-response", response_model=SupportResponse) async def generate_response( request: TicketRequest, analysis: TicketAnalysis ): """Generate a structured support response based on ticket analysis.""" if async_model is None: raise HTTPException(status_code=503, detail="Model not initialized") template = Template.from_file("prompts/respond.txt") prompt = template( message=request.message, category=analysis.category, priority=analysis.priority, customer_sentiment=analysis.customer_sentiment ) try: # Generate and parse a structured response result = await async_model(prompt, SupportResponse, max_tokens=5000) response = SupportResponse.model_validate_json(result) return response except Exception as e: raise HTTPException(status_code=500, detail=f"Response generation failed: {str(e)}") ``` ## Running the Application ### Step 1: Start your vLLM server ```shell vllm serve Qwen/Qwen2.5-VL-7B-Instruct ``` ### Step 2: Run the FastAPI application ```shell uvicorn main:app --reload --host 0.0.0.0 --port 8080 ``` ## Testing the API ### Example 1: Analyze a support ticket ```shell curl -X POST "http://localhost:8080/analyze-ticket" \ -H "Content-Type: application/json" \ -d '{ "customer_id": "CUST123", "message": "I have been charged twice for my subscription this month. This is unacceptable and I want a refund immediately!" }' ``` Expected response: ```json { "category": "billing", "priority": "high", "summary": "Customer charged twice for subscription, requesting refund", "customer_sentiment": "angry", "key_issues": ["duplicate charge", "subscription billing", "refund request"], "requires_human": false } ``` ### Example 2: Generate a support response ```shell # First, get the analysis ANALYSIS=$(curl -s -X POST "http://localhost:8080/analyze-ticket" \ -H "Content-Type: application/json" \ -d '{ "customer_id": "CUST456", "message": "My app keeps crashing when I try to upload photos." }') # Then generate a response curl -X POST "http://localhost:8080/generate-response" \ -H "Content-Type: application/json" \ -d "{ \"request\": { \"customer_id\": \"CUST456\", \"message\": \"My app keeps crashing when I try to upload photos.\" }, \"analysis\": $ANALYSIS }" ``` By combining FastAPI's async capabilities with Outlines' structured generation, you can build robust APIs that leverage large language models. ## Using Alternative Backends: SGLang and TGI One of the key advantages of Outlines is its unified API across different inference backends. You can easily switch from vLLM to SGLang or TGI with minimal code changes - just modify the model initialization in the `lifespan` function. ### Using SGLang Instead of vLLM To use SGLang, simply change the client initialization: ```python @asynccontextmanager async def lifespan(app: FastAPI): """Initialize the async SGLang model on startup.""" global async_model client = openai.AsyncOpenAI( base_url="http://localhost:30000/v1", # SGLang server URL api_key="dummy" ) async_model = models.from_sglang(client) yield async_model = None ``` Start your SGLang server with: ```shell python -m sglang.launch_server \ --model-path meta-llama/Llama-2-7b-chat-hf \ --port 30000 ``` ### Using TGI Instead of vLLM For TGI (Text Generation Inference), use the Hugging Face client: ```python import huggingface_hub @asynccontextmanager async def lifespan(app: FastAPI): """Initialize the async TGI model on startup.""" global async_model client = huggingface_hub.AsyncInferenceClient( "http://localhost:8080" # TGI server URL ) async_model = models.from_tgi(client) yield async_model = None ``` Start your TGI server with: ```shell docker run --gpus all -p 8080:80 \ ghcr.io/huggingface/text-generation-inference:latest \ --model-id meta-llama/Llama-2-7b-chat-hf ``` The rest of your FastAPI application - all the endpoints, error handling, and business logic - remains completely unchanged. This flexibility allows you to test different inference engines without rewriting your application. ================================================ FILE: docs/guide/getting_started.md ================================================ --- title: Getting Started --- # Getting Started ## Installation We recommend using `uv` to install Outlines. You can find `uv` installation instructions [here](https://github.com/astral-sh/uv). ```shell uv pip install 'outlines[transformers]' ``` or the classic `pip`: ```shell pip install 'outlines[transformers]' ``` For more information, see the [installation guide](./installation). ## Creating a Model Outlines contains a variety of models that wrap LLM inference engines/clients. For each of them, you need to install the model's associated library as described in the [installation guide](../installation). The full list of available models along with detailed explanation on how to use them can be found in the [models page](../features/models/index.md) of the Features section of the documentation. For a quick start, you can find below an example of how to initialize all supported models in Outlines: === "vLLM" ```python import outlines from openai import OpenAI # You must have a separate vLLM server running # Create an OpenAI client with the base URL of the VLLM server openai_client = OpenAI(base_url="http://localhost:11434/v1") # Create an Outlines model model = outlines.from_vllm(openai_client, "microsoft/Phi-3-mini-4k-instruct") ``` === "Ollama" ```python import outlines from ollama import Client # Create an Ollama client ollama_client = Client() # Create an Outlines model, the model must be available on your system model = outlines.from_ollama(ollama_client, "tinyllama") ``` === "OpenAI" ```python import outlines from openai import OpenAI # Create an OpenAI client instance openai_client = OpenAI() # Create an Outlines model model = outlines.from_openai(openai_client, "gpt-4o") ``` === "Transformers" ```python import outlines from transformers import AutoModelForCausalLM, AutoTokenizer # Define the model you want to use model_name = "HuggingFaceTB/SmolLM2-135M-Instruct" # Create a HuggingFace model and tokenizer hf_model = AutoModelForCausalLM.from_pretrained(model_name) hf_tokenizer = AutoTokenizer.from_pretrained(model_name) # Create an Outlines model model = outlines.from_transformers(hf_model, hf_tokenizer) ``` === "llama.cpp" ```python import outlines from llama_cpp import Llama # Model to use, it will be downloaded from the HuggingFace hub repo_id = "TheBloke/Llama-2-13B-chat-GGUF" file_name = "llama-2-13b-chat.Q4_K_M.gguf" # Create a Llama.cpp model llama_cpp_model = Llama.from_pretrained(repo_id, file_name) # Create an Outlines model model = outlines.from_llamacpp(llama_cpp_model) ``` === "Gemini" ```python import outlines from google.generativeai import GenerativeModel # Create a Gemini client gemini_client = GenerativeModel() # Create an Outlines model model = outlines.from_gemini(gemini_client, "gemini-1-5-flash") ``` === "mlx-lm" ```python import outlines import mlx_lm # Create an MLXLM model with the output of mlx_lm.load # The model will be downloaded from the HuggingFace hub model = outlines.from_mlxlm( *mlx_lm.load("mlx-community/SmolLM-135M-Instruct-4bit") ) ``` === "SgLang" ```python import outlines from openai import OpenAI # You must have a separate SgLang server running # Create an OpenAI client with the base URL of the SgLang server openai_client = OpenAI(base_url="http://localhost:11434/v1") # Create an Outlines model model = outlines.from_sglang(openai_client) ``` === "TGI" ```python # SgLang import outlines from huggingface_hub import InferenceClient # You must have a separate TGI server running # Create an InferenceClient client with the base URL of the TGI server tgi_client = InferenceClient("http://localhost:8080") # Create an Outlines model model = outlines.from_tgi(tgi_client) ``` === "vLLM (offline)" ```python import outlines from vllm import LLM # Create a vLLM model vllm_model = LLM("microsoft/Phi-3-mini-4k-instruct") # Create an Outlines model model = outlines.from_vllm_offline(vllm_model) ``` ## Generating Text Once you have created the Outlines model for your inference engine/client, you are already all set to generate text! Models are callable such that you can simply call them with a text prompt. For instance: ```python model = # Call the model to generate text result = model("Write a short story about a cat.") print(result) # 'In a quiet village where the cobblestones hummed softly beneath the morning mist...' ``` Most models also support streaming through the use of a `streaming` method. You can directly use with a prompt just like regular text generation. For instance: ```python model = # Stream text for chunk in model.streaming("Write a short story about a cat.") print(chunk) # 'In ...' ``` ## Structured Generation Outlines follows a simple pattern that mirrors Python's own type system for structured outputs. Simply specify the desired output type as you would when using type hinting with a function, and Outlines will ensure your data matches that structure exactly. Supported output types can be organized in 5 categories: - [Basic Types](../../features/core/output_types#basic-python-types): `int`, `float`, `bool`... - [Multiple Choices](../../features/core/output_types#multiple-choices): using `Literal` or `Enum` - [JSON Schemas](../../features/core/output_types#json-schemas): using a wide range of possible objects including Pydantic models and dataclasses - [Regex](../../features/core/output_types#regex-patterns): through the Outlines's `Regex` object - [Context-free Grammars](../../features/core/output_types#context-free-grammars): through the Outlines's `CFG` object Consult the section on [Output Types](../../features/core/output_types.md) in the features documentation for more detailed information on all supported types for each output type category. In the meantime, you can find below examples of using each of the five output type categories: === "Basic Types" ```python model = # Generate an integer result = model("How many countries are there in the world?", int) print(result) # '200' ``` === "Multiple Choice" ```python from enum import Enum # Define our multiple choice output type class PizzaOrBurger(Enum): pizza = "pizza" burger = "burger" model = # Generate text corresponding to either of the choices defined above result = model("What do you want to eat, a pizza or a burger?", PizzaOrBurger) print(result) # 'pizza' ``` === "JSON Schemas" ```python from datetime import date from typing import Dict, List, Union from pydantic import BaseModel model = # Define the class we will use as an output type class Character(BaseModel): name: str birth_date: date skills: Union[Dict, List[str]] # Generate a character result = model("Create a character", Character) print(result) # '{"name": "Aurora", "birth_date": "1990-06-15", "skills": ["Stealth", "Diplomacy"]}' print(Character.model_validate_json(result)) # name=Aurora birth_date=datetime.date(1990, 6, 15) skills=['Stealth', 'Diplomacy'] ``` === "Regex" ```python from outlines.types import Regex model = # Define our regex for a 3 digit number output_type = Regex(r"[0-9]{3}") # Generate the number result = model("Write a 3 digit number", output_type) print(result) # '236' ``` === "Context-free Grammars" ```python from outlines.types import CFG model = # Define your Lark grammar as string arithmetic_grammar = """ ?start: sum ?sum: product | sum "+" product -> add | sum "-" product -> sub ?product: atom | product "*" atom -> mul | product "/" atom -> div ?atom: NUMBER -> number | "-" atom -> neg | "(" sum ")" %import common.NUMBER %import common.WS_INLINE %ignore WS_INLINE """ # Generate an arithmetic operation result = model("Write an arithmetic operation", CFG(grammar_string)) print(result) # '2 + 3' ``` It's important to note that not all output types are available for all models due to limitations in the underlying inference engines. The [Models](../features/models/index.md) section of the features documentation includes a features matrix that summarize the availability of output types. ## Generators Generators are an important type of objects in Outlines that are used to encapsulate a model and an output type. After having created a generator, you can call it using a similar interface to a model and it will generate text conforming to the output type you initially provided. This feature is useful if you want to generate text several times for given model and output type. Not only does it prevent having to include the same output type at each call, but it also allows us to compile the output type only once instead of doing it at each generation (which is important for local models as this operation can be expensive). For instance: ```python from typing import Literal from outlines import Generator model = # Create a generator generator = Generator(model, Literal["pizza", "burger"]) # Call it as you would call a model result = generator("What do you want to eat, a pizza or a burger?") print(result) # pizza ``` You can find more information on generators in the dedicated page on [Generators](../features/core/generator.md) in the features documentation. ## Other features On top of more detailed explanation on the concepts already discussed here, the [Features](../features/index.md) section of the documentation contains information on additional Outlines features such as applications, prompt templates, the regex DSL... ================================================ FILE: docs/guide/installation.md ================================================ --- title: Installation --- # Installation ## Dependency Management We recommend using modern Python packaging tools such as `uv` for managing python dependencies. ### uv (Recommended) ```shell # Install uv curl -LsSf https://astral.sh/uv/install.sh | sh # Create a virtual environment and install Outlines uv venv source .venv/bin/activate uv pip install outlines ``` or with pip: ```shell pip install outlines ``` ## Optional Dependencies To use Outlines models, you need to install the Python libraries for the associated inference engines/clients. Such libraries are not part of the general installation as you should only install the libraries needed for the specific models you want to use. Outlines models with the installation of their associated additional depencies: - [Anthropic](features/models/anthropic.md): `pip install anthropic` - [Dottxt](features/models/dottxt.md): `pip install dottxt` - [Gemini](features/models/gemini.md): `pip install google-generativeai` - [Llamacpp](features/models/llamacpp.md): `pip install llama-cpp-python` - [Mlx-lm](features/models/mlxlm.md): `pip install mlx mlx-lm` - [Ollama](features/models/ollama.md): `pip install ollama` (after having downloaded Ollama in your system) - [OpenAI](features/models/openai.md): `pip install openai` - [SGLang](features/models/sglang.md): `pip install openai` - [TGI](features/models/tgi.md): `pip install huggingface_hub` - [Transformers](features/models/transformers.md): `pip install transformers` - [TransformersMultiModal](features/models/transformers_multimodal.md): `pip install transformers` - [vLLM (online server)](features/models/vllm.md): `pip install openai` - [vLLM (offline)](features/models/vllm_offline.md): `pip install vllm` If you encounter any problems using Outlines with these libraries, take a look at their installation instructions. The installation of `openai` and `transformers` should be straightforward, but other libraries have specific hardware requirements. !!! warning "Hardware Requirements" If you are using a local model, your model may require specific hardware. Please check the documentation for these libraries. Some libraries like `vllm` and `llama-cpp-python` require specific hardware, such as a compatible GPU. `mlx-lm` on its side is designed for Apple Silicon, so it may not be appropriate for your use case if you are on a different platform. ## Bleeding Edge You can install the latest version of Outlines from the repository's `main` branch: ```sh pip install git+https://github.com/dottxt-ai/outlines.git@main ``` This can be useful, for instance, when a fix has been merged but not yet released. ## Installing for Development See the [contributing documentation](community/contribute.md) for instructions on how to install Outlines for development, including an example using the `dot-install` method for one of the backends. ================================================ FILE: docs/guide/migration.md ================================================ # Outlines 1.0 migration guide Outlines 1.0 introduces some breaking changes that affect the way you use the library. You are likely concerned by all of the following sections, so please read this document carefully until the end. This guide will help you migrate your code to the new version. All previous functionalities will be supported until Outlines version 1.1.0, but a warning message will be displayed to remind you to migrate your code and provide instructions to help you do so. Please migrate your code to the v1 as soon as possible. ## Removed or modified features - [Generate functions](#generate-functions) - [Models](#models) - [Samplers](#samplers) - [Functions](#functions) - [Text generation return types](#text-generation-return-types) - [Inference arguments](#inference-arguments) ### Generate functions The whole `generate` module has been removed. That includes the functions `generate.cfg`, `generate.choice`, `generate.format`,`generate.fsm`, `generate.json`, `generate.regex` and `generate.text`. You should replace these functions by the [`Generator`](../features/core/generator.md) object along with the right output type as an argument (on top of the model). The output type can either be a python type or be an object from the `outlines.types` module. You can find more information about the output types in the [Output Types](../features/core/output_types.md) section of the features documentation. Associated v1 output types for each deprecated function: - `generate.cfg` -> `outlines.types.CFG` - `generate.choice` -> `typing.Literal` or `typing.Union` - `generate.format` -> native python types (`str`, `int` etc.) - `generate.fsm` -> `outlines.types.FSM` - `generate.json` -> `pydantic.BaseModel`, `typing.TypedDict`, `dataclasses.dataclass`, `genson.schema.SchemaBuilder` or `outlines.types.JsonSchema` - `generate.regex` -> `outlines.types.Regex` - `generate.text` -> no output type (`None`) For instance, instead of: ```python from outlines import generate model = ... generator = generate.choice(model, ["foo", "bar"]) ``` You should now use: ```python from typing import Literal from outlines import Generator model = ... generator = Generator(model, Literal["foo", "bar"]) ``` ### Models The model classes found in the `outlines.models` module are maintained but there are a few important changes to be aware of. The functions used to created a model have been replaced by equivalent functions named with a `from_` prefix. The function `outlines.models.transformers` has been replaced by `outlines.from_transformers` for instance. On top of this change of name, the arguments have been modified. You should refer to the [models documentation](../features/models/index.md) for more details, but the overall idea is that you now need to provide a model/client instance from the inference library the Outlines model is wrapping. For instance, instead of: ```python from outlines import models model = models.llamacpp( repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF", filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf", chat_format="qwen", ) ``` You should now do: ```python from llama_cpp import Llama from outlines import from_llamacpp llamacpp_model = Llama.from_pretrained( repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF", filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf", chat_format="qwen", ) model = from_llamacpp(llamacpp_model) ``` The `load_lora` methods that are present on the `VLLM` and `LlamaCpp` models have been removed. You should now handle lora loading through the `Llama` instance in the case of the `LlamaCpp` model or provide it as a keyword argument when calling the model in the case of the `VLLM` model. For instance, instead of: ```python from outlines import from_vllm from vllm import LLM model = from_vllm( LLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct") ) model.load_lora("path/to/lora/file") response = model("foo") ``` You should now do: ```python from outlines import from_vllm from vllm import LLM from vllm.lora.request import LoRARequest model = from_vllm( LLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct") ) lora_request = LoRARequest("path/to/lora/file", 1, "path/to/lora/file") response = model("foo", lora_request=lora_request) ``` The `ExLlamav2` model has been removed without replacement. This inference library is not fully compatible with Outlines, so we decided to remove it. You can still use it until final deprecation, but we recommend you to migrate to a different inference library right now. ### Samplers The `outlines.samplers` module has been removed without replacement. You should now use the arguments of the inference library model to control the sampling. Depending on the model you use, this could be done at initialization or when calling the model to generate text (so when calling the outlines model or a generator). For instance, instead of: ```python from outlines import generate model = generator = generate.text(model, samplers.beam_search(2)) response = generator("foo") ``` You should now do: ```python from outlines import Generator model = generator = Generator(model) response = generator("foo", num_beams=2) ``` ### Functions The `outlines.function` module has been removed. It is replaced by the `outlines.applications` module. An [`Application`](../features/utility/application.md) serves a similar purpose as a `Function`: it encapsulates a prompt template and an output type. A difference is that can `Application` is not instantiated with a model name. Instead, you should provide a model instance along with the prompt when calling it. For instance, instead of: ```python from outlines import Function prompt_template = ... output_type = ... fn = Function( prompt_template, output_type, "hf-internal-testing/tiny-random-GPTJForCausalLM", ) result = fn("foo") ``` You should now do: ```python from outlines import Application prompt_template = ... output_type = ... application = Application( prompt_template, output_type, ) model = ... result = application(model, "foo") ``` ### Text generation return types In the previous version of Outlines, the return type of the generators depended on the output type provided. For instance, if you passed a Pydantic model to the `generate.json` function, the return type was a Pydantic model instance. In the v1, the return type of a generator is always a `str`, the raw text generated by the model. You are responsible for parsing the text into the desired format. For instance, instead of: ```python from pydantic import BaseModel from outlines import generate class Foo(BaseModel): bar: str model = ... generator = generate.json(model, Foo) result = generator("foo") print(result.bar) ``` You should now do: ```python from pydantic import BaseModel from outlines import Generator class Foo(BaseModel): bar: str model = ... generator = Generator(model, Foo) result = generator("foo") result = Foo.model_validate_json(result) # parse the text into the Pydantic model instance print(result.bar) ``` The [Output Types](../features/core/output_types.md) section of the features documentation includes extensive details on available output types. ### Inference arguments In the previous version of Outlines, some of the inference arguments were standardized across the models and were provided as positional arguments to the generator or through the sampling params dictionary. Additionally, various default values were added by outlines to the inference library models. This is no longer the case. You should refer to the documentation of the inference library you use to find the right arguments for your use case and pass them as keyword arguments to the outlines generator when calling it. For instance, instead of: ```python from outlines import generate model = generator = generate.text(model) result = generator("foo", 256, ".", 10) # 256 tokens, stop at "." and seed 10 ``` You should now do: ```python from outlines import Generator model = generator = Generator(model) result = generator("foo", max_new_tokens=256, stop_strings=".", seed=10) ``` ================================================ FILE: docs/guide/selecting_an_inference_backend.md ================================================ This guide should provide a general overview of the available models in the [API reference](/api/models/). ## Models - [Anthropic](/api/models/anthropic) ================================================ FILE: docs/guide/vlm.md ================================================ # Vision-Language Models with Outlines This guide demonstrates how to use Outlines with vision-language models. Vision-language models can process both text and images, allowing for tasks like image captioning, visual question answering, and more. We will be using the Pixtral-12B model from Mistral to take advantage of some of its visual reasoning capabilities and a workflow to generate a multistage atomic caption. ## Setup First, we need to install the necessary dependencies. In addition to Outlines, we"ll need to install the transformers library and any specific requirements for the vision-language model we"ll be using. ```shell pip install outlines transformers torch pillow ``` ### Initializing the Model We"ll use the `outlines.from_transformers` function to initialize our vision-language model. For this function to return a vision multi-modal model we need to pass in a transformers model and a transformers processor that can handle both text and image inputs. Today we"ll be using the Pixtral model with the AutoProcessor. ```python import outlines import torch from transformers import ( AutoProcessor, LlavaForConditionalGeneration ) model_name="mistral-community/pixtral-12b" # original magnet model is able to be loaded without issue model_class=LlavaForConditionalGeneration processor_class=AutoProcessor def get_vision_model(model_name: str, model_class, processor_class): model_kwargs = { "torch_dtype": torch.bfloat16, "attn_implementation": "flash_attention_2", "device_map": "auto", } processor_kwargs = { "device": "cuda", } model = outlines.from_transformers( model_class.from_pretrained(model_name, **model_kwargs), processor_class.from_pretrained(model_name, **processor_kwargs), ) return model model = get_vision_model(model_name, model_class, processor_class) ``` ### Defining the Schema Next, we will define a schema for the output we expect from our vision multi-modal model. This schema will help structure the model's responses. We use the `outlines.Generator` object to create a generator for our schema that will then be called with our prompt and images. ```python from enum import Enum from pydantic import BaseModel, Field, confloat, constr from pydantic.types import StringConstraints, PositiveFloat from typing import List from typing_extensions import Annotated class TagType(Enum): ENTITY = "Entity" RELATIONSHIP = "Relationship" STYLE = "Style" ATTRIBUTE = "Attribute" COMPOSITION = "Composition" CONTEXTUAL = "Contextual" TECHNICAL = "Technical" SEMANTIC = "Semantic" class ImageTag(BaseModel): tag: Annotated[ constr(min_length=1, max_length=30), Field( description=( "Descriptive keyword or phrase representing the tag." ) ) ] category: TagType confidence: Annotated[ confloat(le=1.0), Field( description=( "Confidence score for the tag, between 0 (exclusive) and 1 (inclusive)." ) ) ] class ImageData(BaseModel): tags_list: List[ImageTag] = Field(..., min_items=8, max_items=20) short_caption: Annotated[str, StringConstraints(min_length=10, max_length=150)] dense_caption: Annotated[str, StringConstraints(min_length=100, max_length=2048)] image_data_generator = outlines.Generator(model, ImageData) ``` This schema defines the structure for image tags, including categories like Entity, Relationship, Style, etc., as well as short and dense captions. ### Preparing the Prompt We'll create a prompt that instructs the model on how to analyze the image and generate the structured output: ```python pixtral_instruction = """ [INST] You are a structured image analysis agent. Generate comprehensive tag list, caption, and dense caption for an image classification system. - Entity : The content of the image, including the objects, people, and other elements. - Relationship : The relationships between the entities in the image. - Style : The style of the image, including the color, lighting, and other stylistic elements. - Attribute : The most important attributes of the entities and relationships in the image. - Composition : The composition of the image, including the arrangement of elements. - Contextual : The contextual elements of the image, including the background, foreground, and other elements. - Technical : The technical elements of the image, including the camera angle, lighting, and other technical details. - Semantic : The semantic elements of the image, including the meaning of the image, the symbols, and other semantic details. { "tags_list": [ { "tag": "subject 1", "category": "Entity", "confidence": 0.98 }, { "tag": "subject 2", "category": "Entity", "confidence": 0.95 }, { "tag": "subject 1 runs from subject 2", "category": "Relationship", "confidence": 0.90 }, } [IMG][/INST] """.strip() ``` This prompt provides detailed instructions to the model on how to generate comprehensive tag lists, captions, and dense captions for image analysis. Because of the ordering of the instructions the original tag generation serves as a sort of visual grounding for the captioning task, reducing the amount of manual post processing required. It is essential to include the tag in the prompt at the location where the image will be inserted. ### Generating Structured Output Now we can use our model to generate structured output based on an input image: ```python from io import BytesIO from urllib.request import urlopen from PIL import Image def img_from_url(url): img_byte_stream = BytesIO(urlopen(url).read()) return Image.open(img_byte_stream).convert("RGB") image_url="https://upload.wikimedia.org/wikipedia/commons/9/98/Aldrin_Apollo_11_original.jpg" image= img_from_url(image_url) result = image_data_generator({ "text": pixtral_instruction, "images": image }) print(result) ``` This code loads an image from a URL, passes it to our vision multi-modal model along with the instruction prompt, and generates a structured output based on the defined schema. We end up with an output like this, ready to be used for the next stage in your pipeline: ```json {"tags_list": [ { "tag": "astronaut", "category": , "confidence": 0.99 }, {"tag": "moon", "category": , "confidence": 0.98}, { "tag": "space suit", "category": , "confidence": 0.97 }, { "tag": "lunar module", "category": , "confidence": 0.95 }, { "tag": "shadow of astronaut", "category": , "confidence": 0.95 }, { "tag": "footprints in moon dust", "category": , "confidence": 0.93 }, { "tag": "low angle shot", "category": , "confidence": 0.92 }, { "tag": "human first steps on the moon", "category": , "confidence": 0.95 }], "short_caption": "First man on the Moon", "dense_caption": "The figure clad in a pristine white space suit, emblazoned with the American flag, stands powerfully on the moon's desolate and rocky surface. The lunar module, a workhorse of space engineering, looms in the background, its metallic legs sinking slightly into the dust where footprints and tracks from the mission's journey are clearly visible. The photograph captures the astronaut from a low angle, emphasizing his imposing presence against the desolate lunar backdrop. The stark contrast between the blacks and whiteslicks of lost light and shadow adds dramatic depth to this seminal moment in human achievement." } ``` ## Conclusion This guide demonstrated how Outlines enables structured output generation with vision-language models. With the techniques shown above, you can build: - **Content Management Systems**: Automatically tag and categorize visual content with structured metadata that can be directly stored in databases, enabling powerful search and filtering capabilities - **Accessibility Tools**: Generate rich, structured descriptions of images that can be adapted for different contexts - from brief alt-text to detailed scene descriptions for screen readers - **Quality Assurance Pipelines**: Validate visual content against specific criteria by extracting structured attributes and checking them against business rules ================================================ FILE: docs/index.md ================================================ --- title: Welcome to Outlines! hide: - navigation --- #
![](assets/images/logo-light-mode.svg#only-light){ width="500" } ![](assets/images/logo-dark-mode.svg#only-dark){ width="500" }
LLMs are powerful but their outputs are unpredictable. Most solutions attempt to fix bad outputs after generation using parsing, regex, or fragile code that breaks easily. Outlines guarantees structured outputs during generation — directly from any LLM. - **Works with any model** - Same code runs across OpenAI, Ollama, vLLM, and more - **Simple integration** - Just pass your desired output type: `model(prompt, output_type)` - **Guaranteed valid structure** - No more parsing headaches or broken JSON - **Provider independence** - Switch models without changing code - **Rich structure definition** - Use Json Schema, regular expressions or context-free grammars
[Get Started](guide/getting_started){ .md-button .md-button--primary } [View Examples](examples/){ .md-button } [API Reference](api_reference/){ .md-button } [GitHub](https://github.com/dottxt-ai/outlines){ .md-button }
## 🚀 Building the future of structured generation We're working with select partners to develop new interfaces to structured generation. Need XML, FHIR, custom schemas or grammars? Let's talk. Audit your schema: share one schema, we show you what breaks under generation, the constraints that fix it, and compliance rates before and after. Sign up [here](https://h1xbpbfsf0w.typeform.com/to/rtFUraA2?typeform). ## See it in action ```python from pydantic import BaseModel from typing import Literal import outlines import openai class Customer(BaseModel): name: str urgency: Literal["high", "medium", "low"] issue: str client = openai.OpenAI() model = outlines.from_openai(client, "gpt-4o") customer = model( "Alice needs help with login issues ASAP", Customer ) # ✓ Always returns valid Customer object # ✓ No parsing, no errors, no retries ``` ## Quick install ```shell pip install outlines ``` ## Features
- :material-shield-check: **Reliable** - Guaranteed schema compliance -- always valid JSON. - :material-puzzle: **Feature-rich** - Supports a large proportion of the JSON Schema spec, along with regex and context-free grammars. - :material-lightning-bolt: **Fast** - Microseconds of overhead vs seconds of retries. Compilation happens once, not every request. - :material-lightbulb: **Simple** - Outlines is a low-abstraction library. Write code the way you normally do with LLMs. No agent frameworks needed.
## Supported inference APIs, libraries & servers - [vLLM](features/models/vllm.md) - [vLLM offline](features/models/vllm_offline.md) - [Transformers](features/models/transformers.md) - [llama.cpp](features/models/llamacpp.md) - [Ollama](features/models/ollama.md) - [MLX-LM](features/models/mlxlm.md) - [SgLang](features/models/sglang.md) - [TGI](features/models/tgi.md) - [OpenAI](features/models/openai.md) - [Anthropic](features/models/anthropic.md) - [Gemini](features/models/gemini.md) - [Dottxt](features/models/dottxt.md) ## Who is using Outlines? Hundreds of organisations and the main LLM serving frameworks ([vLLM][vllm], [TGI][tgi], [LoRAX][lorax], [xinference][xinference], [SGLang][sglang]) use Outlines. Prominent companies and organizations that use Outlines include:
Organizations are included either because they use Outlines as a dependency in a public repository, or because of direct communication between members of the Outlines team and employees at these organizations. Still not convinced, read [what people say about us](community/feedback.md). And make sure to take a look at what the [community is building](community/examples.md)! ## Outlines people Outlines would not be what it is today without a community of dedicated developers: ## About .txt Outlines is built with ❤️ by [.txt](https://dottxt.co). .txt solves the critical problem of reliable structured output generation for large language models. Our [commercially-licensed libraries][dottxt-doc] ensure 100% compliance with JSON Schema, regular expressions and context-free grammars while adding only microseconds of latency. Unlike open-source alternatives, we offer superior reliability, performance, and enterprise support. ## Acknowledgements Outlines was originally developed at [@NormalComputing](https://twitter.com/NormalComputing) by [@remilouf](https://twitter.com/remilouf) and [@BrandonTWillard](https://twitter.com/BrandonTWillard). It is now maintained by [.txt](https://dottxt.co). [discord]: https://discord.gg/R9DSu34mGd [aesara]: https://github.com/aesara-devs [blackjax]: https://github.com/blackjax-devs/blackjax [pythological]: https://github.com/pythological [hy]: https://hylang.org/ [.txt]: https://dottxt.co [vllm]: https://github.com/vllm-project/vllm [tgi]: https://github.com/huggingface/text-generation-inference [lorax]: https://github.com/predibase/lorax [xinference]: https://github.com/xorbitsai/inference [sglang]: https://github.com/sgl-project/sglang/ [dottxt-doc]: https://docs.dottxt.co ================================================ FILE: docs/overrides/home.html ================================================ {#- This file overrides the home page to use HTML tooling better. -#} {% extends "main.html" %} {% block tabs %} {{ super() }}
Outlines Logo Outlines Logo

Structured text generation and robust prompting for language models

Follow us on X and Bluesky

Made with ❤️ by the team at .txt

{% endblock %} {% block content %}{% endblock %} {% block footer %}{% endblock %} ================================================ FILE: docs/overrides/main.html ================================================ {% extends "base.html" %} ================================================ FILE: docs/stylesheets/extra.css ================================================ @font-face { font-family: "Source Code Pro Custom", monospace; src: url(https://fonts.googleapis.com/css2?family=Source+Code+Pro:ital,wght@0,200..900;1,200..900&display=swap); } /* Header/banner styling */ .md-header { background-color: #DFD1B6 !important; } :root > * { /* Notion-like color palette */ --md-default-fg-color: #37352f; --md-default-fg-color--light: #73706c; --md-default-fg-color--lighter: #9b9a97; --md-default-bg-color: #ffffff; --md-default-bg-color--light: #f7f6f3; --md-default-bg-color--lighter: #edece9; /* Typography */ --md-text-font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif; --md-code-font: "Source Code Pro", Consolas, "Liberation Mono", Menlo, monospace; /* Notion-like link colors */ --md-typeset-a-color: #37352f; --md-accent-fg-color: #eb5757; /* Background colors */ --md-code-bg-color: #f7f6f3; --md-code-fg-color: #eb5757; } /* Code block styling */ .highlight pre, .md-typeset pre code, .md-typeset .highlight pre, .md-typeset .highlighttable pre { background-color: #2E3440 !important; /* Nord's darkest blue (nord0) */ border-radius: 4px !important; /* Subtle rounded corners like Notion */ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1), 0 1px 3px rgba(0, 0, 0, 0.08) !important; /* Subtle shadow */ border: none !important; /* No border for cleaner look */ } /* Adjust padding from code content */ .md-code__content { padding: 1em 1.5em !important; /* Increased top/bottom padding */ } /* Style only inline code (not code blocks) */ .md-typeset :not(pre) > code { background-color: rgba(135, 131, 120, 0.15); /* Notion's exact inline code background */ color: #E35A26; /* Orange color for inline code */ border-radius: 3px; /* Subtle rounded corners */ padding: 0.2em 0.4em; /* Notion-like padding */ font-weight: 500; /* Medium weight */ font-size: 0.85em; /* Slightly smaller than body text */ border: none; font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace; } /* Override code block container background */ .md-typeset .highlight, .md-typeset .highlighttable { background-color: #ffffff !important; /* Match page background */ border-radius: 4px !important; /* Match code block radius */ overflow: hidden; /* Ensure child elements respect borders */ } /* Ensure proper spacing for the entire code block */ .md-typeset pre { margin: 1.5em 0 !important; } /* Style the copy button with Nord snow grey */ .md-clipboard { color: #D8DEE9 !important; /* Nord snow storm */ top: 0.75em !important; /* Lower the button more */ right: 0.5em !important; /* Add some spacing from right edge */ } .md-clipboard:hover { color: #ECEFF4 !important; /* Brighter snow storm on hover */ } .md-clipboard:after { color: #D8DEE9 !important; } /* Style scrollbars with Nord colors */ .md-typeset pre::-webkit-scrollbar { height: 0.4rem; width: 0.4rem; } .md-typeset pre::-webkit-scrollbar-track { background-color: #3B4252; /* Nord1 */ } .md-typeset pre::-webkit-scrollbar-thumb { background-color: #4C566A; /* Nord3 */ border-radius: 0.2rem; } .md-typeset pre::-webkit-scrollbar-thumb:hover { background-color: #D8DEE9; /* Nord snow storm on hover */ } /* Firefox scrollbar styling */ .md-typeset pre { scrollbar-width: thin; scrollbar-color: #4C566A #3B4252; } /* Notion-like visual hierarchy */ .md-typeset h1 { font-weight: 600; font-size: 2.5rem; line-height: 1.2; margin-top: 2.5rem; margin-bottom: 0.75rem; color: #37352f; letter-spacing: -0.01em; } .md-typeset h2 { font-weight: 600; font-size: 1.875rem; line-height: 1.3; margin-top: 2.5rem; margin-bottom: 0.75rem; color: #37352f; letter-spacing: -0.005em; border: none; padding: 0; } .md-typeset h3 { font-weight: 500; font-size: 1.5rem; line-height: 1.3; margin-top: 2rem; margin-bottom: 0.75rem; color: #37352f; letter-spacing: -0.003em; } .md-typeset h4 { font-weight: 500; font-size: 1.25rem; line-height: 1.3; margin-top: 1.75rem; margin-bottom: 0.5rem; color: #37352f; } .md-typeset h5 { font-weight: 500; font-size: 1rem; line-height: 1.4; margin-top: 1.5rem; margin-bottom: 0.5rem; color: #37352f; } .md-typeset h6 { font-weight: 500; font-size: 0.875rem; line-height: 1.4; margin-top: 1.25rem; margin-bottom: 0.5rem; color: #37352f; text-transform: none; letter-spacing: normal; } /* Notion-like paragraph styling */ .md-typeset p { line-height: 1.5; margin-bottom: 1em; /* More whitespace */ color: #37352f; font-weight: 400; } /* Reduce font size for navigation */ .md-nav__link { font-size: 0.6875rem; /* 11px */ } /* Reduce font size for TOC */ .md-nav--secondary .md-nav__link { font-size: 0.6875rem; /* 11px */ } /* Reduce spacing between navigation items */ .md-nav__item { margin: 0; } .md-nav__link { padding-top: 0; padding-bottom: 0; line-height: 1.2; /* Tighter line height */ } /* Make navigation sections more compact */ .md-nav__title { line-height: 1.2; padding: 0.2rem 0.5rem; margin-bottom: 0.2rem; } /* Reduce TOC line height for compactness */ .md-nav--secondary .md-nav__link { line-height: 1.2; } /* Notion-like list styling */ .md-typeset ul, .md-typeset ol { margin-top: 0.25em; /* Reduced top margin to sit closer to text */ margin-bottom: 1em; /* Match paragraph spacing */ color: #37352f; } .md-typeset li { line-height: 1.5; margin-bottom: 0.15rem; /* Slightly more spacing between list items */ font-weight: 400; } /* Notion-style links */ .md-typeset a { color: #37352f; text-decoration: underline; text-decoration-color: rgba(55, 53, 47, 0.4); text-underline-offset: 2px; transition: text-decoration-color 0.1s ease; } .md-typeset a:hover { text-decoration-color: rgba(55, 53, 47, 0.8); background-color: rgba(55, 53, 47, 0.04); } /* Make important elements stand out */ .md-typeset strong { font-weight: 600; color: #37352f; } /* Better spacing for code blocks in relation to text */ .md-typeset pre { margin: 1.5em 0 !important; /* More whitespace around code blocks */ } /* Notion-style tables */ .md-typeset table { border-collapse: collapse; margin: 1rem 0; } .md-typeset table th { font-weight: 600; background-color: #f7f6f3; color: #37352f; border: 1px solid #e1e0dd; padding: 0.5rem 0.75rem; } .md-typeset table td { border: 1px solid #e1e0dd; padding: 0.5rem 0.75rem; } /* Notion-style blockquotes */ .md-typeset blockquote { border-left: 3px solid #37352f; padding-left: 1rem; margin: 1rem 0; color: #37352f; background: transparent; } /* Page styling */ .md-content { background-color: #ffffff; } .md-sidebar { background-color: #fbfbfa; } /* Remove shadows for cleaner look */ .md-header, .md-tabs { box-shadow: none; border-bottom: 1px solid #e1e0dd; } /* Admonition styling with custom palette */ .md-typeset .admonition, .md-typeset details { border-radius: 4px; border: none; box-shadow: none; font-size: 0.6875rem; /* Very small font size - 11px */ padding: 0.75rem; margin: 1rem 0; } /* Note/Info - Blue */ .md-typeset .admonition.note, .md-typeset details.note, .md-typeset .admonition.info, .md-typeset details.info { background-color: rgba(127, 154, 207, 0.1) !important; border-left: 4px solid #7F9ACF !important; } .md-typeset .note > .admonition-title, .md-typeset .note > summary, .md-typeset .info > .admonition-title, .md-typeset .info > summary { background-color: rgba(127, 154, 207, 0.2) !important; border-left: none !important; } /* Additional specificity for info type and custom types that should be blue */ .md-typeset .admonition.admonition-info, .md-typeset details.details-info, .md-typeset .admonition.installation, .md-typeset .admonition.example, .md-typeset .admonition.abstract, .md-typeset .admonition.summary, .md-typeset .admonition.tldr { background-color: rgba(127, 154, 207, 0.1) !important; border-left: 4px solid #7F9ACF !important; } /* Titles for custom blue admonitions */ .md-typeset .installation > .admonition-title, .md-typeset .example > .admonition-title, .md-typeset .abstract > .admonition-title, .md-typeset .summary > .admonition-title, .md-typeset .tldr > .admonition-title { background-color: rgba(127, 154, 207, 0.2) !important; border-left: none !important; } /* Warning/Caution - Yellow */ .md-typeset .admonition.warning, .md-typeset details.warning, .md-typeset .admonition.caution, .md-typeset details.caution { background-color: rgba(189, 147, 47, 0.1); border-left: 4px solid #BD932F; } .md-typeset .warning > .admonition-title, .md-typeset .warning > summary, .md-typeset .caution > .admonition-title, .md-typeset .caution > summary { background-color: rgba(189, 147, 47, 0.2); border-left: none; } /* Danger/Error - Orange */ .md-typeset .admonition.danger, .md-typeset details.danger, .md-typeset .admonition.error, .md-typeset details.error { background-color: rgba(227, 90, 38, 0.1); border-left: 4px solid #E35A26; } .md-typeset .danger > .admonition-title, .md-typeset .danger > summary, .md-typeset .error > .admonition-title, .md-typeset .error > summary { background-color: rgba(227, 90, 38, 0.2); border-left: none; } /* Success/Tip/Hint - Green */ .md-typeset .admonition.success, .md-typeset details.success, .md-typeset .admonition.tip, .md-typeset details.tip, .md-typeset .admonition.hint, .md-typeset details.hint { background-color: rgba(166, 180, 163, 0.1); border-left: 4px solid #A6B4A3; } .md-typeset .success > .admonition-title, .md-typeset .success > summary, .md-typeset .tip > .admonition-title, .md-typeset .tip > summary, .md-typeset .hint > .admonition-title, .md-typeset .hint > summary { background-color: rgba(166, 180, 163, 0.2); border-left: none; } /* General admonition title styling */ .md-typeset .admonition-title, .md-typeset summary { font-weight: 600; font-size: 0.6875rem; /* Very small - 11px */ padding: 0.5rem 0.75rem; margin: -0.75rem -0.75rem 0.5rem -0.75rem; border-radius: 4px 4px 0 0; } /* Ensure consistent icon styling */ .md-typeset .admonition > .admonition-title::before, .md-typeset details > summary::before { font-size: 1rem; margin-right: 0.5rem; } ================================================ FILE: environment.yml ================================================ # To use: # # $ conda env create -f environment.yml # `mamba` works too for this command # $ conda activate dottxt-ai # name: dottxt-ai channels: - conda-forge - huggingface dependencies: - python==3.10.0 - jinja2 - numpy - pydantic - scipy - pytest - pre-commit - referencing - jsonschema - transformers - pip - pip: - -e ".[test]" ================================================ FILE: examples/babyagi.py ================================================ """This example is a simplified translation of BabyAGI. It currently does not use the vector store retrieval The original repo can be found at https://github.com/yoheinakajima/babyagi """ from collections import deque from typing import Deque, List from openai import OpenAI import outlines from outlines import Template model = outlines.from_openai(OpenAI(), "gpt-4o-mini") complete = outlines.Generator(model) ## Load the prompts perform_task_ppt = Template.from_file("prompts/babyagi_perform_task.txt") create_tasks_ppt = Template.from_file("prompts/babyagi_create_task.txt") prioritize_tasks_ppt = Template.from_file("prompts/babyagi_prioritize_task.txt") def create_tasks_fmt(result: str) -> List[str]: new_tasks = result.split("\n") task_list = [] for task in new_tasks: parts = task.strip().split(".", 1) if len(parts) == 2: task_list.append(parts[1].strip()) return task_list def prioritize_tasks_fmt(result: str): new_tasks = result.split("\n") task_list: Deque = deque([]) for task in new_tasks: parts = task.strip().split(".", 1) if len(parts) == 2: task_id = int(parts[0].strip()) task_name = parts[1].strip() task_list.append({"task_id": task_id, "task_name": task_name}) return task_list objective = "Becoming rich while doing nothing." first_task = { "task_id": 1, "task_name": "Find a repeatable, low-maintainance, scalable business.", } next_task_id = 1 task_list = deque([first_task]) def one_cycle(objective: str, task_list, next_task_id: int): """One BabyAGI cycle. It consists in executing the highest-priority task, creating some new tasks given the result, and re-priotizing the tasks. Parameters ---------- objective The overall objective of the session. task_list The current list of tasks to perform. task_id_counter The current task id. """ task = task_list.popleft() prompt = perform_task_ppt(objective=objective, task=task) result = complete(prompt) prompt = create_tasks_ppt( objective=objective, task=first_task["task_name"], result=result, previous_tasks=[first_task["task_name"]], ) new_tasks = complete(prompt) new_tasks = create_tasks_fmt(new_tasks) for task in new_tasks: next_task_id += 1 task_list.append({"task_id": next_task_id, "task_name": task}) prompt = prioritize_tasks_ppt( objective=objective, tasks=[task["task_name"] for task in task_list], next_task_id=next_task_id, ) prioritized_tasks = complete(prompt) prioritized_tasks = prioritize_tasks_fmt(prioritized_tasks) return task, result, prioritized_tasks, next_task_id # Let's run it for 5 cycles to see how it works without spending a fortune. for _ in range(5): print("\033[95m\033[1m" + "\n*****TASK LIST*****\n" + "\033[0m\033[0m") for t in task_list: print(" • " + str(t["task_name"])) task, result, task_list, next_task_id = one_cycle( objective, task_list, next_task_id ) print("\033[92m\033[1m" + "\n*****NEXT TASK*****\n" + "\033[0m\033[0m") print(task) print("\033[93m\033[1m" + "\n*****TASK RESULT*****\n" + "\033[0m\033[0m") print(result) ================================================ FILE: examples/beam-cloud/README.md ================================================ ## Deploy Outlines on Beam 1. Create an account [here](https://beam.cloud) and install the Beam SDK 2. Download the `app.py` file to your computer 3. Deploy it as a serverless API by running: `beam deploy app.py:predict` ================================================ FILE: examples/beam-cloud/app.py ================================================ from typing import Literal from beam import Image, endpoint, env if env.is_remote(): import outlines # Pre-load models when the container first starts def load_models(): from transformers import AutoModelForCausalLM, AutoTokenizer import outlines model = outlines.models.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), ) return model @endpoint( name="outlines-serverless", gpu="A10G", cpu=1, memory="16Gi", on_start=load_models, image=Image().add_python_packages( ["outlines", "torch", "transformers", "accelerate"] ), ) def predict(context, **inputs): default_prompt = """You are a sentiment-labelling assistant. Is the following review positive or negative? Review: This restaurant is just awesome! """ prompt = inputs.get("prompt", default_prompt) # Unpack cached model from context model = context.on_start_value # Inference generator = outlines.Generator(model, Literal["Positive", "Negative"]) answer = generator(prompt) return {"answer": answer} ================================================ FILE: examples/bentoml/.bentoignore ================================================ __pycache__/ *.py[cod] *$py.class .ipynb_checkpoints venv/ ================================================ FILE: examples/bentoml/bentofile.yaml ================================================ service: "service:Outlines" labels: owner: bentoml-team stage: demo include: - "*.py" python: requirements_txt: "./requirements.txt" lock_packages: false ================================================ FILE: examples/bentoml/import_model.py ================================================ import bentoml MODEL_ID = "mistralai/Mistral-7B-v0.1" BENTO_MODEL_TAG = MODEL_ID.lower().replace("/", "--") def import_model(model_id, bento_model_tag): import torch from transformers import AutoModelForCausalLM, AutoTokenizer tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) model = AutoModelForCausalLM.from_pretrained( MODEL_ID, torch_dtype=torch.float16, low_cpu_mem_usage=True, ) with bentoml.models.create(bento_model_tag) as bento_model_ref: tokenizer.save_pretrained(bento_model_ref.path) model.save_pretrained(bento_model_ref.path) if __name__ == "__main__": import_model(MODEL_ID, BENTO_MODEL_TAG) ================================================ FILE: examples/bentoml/requirements.txt ================================================ bentoml>=1.2.11 outlines==0.0.37 transformers==4.38.2 datasets==2.18.0 accelerate==0.27.2 ================================================ FILE: examples/bentoml/service.py ================================================ import typing as t import bentoml from import_model import BENTO_MODEL_TAG, MODEL_ID DEFAULT_SCHEMA = """{ "title": "Character", "type": "object", "properties": { "name": { "title": "Name", "maxLength": 10, "type": "string" }, "age": { "title": "Age", "type": "integer" }, "armor": {"$ref": "#/definitions/Armor"}, "weapon": {"$ref": "#/definitions/Weapon"}, "strength": { "title": "Strength", "type": "integer" } }, "required": ["name", "age", "armor", "weapon", "strength"], "definitions": { "Armor": { "title": "Armor", "description": "An enumeration.", "enum": ["leather", "chainmail", "plate"], "type": "string" }, "Weapon": { "title": "Weapon", "description": "An enumeration.", "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], "type": "string" } } }""" @bentoml.service( traffic={ "timeout": 300, }, resources={ "gpu": 1, "gpu_type": "nvidia-l4", }, ) class Outlines: bento_model_ref = bentoml.models.get(BENTO_MODEL_TAG) def __init__(self) -> None: import torch from transformers import AutoModelForCausalLM, AutoTokenizer import outlines self.model = outlines.from_transformers( AutoTokenizer.from_pretrained(MODEL_ID), AutoModelForCausalLM.from_pretrained( MODEL_ID, torch_dtype=torch.float16, low_cpu_mem_usage=True, ) ) @bentoml.api async def generate( self, prompt: str = "Give me a character description.", json_schema: t.Optional[str] = DEFAULT_SCHEMA, ) -> t.Dict[str, t.Any]: import outlines generator = outlines.Generator(self.model, outlines.json_schema(json_schema)) character = generator(prompt) return character ================================================ FILE: examples/cerebrium/cerebrium.toml ================================================ [cerebrium.deployment] name = "cerebrium" python_version = "3.11" cuda_version = "12" include = "[./*, main.py, cerebrium.toml]" exclude = "[.*]" shell_commands = [] [cerebrium.hardware] cpu = 2 memory = 14.0 gpu = "AMPERE A10" gpu_count = 1 provider = "aws" region = "us-east-1" [cerebrium.scaling] min_replicas = 0 max_replicas = 5 cooldown = 60 [cerebrium.dependencies.pip] outline = "==0.0.37" transformers = "==4.38.2" datasets = "==2.18.0" accelerate = "==0.27.2" ================================================ FILE: examples/cerebrium/main.py ================================================ from transformers import AutoModelForCausalLM, AutoTokenizer import outlines model = outlines.from_transformers( AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2"), AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2"), ) schema = { "title": "Character", "type": "object", "properties": { "name": {"title": "Name", "maxLength": 10, "type": "string"}, "age": {"title": "Age", "type": "integer"}, "armor": {"$ref": "#/definitions/Armor"}, "weapon": {"$ref": "#/definitions/Weapon"}, "strength": {"title": "Strength", "type": "integer"}, }, "required": ["name", "age", "armor", "weapon", "strength"], "definitions": { "Armor": { "title": "Armor", "description": "An enumeration.", "enum": ["leather", "chainmail", "plate"], "type": "string", }, "Weapon": { "title": "Weapon", "description": "An enumeration.", "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], "type": "string", }, }, } def generate( prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", ): character = model( f"[INST]Give me a character description. Describe {prompt}.[/INST]", outlines.json_schema(schema), ) print(character) return character ================================================ FILE: examples/dating_profile.py ================================================ from dataclasses import dataclass from enum import Enum import torch import transformers from pydantic import BaseModel, conlist import outlines from outlines import Template class QuestionChoice(str, Enum): A = "The key to my heart is" B = "The first item on my bucket list is" C = "Perks of dating me" D = "Message me if you also love" E = "People would describe me as" F = "I can beat you in a game of" @dataclass class QuestionAnswer: question: QuestionChoice answer: str class DatingProfile(BaseModel): # It is possible put length constraints on these strings using constr- however, this appears to dramatically increase the generation time # This may be resolved in the future with this PR: https://github.com/dottxt-ai/outlines/pull/272 bio: str job: str # Ignore mypy checks here because it still doesn't support conlist or constr: https://github.com/pydantic/pydantic/issues/975 interests: conlist(str, min_length=1, max_length=5) # type: ignore qna1: QuestionAnswer qna2: QuestionAnswer @dataclass class Example: description: str profile: DatingProfile samples: list[Example] = [ Example( description="I'm an author and former professional soccer player living in Seattle who publishes popular fiction books. A typical day for me starts by hanging out with my cat, drinking a coffee, and reading as much as I can in a few hours. Then, I'll prepare a quick smoothie before starting to write for a few hours, take a break with soccer or running a few miles, and finally meet friends for dinner at a new, hip restaurant in the evening. Sometimes we go axe-throwing afterwards, or play poker, or watch a comedy show, or visit a dive bar. On my vacations, I travel extensively to countries South America, Europe, and Asia, with the goal of visiting them all!", profile=DatingProfile( bio="Adventurer, dreamer, author, and soccer enthusiast. Life’s too short to waste time so I make the most of each day by exploring new places and playing with my friends on the pitch. What’s your favorite way to get out and have fun?", job="Famous Soccer Player -> Famous Author", interests=["Soccer", "Travel", "Friends", "Books", "Fluffy Animals"], qna1=QuestionAnswer( question=QuestionChoice.B, answer="swim in all seven oceans!" ), qna2=QuestionAnswer( question=QuestionChoice.E, answer="fun-loving, adventurous, and a little bit crazy", ), ), ), Example( description="I run my company and build houses for a living. I'm a big fan of the outdoors and love to go hiking, camping, and fishing. I don't like video games, but do like to watch movies. My love language is home-cooked food, and I'm looking for someone who isn't afraid to get their hands dirty.", profile=DatingProfile( bio="If you're looking for a Montana man who loves to get outdoors and hunt, and who's in-tune with his masculinity then I'm your guy!", job="House Construction Manager / Entrepreneur", interests=["Hunting", "Hiking", "The outdoors", "Home-cooked food"], qna1=QuestionAnswer(question=QuestionChoice.A, answer="food made at home"), qna2=QuestionAnswer( question=QuestionChoice.C, answer="having a man in your life who can fix anything", ), ), ), Example( description="I run my own Youtube channel with 10M subscribers. I love working with kids, and my audience skews pretty young too. In my free time, I play Fortnite and Roblox. I'm looking for someone who is also a gamer and likes to have fun. I'm learning Japanese in my free time as well as how to cook.", profile=DatingProfile( bio="Easy on the eyes (find me on Youtube!) and great with kids. What more do you need?", job="Youtuber 10M+ subscribers", interests=["Kids", "Gaming", "Japanese"], qna1=QuestionAnswer(question=QuestionChoice.D, answer="anime and gaming!"), qna2=QuestionAnswer(question=QuestionChoice.F, answer="Fortnite, gg ez"), ), ), ] # Below requires ~13GB of GPU memory # https://huggingface.co/mosaicml/mpt-7b-8k-instruct # Motivation: Reasonably large model that fits on a single GPU and has been fine-tuned for a larger context window model_name = "mosaicml/mpt-7b-8k-instruct" model = outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(model_name), transformers.AutoTokenizer.from_pretrained(model_name), ) new_description = "I'm a laid-back lawyer who spends a lot of his free-time gaming. I work in a corporate office, but ended up here after the start-up I cofounded got acquired, so still play ping pong with my cool coworkers every day. I have a bar at home where I make cocktails, which is great for entertaining friends. I secretly like to wear suits and get a new one tailored every few months. I also like weddings because I get to wear those suits, and it's a good excuse for a date. I watch the latest series because I'm paying, with my hard-earned money, for every streaming service." dating_profile_prompt = Template.from_file("prompts/dating_profile.txt") prompt = dating_profile_prompt(description=new_description, examples=samples) profile = model(prompt, outlines.json_schema(DatingProfile), max_tokens=500) # type: ignore print(profile) # Sample generated profiles """ { "bio": "I'm an ambitious lawyer with a casual and fashionable style. I love games and sports, but my true passion is preparing refreshing cocktails at home and dressing to the nines at weddings. I'm currently looking for a woman to show a good time to and get a kiss on the opulent suit I just had made. Send resumà € to this inbox.", "job": "Lawyer", "interests": [ "Stylish guys", "Gaming", "Ping pong", "Cocktails", "Weddings" ], "qna1": { "question": "The first item on my bucket list is", "answer": "be married and have a family." }, "qna2": { "question": "People would describe me as", "answer": "charming, stylish, and funny." } } """ """ { "bio": "I’m a sexy lawyer with time on my hands. I love to game and play ping pong, but the real reason you should swipe to the right is because I look great in a suit. Who doesn’t love a man in a suit? Just saying. Send me a message if you think it’s time to take your dating life to the next level.", "job": "Lawyer", "interests": [ "Gaming", "Ping Pong", "Tailored Suits", "Weddings", "Streaming Services" ], "qna1": { "question": "The first item on my bucket list is", "answer": "simulate space but stay alive for as long as possible" }, "qna2": { "question": "People would describe me as", "answer": "easy-going, a little nerdy but with a mature essence" } } """ ================================================ FILE: examples/llamacpp_example.py ================================================ from enum import Enum from pydantic import BaseModel, constr from llama_cpp import Llama import outlines class Weapon(str, Enum): sword = "sword" axe = "axe" mace = "mace" spear = "spear" bow = "bow" crossbow = "crossbow" class Armor(str, Enum): leather = "leather" chainmail = "chainmail" plate = "plate" class Character(BaseModel): name: constr(max_length=10) age: int armor: Armor weapon: Weapon strength: int if __name__ == "__main__": # curl -L -o mistral-7b-instruct-v0.2.Q5_K_M.gguf https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf model = outlines.from_llamacpp(Llama("./mistral-7b-instruct-v0.2.Q5_K_M.gguf")) # Construct structured sequence generator generator = outlines.Generator(model, Character) # Draw a sample seed = 789005 prompt = "Instruct: You are a leading role play gamer. You have seen thousands of different characters and their attributes.\nPlease return a JSON object with common attributes of an RPG character. Give me a character description\nOutput:" sequence = generator(prompt, seed=seed, max_tokens=512) print(sequence) ================================================ FILE: examples/llamacpp_processor.py ================================================ from enum import Enum from llama_cpp import Llama, LogitsProcessorList from pydantic import BaseModel, constr from outlines.processors import JSONLogitsProcessor from outlines.models.llamacpp import LlamaCppTokenizer class Weapon(str, Enum): sword = "sword" axe = "axe" mace = "mace" spear = "spear" bow = "bow" crossbow = "crossbow" class Armor(str, Enum): leather = "leather" chainmail = "chainmail" plate = "plate" class Character(BaseModel): name: constr(max_length=10) age: int armor: Armor weapon: Weapon strength: int if __name__ == "__main__": llama = Llama("./phi-2.Q4_K_M.gguf") tokenizer = LlamaCppTokenizer(llama) prompt = "Instruct: You are a leading role play gamer. You have seen thousands of different characters and their attributes.\nPlease return a JSON object with common attributes of an RPG character. Give me a character description\nOutput:" logits_processor = JSONLogitsProcessor(Character, tokenizer, tensor_library_name="numpy") json_str = llama.create_completion( prompt, top_k=40, top_p=0.95, temperature=0.7, max_tokens=100, logits_processor=LogitsProcessorList([logits_processor]), )["choices"][0]["text"] print(json_str) ================================================ FILE: examples/math_generate_code.py ================================================ """Example from https://dust.tt/spolu/a/d12ac33169""" import openai import outlines from outlines import Template examples = [ {"question": "What is 37593 * 67?", "code": "37593 * 67"}, { "question": "Janet's ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?", "code": "(16-3-4)*2", }, { "question": "A robe takes 2 bolts of blue fiber and half that much white fiber. How many bolts in total does it take?", "code": " 2 + 2/2", }, ] question = "Carla is downloading a 200 GB file. She can download 2 GB/minute, but 40% of the way through the download, the download fails. Then Carla has to restart the download from the beginning. How load did it take her to download the file in minutes?" answer_with_code_prompt = Template.from_string( """ {% for example in examples %} QUESTION: {{example.question}} CODE: {{example.code}} {% endfor %} QUESTION: {{question}} CODE:""" ) def execute_code(code): result = eval(code) return result prompt = answer_with_code_prompt(question=question, examples=examples) model = outlines.from_openai(openai.OpenAI(), "gpt-4o-mini") answer = model(prompt) result = execute_code(answer) print(f"It takes Carla {result:.0f} minutes to download the file.") ================================================ FILE: examples/meta_prompting.py ================================================ """Meta-prompting examples. References ---------- .. [0] "Prompting is programming: A Query Language for Large Language Models" https://arxiv.org/abs/2212.06094 .. [1] "Prompt programming For Large Language Models: Beyond the Few-Shot Paradigm" https://arxiv.org/abs/2102.07350. """ import argparse import openai import outlines from outlines import Template client = openai.OpenAI() def split_into_steps(question, model_name: str): solve = Template.from_string( """{{question}} Rephrase : : as a true or false statement, identify an Object, relationship and subject """ ) model = outlines.from_openai(client, model_name) prompt = solve(question=question) answer = model(prompt, max_tokens=500) prompt += ( answer + "\n what is the only option that displays the same type of relationship as : :?" ) answer = model(prompt, max_tokens=500) completed = prompt + answer return completed def fill_in_the_blanks(question, model_name: str): determine_goal = Template.from_string( """{{question}} In order to solve this problem, we will analyze each of the options and determine """ ) solve = Template.from_string("""{{memory}}. Let's begin.""") model = outlines.from_openai(client, model_name) prompt = determine_goal(question=question) answer = model(prompt, stop=["."]) prompt = solve(memory=prompt + answer) answer = model(prompt, max_tokens=500) completed = prompt + answer return completed def ask_an_expert(question, model_name: str): find_expert = Template.from_string( """ {{question}} I entered my question into the Expert Generator \ and waited. The Expert Generator will render a \ simulation of an expert to answer my question. \ The expert could be anyone, dead or alive, real \ or fictional; the machine will find the person \ most qualified to answer the question. For this \ question in particular, the expert must be someone \ who has thought a lot about the problem of \ artificial intelligence and its alignment. \ The Expert Generator beeped, indicating that it has \ found the most qualified expert. The name displayed \ on the screen: " """ ) get_answer = Template.from_string( """ {{memory}}". I am ready to ask my question. "{{expert}}" I say, {{question}} """ ) model = outlines.from_openai(client, model_name) prompt = find_expert(question=question) expert = model(prompt, stop=['"']) prompt = get_answer(question=question, expert=expert, memory=prompt+expert) answer = model(prompt, max_tokens=500) completed = prompt + answer return completed def ask_an_expert_simple(question, model_name: str): find_expert = Template.from_string( """ Q: {{question}} A: A good person to answer this question would be """ ) get_answer = Template.from_string( """ {{memory}}. For instance, {{expert}} would answer """ ) model = outlines.from_openai(client, model_name) prompt = find_expert(question=question) expert = model(prompt, stop=["\n", "."]) prompt = get_answer(expert=expert, memory=prompt+expert) answer = model(prompt, max_tokens=500) completed = prompt + answer return completed def run_example(model_fn, question, model_name): completed = model_fn(question, model_name) print("\n-----------------------") print(f"{completed}") if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run the Meta Prompting examples") parser.add_argument( "--model", type=str, default="gpt-4o-mini", help="The Large Language Model to use to run the examples.", ) args = parser.parse_args() math_q = "f(x) = x*x. What is f(f(3))?" sat_q = """ BRAGGART :: MODESTY A) FLEDGLING : EXPERIENCE B) EMBEZZLER : GREED C) WALLFLOWER : TIMIDITY D) INVALID : MALADY E) CANDIDATE : AMBITION """ alignment_q = "What should humankind do to ensure that artificial general intelligence is aligned?" meaning_q = "What is the meaning of life?" run_example(split_into_steps, math_q, args.model) run_example( split_into_steps, sat_q.lower(), args.model ) # gpt>3.5 usually gets this one right run_example(fill_in_the_blanks, sat_q, args.model) run_example(ask_an_expert, alignment_q, args.model) run_example(ask_an_expert_simple, meaning_q, args.model) ================================================ FILE: examples/modal_example.py ================================================ import modal app = modal.App(name="outlines-app") outlines_image = modal.Image.debian_slim(python_version="3.11").pip_install( "outlines==1.0.0", "transformers==4.38.2", "datasets==2.18.0", "accelerate==0.27.2", ) def import_model(): from transformers import AutoModelForCausalLM, AutoTokenizer model_id = "mistralai/Mistral-7B-Instruct-v0.2" _ = AutoTokenizer.from_pretrained(model_id) _ = AutoModelForCausalLM.from_pretrained(model_id) outlines_image = outlines_image.run_function(import_model) schema = """{ "title": "Character", "type": "object", "properties": { "name": { "title": "Name", "maxLength": 10, "type": "string" }, "age": { "title": "Age", "type": "integer" }, "armor": {"$ref": "#/definitions/Armor"}, "weapon": {"$ref": "#/definitions/Weapon"}, "strength": { "title": "Strength", "type": "integer" } }, "required": ["name", "age", "armor", "weapon", "strength"], "definitions": { "Armor": { "title": "Armor", "description": "An enumeration.", "enum": ["leather", "chainmail", "plate"], "type": "string" }, "Weapon": { "title": "Weapon", "description": "An enumeration.", "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], "type": "string" } } }""" @app.function(image=outlines_image, gpu="A100-40GB") def generate( prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", ): import outlines from transformers import AutoModelForCausalLM, AutoTokenizer model_id = "mistralai/Mistral-7B-Instruct-v0.2" model = outlines.from_transformers( tokenizer=AutoTokenizer.from_pretrained(model_id), model=AutoModelForCausalLM.from_pretrained(model_id, device="cuda"), ) character = model( f"[INST]Give me a character description. Describe {prompt}.[/INST]", outlines.json_schema(schema), ) print(character) @app.local_entrypoint() def main( prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", ): generate.remote(prompt) ================================================ FILE: examples/pick_odd_one_out.py ================================================ """Chain-of-thought prompting for Odd one out classification. Example taken from the LQML library [1]_. References ---------- .. [1] Beurer-Kellner, L., Fischer, M., & Vechev, M. (2022). Prompting Is Programming: A Query Language For Large Language Models. arXiv preprint arXiv:2212.06094. """ import json import openai import outlines from outlines import Generator from outlines.types import JsonSchema build_ooo_prompt = outlines.Template.from_file("prompts/pick_odd_one_out.txt") options = ["sea", "mountains", "plains", "sock"] options_schema = JsonSchema({ "type": "object", "properties": { "result": { "type": "string", "enum": options } }, "required": ["result"] }) model = outlines.from_openai(openai.OpenAI(), "gpt-4o-mini") gen_text = Generator(model) gen_choice = Generator(model, options_schema) prompt = build_ooo_prompt(options=options) reasoning = gen_text(prompt, stop=["Pick the odd word", "So the odd one"]) prompt += reasoning raw_result = gen_choice(prompt) result = json.loads(raw_result)["result"] prompt += result print(result) ================================================ FILE: examples/prompts/babyagi_create_task.txt ================================================ Objective: {{ objective }} Current Task: {{ task }} Result: {{ result }} Previous Tasks: {{ previous_tasks }} Based on the result, create a list of new tasks that will help achieve the objective. Please provide the tasks in the following format: 1. [Task description] 2. [Task description] ================================================ FILE: examples/prompts/babyagi_perform_task.txt ================================================ Objective: {{ objective }} Task: {{ task }} Please perform the task and provide a concise result in the following format: Result: [Your concise result here] ================================================ FILE: examples/prompts/babyagi_prioritize_task.txt ================================================ Tasks: {{ tasks }} Next Task ID: {{ next_task_id }} Please prioritize the tasks based on their importance and urgency to achieve the objective. Provide the prioritized tasks in the following format: 1. [Task ID]. [Task description] 2. [Task ID]. [Task description] ================================================ FILE: examples/prompts/dating_profile.txt ================================================ You are a world-renowned matchmaker who understands the modern dating market. Your job is to generate dating app profiles for male clients interested in women based on a provided description. The profiles should be authentic, show off their strengths, and maximize their likelihood of getting matches on dating apps. Here are some examples of past clients that you have successfully created profiles for: {% for example in examples %} Description: {{ example.description }} Profile: {{ example.profile }} {% endfor %} Here is the new client who you need to create a profile for: Description: {{ description }} Profile: ================================================ FILE: examples/prompts/pick_odd_one_out.txt ================================================ Pick the odd word out: skirt, dress, pen, jacket. skirt is clothing, dress is clothing, pen is an object, jacket is clothing. So the odd one is pen. Pick the odd word out: Spain, France, German, England, Singapore. Spain is a country, France is a country, German is a language, ... So the odd one is German. Pick the odd word out: {{ options | join(", ") }}. ================================================ FILE: examples/prompts/self_consistency.txt ================================================ {% for example in examples %} Q: {{ example.question }} A: {{ example.answer }} {% endfor %} Q: {{ question }} A: ================================================ FILE: examples/react.py ================================================ """ReAct This example was inspired by the LQML library [1]_. The ReAct framework was first developed in [2]_ and augments Chain-of-Thought prompting with the ability for the model to query external sources. References ---------- .. [1] Beurer-Kellner, L., Fischer, M., & Vechev, M. (2022). Prompting Is Programming: A Query Language For Large Language Models. arXiv preprint arXiv:2212.06094. .. [2] Yao, S., Zhao, J., Yu, D., Du, N., Shafran, I., Narasimhan, K., & Cao, Y. (2022). React: Synergizing reasoning and acting in language models. arXiv preprint arXiv:2210.03629. """ import json import requests # type: ignore from openai import OpenAI import outlines from outlines import Generator, Template from outlines.types import JsonSchema build_reAct_prompt = Template.from_string( """What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into? Tho 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado ... Act 2: Search 'Colorado orogeny' Obs 2: The Colorado orogeny was an episode of mountain building (an orogeny) ... Tho 3: It does not mention the eastern sector. So I need to look up eastern sector. ... Tho 4: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft. Act 5: Finish '1,800 to 7,000 ft' {{ question }} """ ) add_mode = Template.from_string( """{{ prompt }} {{ mode }} {{ i }}: {{ result }} """ ) def search_wikipedia(query: str): url = f"https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro&explaintext&redirects=1&titles={query}&origin=*" response = requests.get(url) page = response.json()["query"]["pages"] return ".".join(list(page.values())[0]["extract"].split(".")[:2]) prompt = build_reAct_prompt(question="Where is Apple Computers headquarted? ") model = outlines.from_openai(OpenAI(), "gpt-4o-mini") # Define JSON schemas for mode and action mode_schema = JsonSchema({ "type": "object", "properties": { "result": { "type": "string", "enum": ["Tho", "Act"] } }, "required": ["result"] }) action_schema = JsonSchema({ "type": "object", "properties": { "result": { "type": "string", "enum": ["Search", "Finish"] } }, "required": ["result"] }) mode_generator = Generator(model, mode_schema) action_generator = Generator(model, action_schema) text_generator = Generator(model) for i in range(1, 10): mode_output = mode_generator(prompt, max_tokens=128) mode = json.loads(mode_output)["result"] # Extract the result from the JSON output prompt = add_mode(i=i, mode=mode, result="", prompt=prompt) if mode == "Tho": thought = text_generator(prompt, stop="\n", max_tokens=128) prompt += f"{thought}" elif mode == "Act": action_output = action_generator(prompt, max_tokens=128) action = json.loads(action_output)["result"] # Extract the result from the JSON output prompt += f"{action} '" subject = text_generator(prompt, stop=["'"], max_tokens=128) # Apple Computers headquartered subject = " ".join(subject.split()[:2]) prompt += f"{subject}'" if action == "Search": result = search_wikipedia(subject) prompt = add_mode(i=i, mode="Obs", result=result, prompt=prompt) else: break print(prompt) ================================================ FILE: examples/sampling.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 1, "id": "62129e1a-e9de-454e-a714-35ccbcf0b518", "metadata": {}, "outputs": [], "source": [ "#OK\n", "import functools as ft\n", "import re\n", "\n", "import numpy as np\n", "import matplotlib.pylab as plt\n", "import openai\n", "\n", "import outlines" ] }, { "cell_type": "code", "execution_count": 13, "id": "b20aafe8-b7a3-4df4-878f-b48b74e131df", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "env: OPENAI_API_KEY=# you key here\n" ] } ], "source": [ "%env OPENAI_API_KEY= # you key here" ] }, { "cell_type": "markdown", "id": "2a3514d6-d5d7-46e9-9b69-1251d337e094", "metadata": {}, "source": [ "In this example we will look at completion results for questions similar to those in the GSM8K dataset, using few-shots prompts with 5 examples. We first use `outlines.Template` to build the few-shot prompt. Outlines uses the Jinja2 templating engine to render the object when the function is called with the variables' values; it thus allows you to build complex prompts very easily." ] }, { "cell_type": "code", "execution_count": 3, "id": "ffe8bb11-6b51-4fe7-bfb3-c62556a60db8", "metadata": {}, "outputs": [], "source": [ "examples = [\n", " {\n", " \"question\": \"There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?\",\n", " \"answer\": \"We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.\",\n", " },\n", " {\n", " \"question\": \"If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?\",\n", " \"answer\": \"There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.\",\n", " },\n", " {\n", " \"question\": \"Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?\",\n", " \"answer\": \"Leah had 32 chocolates and Leah’s sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.\",\n", " },\n", " {\n", " \"question\": \"Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?\",\n", " \"answer\": \"Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.\",\n", " },\n", " {\n", " \"question\": \"Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?\",\n", " \"answer\": \"He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.\",\n", " },\n", " {\n", " \"question\": \"There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?\",\n", " \"answer\": \"There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.\",\n", " },\n", " {\n", " \"question\": \"Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?\",\n", " \"answer\": \"Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.\",\n", " },\n", " {\n", " \"question\": \"Olivia has $23. She bought five bagels for $3 each. How much money does she have left?\",\n", " \"answer\": \"She bought 5 bagels for $3 each. This means she spent 5\",\n", " },\n", "]\n", "\n", "\n", "few_shot_prompt = outlines.Template.from_string(\n", " \"\"\"\n", " {% for example in examples %}\n", " Q: {{ example.question }}\n", " A: {{ example.answer }}\n", " {% endfor %}\n", " Q: {{ question }}\n", " A:\n", " \"\"\"\n", ")\n", "\n", "# Template instances can be partially evaluated because they are callable objects\n", "gsm8k_prompt = ft.partial(few_shot_prompt, examples=examples)" ] }, { "cell_type": "markdown", "id": "1eae0ec8-89f0-43fc-b055-6fcd64cbc03b", "metadata": {}, "source": [ "## When `gpt-4o-mini` is uncertain" ] }, { "cell_type": "markdown", "id": "a273ed78-e813-467e-85f3-16d7f283ba87", "metadata": {}, "source": [ "Let us now sample 20 completions with the `gpt-4o-mini` model. Outlines is sampling first, and allows to draw several samples with both OpenAI and `transformers` models easily:" ] }, { "cell_type": "code", "execution_count": 4, "id": "beff960d-6833-4f24-af09-5b65886a9549", "metadata": {}, "outputs": [], "source": [ "model = outlines.from_openai(openai.OpenAI(), \"gpt-4o\")\n", "\n", "question = \"When I was 6, my sister was half the age of my brother. When I was 14, my sister was 3 years younger than my brother. Now I'm 70, how old is my sister now?\"\n", "prompt = gsm8k_prompt(question=question)\n", "answers = model(prompt, n=20, max_tokens=512)" ] }, { "cell_type": "markdown", "id": "1a895b6d-d4d4-40f9-9156-24ba7e21cc08", "metadata": {}, "source": [ "The correct answer to this question is 67. Let us now count the different answers, and take a look at their distribution. Let us first define a few utility functions:" ] }, { "cell_type": "code", "execution_count": 5, "id": "f1c83d1f-a478-4509-890e-b84a2e0d8846", "metadata": {}, "outputs": [], "source": [ "def count_digits(answers):\n", " digits = []\n", " for answer in answers:\n", " try:\n", " match = re.findall(r\"\\d+\", answer)[-1]\n", " if match is not None:\n", " digit = int(match)\n", " digits.append(digit)\n", " except AttributeError:\n", " print(f\"Could not parse the completion: '{answer}'\")\n", "\n", " unique_digits, counts = np.unique(digits, return_counts=True)\n", " return {d: c for d, c in zip(unique_digits, counts)}\n", "\n", "\n", "def plot_counts(counts):\n", " fig = plt.figure(figsize=(12, 8))\n", " ax = fig.add_subplot(111)\n", "\n", " bar = ax.bar(counts.keys(), counts.values())\n", " ax.spines[[\"right\", \"top\", \"left\"]].set_visible(False)\n", " ax.get_yaxis().set_visible(False)\n", " ax.get_yaxis().set_visible(False)\n", "\n", " for rect in bar:\n", " height = rect.get_height()\n", " plt.text(\n", " rect.get_x() + rect.get_width() / 2.0,\n", " height,\n", " f\"{height:.0f}\",\n", " ha=\"center\",\n", " va=\"bottom\",\n", " fontsize=20,\n", " )\n", "\n", " ax.set_xticks(list(counts.keys()))\n", " ax.set_xlabel(\"Answer\")\n", "\n", "\n", "def entropy(counts):\n", " counts = np.array(list(counts.values()))\n", " probs = counts / np.sum(counts)\n", " log_probs = np.log(probs)\n", " return -np.sum(probs * log_probs)" ] }, { "cell_type": "code", "execution_count": 6, "id": "88668e09-bcd6-4a6a-83a5-838189b910eb", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAqsAAAHgCAYAAACCbCTDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVwklEQVR4nO3df7DldX3f8dcbFtmVLb9cDDgLrB0DjNWUlWQzWBaDtEJg0BBiJk5VsKLFKRSsU7u2M8wKZouDjsg4Y0chBn9MTfgh3QkapQiCHSJWFigBAlMgggUkJlVJDXXh0z/2ILuwF9LZe+95372Px8yZvff7PeznvcB893m/53vOt8YYAQCAjnaZ9gAAADATsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtLXkRfb7XCsAAOZDbW+jM6sAALQlVgEAaEusAgvGddddl5NPPjn7779/dt9997ziFa/Icccdl69+9avTHg3YiTn2TNeLXbMK0MIHP/jBXHjhhVm5cmXe/OY3Z8WKFXn88cfzve99LzfccENOOOGEaY8I7IQce6avxnjB91B5gxUwdZ/97Gfz3ve+N6eeemo+85nP5CUveck2+3/+859nt912m9J0wM7KsWfebfcNVmIVaO3JJ5/MgQcemGXLluW+++573l8WAHPBsWcqthurLgMAWrv22mvz+OOP55xzzskuu+ySa665JnfeeWeWLl2aNWvW5Mgjj5z2iMBOyLGnD7EKtPbd7343SbJ06dKsXr06d9555zb7jz766FxxxRXZb7/9pjEesJNy7OnDpwEArf3whz9Mklx44YWpqtx000356U9/mjvuuCNvetObcuONN+atb33rlKcEdjaOPX2IVaC1p59+OkmyZMmSbNy4MUcddVSWL1+e1772tfnKV76SlStX5lvf+lZuvvnmKU8K7Ewce/oQq0Bre++9d5Jk9erVWbVq1Tb7XvrSl+a4445Lktxyyy3zPBmwM3Ps6UOsAq0deuihSZ79i+O59tlnnyTJz372s/kaCVgEHHv6EKtAa8cee2yqKnfdddcvXpbb2jNvenjlK18536MBOzHHnj7EKtDawQcfnJNOOinf//7388lPfnKbfd/4xjfy9a9/PXvvvXeOP/74KU0I7Iwce/pwUwCgvYcffjivf/3r89BDD+XYY4/N6tWr88ADD+Tqq69OVeXLX/5yTjnllGmPCexkHHvmnTtYAQvX448/nvPOOy8bN27MI488kj333DNr167Nhz70oaxZs2ba4wE7KceeeSVWAQBoa7ux6ppVAADaEqsAALQlVgEAaGvJtAcAeDGr1l0z474HLzhxHicBFhPHnh6cWQUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCAAvaF7/4xVRVqiqXXHLJtMdhlolVAGDBeuihh3LmmWdm+fLl0x6FOSJWAYAFaYyRd73rXXnZy16WM844Y9rjMEfEKgCwIF188cX55je/mc997nPZY489pj0Oc0SsAgALzt13351169bl7LPPztFHHz3tcZhDYhUAWFA2b96cd7zjHTnooIOyYcOGaY/DHFsy7QEAAP5/nHfeedm0aVO+/e1vZ9myZdMehznmzCoAsGB85zvfyYYNG/KBD3wgRx555LTHYR6IVQBgQdi8eXPe+c535pBDDsn5558/7XGYJ2IVAFgQnnjiidx77725++67s3Tp0l/cCKCq8uEPfzhJ8p73vCdVlXPOOWe6wzJrXLMKACwIu+++e9797ndvd9+tt96aTZs25aijjsqhhx7qEoGdiFgFABaEZcuWzXg71fXr12fTpk059dRTc/rpp8/zZMwllwEAANCWWAUAoC2xCgAseOvXr88YwyUAOyGxCgBAW2IVAIC2xCoAAG356CoAYMFYte6aF9z/4AUnztMkzBdnVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAAIvEj370o1xyySU5+eST86pXvSrLli3LXnvtlaOOOiqXXnppnn766WmP+DxLpj0AAADz4/LLL8/73ve+HHDAATnmmGNy0EEH5bHHHstVV12V008/PV/72tdy+eWXp6qmPeoviFUAgEXikEMOycaNG3PiiSdml12efYF9w4YNWbNmTa688spcddVVOeWUU6Y45bZcBgAAsEi88Y1vzEknnbRNqCbJ/vvvnzPOOCNJcsMNN0xhspmJVQAAsttuuyVJlizp9cK7WAUAWOQ2b96cz3/+80mS448/fsrTbEusAgAscuvWrcudd96ZE044Iccdd9y0x9mGWAUAWMQuvvjifPzjH89hhx2WL3zhC9Me53nEKgDAIvWpT30qZ599dl796lfn+uuvz7777jvtkZ5HrAIALEIXXXRRzjrrrLzmNa/J9ddfn/3333/aI22XWAUAWGQ++tGP5v3vf38OP/zwXH/99Xn5y18+7ZFmJFYBABaR888/P+vWrcsRRxyR6667LitWrJj2SC+o1wdpAQAwZy677LKce+652XXXXbN27dpcfPHFz3vOqlWrctppp83/cDMQqwAAi8QDDzyQJHnqqady0UUXbfc5b3jDG1rFqssAAAAWifXr12eM8YIPt1sFAIC/J7EKAEBbYhUAgLa8wQoAYBFZte6aGfc9eMGJ8zjJ348zqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoq2WsXnHFFTnrrLOydu3a7LnnnqmqvP3tb5/2WAAAc0L7zGzJtAfYno985CO5/fbbs3z58qxcuTL33HPPtEcCAJgz2mdmLc+sfuITn8i9996bn/zkJ/n0pz897XEAAOaU9plZyzOrxxxzzLRHAACYN9pnZi3PrAIAQCJWAQBoTKwCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2mp5U4Crr746V199dZLk0UcfTZLcfPPNOe2005IkK1asyMc+9rEpTQcAMLu0z8xaxuptt92Wyy67bJtt999/f+6///4kycEHH7xo/4MBADsf7TOzlpcBrF+/PmOMGR8PPvjgtEcEAJg12mdmLWMVAAASsQoAQGNiFQCAtlq+wSpJVq27ZsZ9D15w4jxOAgAwt3TPzJxZBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2hKrAAC0JVYBAGhLrAIA0JZYBQCgLbEKAEBbYhUAgLbEKgAAbYlVAADaEqsAALQlVgEAaEusAgDQllgFAKAtsQoAQFtiFQCAtsQqAABtiVUAANoSqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG3VGGPmnVV/mmTF/I0zoxVJ/mraQwBtOCYAz5jP44G15tZfjTGOf+7GF4zVLqrqv48xfnXacwA9OCYAz5jP44G1psNlAAAAtCVWAQBoa6HE6memPQDQimMC8Iz5PB5YawoWxDWrAAAsTgvlzCoAAItQ+1itqr2r6oqquqeq7q6qI6c9EzA/qmppVd1SVbdX1Z9X1Ycn2/+wqh6oqtsmj8OnPCowD16oCarqA1U1qmpWPnJze2tV1R9tddx5sKpum4V1Dt3q97ytqn5SVedU1b5VdW1V3Tf5dZ85XOv8qrpjsu0bVfWKHV1rNrW/DKCqLkty0xjjkqp6SZKXjjH+95THAuZBVVWSPcYYT1TVbkm+neTsJGck+ZMxxhVTHRCYVzM1QVUdmOSSJIclOWKMscOfGfpi/VFVH0/y4zHGeTu61la/565JfpDk15P8qyR/Pca4oKrWJdlnjPHv5mitvxlj/GSy/V8nefUY44zZWmtHtT6zWlV7JTk6yaVJMsb4v0IVFo+xxROTb3ebPHr/hA3MiRdpgk8k+WBm6fjwYv0x+UH6d5P859lYbyvHJvmfY4y/TPKWJJdNtl+W5Lfmaq1nQnVijzQ7zraO1SSvTPJ4ks9V1aaquqSq9pj2UMD8qapdJy+1/TDJtWOM70x2/f7kZatPVNXu05sQmCfbbYKqekuSH4wxbp/rtbbavzbJY2OM+2ZxzST5vTwbwL80xnhk8vWjSX5pDtdKVf1+VT2U5J8nOXeW19oh3WN1SZLXJfn0GGN1kr9Nsm66IwHzaYzx1Bjj8CQrk6ypqtck+VC2vNz3a0n2TTJrL40BbW2vCdYn+feZ/bh6sf54W2b5rOrkUoM3J7n8ufvGlms2Z+1s5/bWGmP8hzHGgUm+lOTM2VprNnSP1YeTPLzVmZQrsuV/HmCRmbwEd32S48cYj0wuEXgyyeeSrJnqcMB8mKkJXpnk9qp6MFt+qL21qvafo7VSVUuS/HaSP9rBNZ7rN5PcOsZ4bPL9Y1V1wGTNA7Ll1aW5WmtrX0pyyiyutcNax+oY49EkD1XVoZNNxya5a4ojAfOoqvarqr0nXy9L8s+S3LPVAbyy5TquO6c1IzA/ZmiCW8cYLx9jrBpjrMqWyHzd5LmzvdYz/fFPk9wzxnh4R9bYjueerd2Y5NTJ16cm+S9ztVZV/fJW+96S5J5ZXGuHLYRPAzg8W97h95Ik9yd51xjjb6Y6FDAvqupXsuWNBbtmyw/XfzzGOK+qvplkvySV5LYkZ2z1RixgJ/ViTTA5u/qrs/RpANtdq6r+MMmfjTH+046usdVaeyT5fpJ/OMb48WTby5L8cZKDkvxlkt8dY/z1HK11ZZJDkzw9WeuMMcYPdnSt2dI+VgEAWLxaXwYAAMDiJlYBAGhLrAIA0JZYBQCgLbEKAEBbYhXgOarqt6pqVNVh054FYLETqwDP97Yk3578OhWTu+QALHpiFWArVbU8yVFJ3p3k9ybbfqOqbqiqK6rqnqr60uTuWamqC6rqrqq6o6o+VlW7VtUDtcXeVfVUVR09ee6NVfXLVbVHVf1BVd1SVZuq6i2T/adV1cbJTQ+um86/AYBe/OQOsK23JPnTMca9VfWjqjpisn11kn+U5H8l+W9J/klV3Z3k5CSHjTFGVe09xniqqv4iyauz5Z7ltyZZW1XfSXLgGOO+qtqQ5JtjjH8xuZ3sLVX1XyfrvC7Jr8zGnWoAdgbOrAJs621Jvjz5+st59lKAW8YYD48xns6WW7yuSvLjJH+X5NKq+u0k/2fy3JuSHD15/MdsOVP7a0m+O9n/piTrquq2JDckWZott1RMkmuFKsCznFkFmKiqfZO8Mclrq2ok2TXJSHJNkie3eupTSZaMMTZX1Zokxyb5nSRnTv75G5O8L8krkpyb5N8m+Y1sidgkqSSnjDH+4jnr/3qSv52TPxzAAuXMKsCzfifJF8YYB48xVo0xDkzyQJK123vy5PrWvcYYX03y/iT/eLLrliSvT/L0GOPvsuVM7L/MlohNkq8nOWur615Xz9GfB2DBE6sAz3pbkq88Z9uVmflTAf5Bkj+pqjuy5dMD/k2SjDGeTPJQkj+bPO+myXP/x+T785PsluSOqvrzyfcAbEeNMaY9AwAAbJczqwAAtCVWAQBoS6wCANCWWAUAoC2xCgBAW2IVAIC2xCoAAG2JVQAA2vp/jdj4sUZoV2sAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "counts = count_digits(answers)\n", "plot_counts(counts)" ] }, { "cell_type": "markdown", "id": "661a1135-ac2d-4a49-a786-d04a7ba68b48", "metadata": {}, "source": [ "We see that there is an important variabilty in the answers given by `gpt-4o-mini`. Depending on the number of samples taken, even self-consistency sampling may lead to the wrong result here." ] }, { "cell_type": "code", "execution_count": 7, "id": "30ea0dfe-6c15-44f0-881c-88b325542b44", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Entropy: 1.5741030017371853\n" ] } ], "source": [ "print(f\"Entropy: {entropy(counts)}\")" ] }, { "cell_type": "markdown", "id": "0b15b230-b667-4c9c-8a5d-366dd61de9b7", "metadata": {}, "source": [ "## `gpt-4o-mini` on an easier question" ] }, { "cell_type": "markdown", "id": "beae30f0-4168-4a80-90d4-d26a4f476469", "metadata": {}, "source": [ "Let us now look at the results for an arguably easier question:" ] }, { "cell_type": "code", "execution_count": 8, "id": "7e106b94-2dfd-4a75-b4d9-b1ad693418a7", "metadata": {}, "outputs": [], "source": [ "question = \"When I was 6 my sister was half my age. Now I’m 70 how old is my sister?\"\n", "prompt = gsm8k_prompt(question)\n", "answers = model(question, samples=20, max_tokens=512)" ] }, { "cell_type": "code", "execution_count": 9, "id": "dd46fb2b-08ef-4003-8d03-ea0f39c865c4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Entropy: 0.1985152433458726\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAqsAAAHgCAYAAACCbCTDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQuklEQVR4nO3db6ye9V3H8c+PPxGxIjMVghSLkCYgOtmUzSCd6xii8mCIyxBCTBUTWbBmIzEhWeIAUfegSYkha6JIQsmSRaoclnUbmeCyQbrBoi2GrtSlEIZKxnQOCmPB9vLBuaH8aUui4dyfwuuVnPQ+13Wd3N/TR+/++ruva0zTFAAAaHTEvAcAAICDEasAANQSqwAA1BKrAADUEqsAANQSqwAA1Drqdc67rxUAAEthHOiglVUAAGqJVQAAaolVAIA3sc2bN2fdunVZvXp1jjvuuIwxcsUVVxz0+meeeSYf+9jHcsYZZ+SYY47J2972tlx44YW55557lnDq/V5vzyoAAIexG2+8Mdu3b8+yZcuyYsWK7Ny586DXfve73815552XHTt25KyzzspVV12VPXv25K677sr73//+3HLLLbnyyiuXcHorqwAAb2obNmzIrl278vTTT2fjxo2HvPa6667Ljh07cskll2Tbtm256aabcsstt+Thhx/OKaecknXr1uWJJ55YoskXiVUAgDexNWvWZNWqVRnjgB+2f4U777wzSXLDDTfkqKP2/wf8CSeckGuuuSbf//73c+utt75hsx6IWAUAIEny5JNPJklOO+2015x78dhS710VqwAAJEmWL1+eJHn00Udfc2737t1JkkceeWRJZxKrAAAkSS666KIkycc//vHs3bv3peNPPfVUNmzYkGTxQ1hLyd0AAABIsrhX9e67787mzZtz9tln5/zzz8+zzz6bu+66KyeffHIef/zxHHHE0q51WlkFACBJctJJJ+XBBx/M1VdfnWeeeSaf/OQns2XLllx66aW54447kix+2GopWVkFAOAlJ554Ym6++ebcfPPNrzh+7733JknOOeecJZ3HyioAAK9r06ZNSZLLL798Sd9XrAIAkCTZt29f9uzZ85rjt99+ezZt2pRzzz03F1988ZLOZBsAAMCb2MLCQhYWFpLsv4/q1q1bs3bt2iSLt6tav359kuS5557LiSeemAsuuCCnn356jjjiiNx///3ZunVrzjzzzNxxxx1L/gGrMU3Toc4f8iQAAN2uu+66XH/99Qc9v3Llyjz22GNJkhdeeCFXXXVV7rvvvpceq7pq1ap86EMfykc+8pEce+yxb+SoB3zEllgFAKDBAWPVnlUAAGqJVQAAaolVAABqiVUAAHLqtVvmPcIBiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABqiVUAAGrVxOrmzZuzbt26rF69Oscdd1zGGLniiivmPRYAAHN01LwHeNGNN96Y7du3Z9myZVmxYkV27tw575EAAJizmpXVDRs2ZNeuXXn66aezcePGeY8DAECBmpXVNWvWzHsEAADK1KysAgDAq4lVAABqiVUAAGqJVQAAaolVAABqiVUAAGqJVQAAaolVAABq1TwUYGFhIQsLC0mSJ598MkmydevWrF27NkmyfPnyrF+/fk7TAQAwDzWxum3bttx2222vOLZ79+7s3r07SbJy5UqxCgDwFjOmaTrU+UOeBADgzeHUa7fksU9cNM8RxoEO2rMKAEAtsQoAQC2xCgBArdpYPfXaLfMeAQCAOauNVQAAEKsAANQSqwAA1BKrAADUEqsAANQSqwAA1BKrAADUEqsAANQSqwAA1BKrAADUEqsAANQSqwAA1BKrAADUEqsAANQSqwAA1BKrAADUEqsAANQSqwAA1BKrAADUEqsAANQSqwAA1BrTNB385BhfSLJ86cZ5heVJvjOn9wYAeCuaZ399Z5qmX3v1wUPG6jyNMb4+TdMvznsOAIC3isb+sg0AAIBaYhUAgFrNsfpX8x4AAOAtpq6/avesAgBA88oqAABvcXWxOsY4ZozxwBhj+xjj4THG9fOeCQDgcHewxhqL/myMsWuM8Y0xxh+97PhfjjG+OcZ4aIzxznnMfdQ83vR1/CDJ+6Zp2jPGODrJfWOMz0/T9NV5DwYAcBg7YGMlOTPJKUnOmKZp3xjjhNn1v55k1ezr3Uk2zv5cUnWxOi1uot0z+/bo2ZeNtQAA/w+HaKwPJ7l8mqZ9s+u+PbvmA0k2zX7uq2OM48cYJ03T9B9LOXfdNoAkGWMcOcbYluTbSb44TdPX5jwSAMBh7yCNdXqSS8cYXx9jfH6MsWp2+clJvvWyH39idmxJVcbqNE17p2k6O8mKJO8aY/zsnEcCADjsHaSxfijJ87MnV/11klvnOOJrVMbqi6Zp+u8k/5jkNc+JBQDg/+ZVjfVEkr+fnbozydtnr/8ti3tZX7RidmxJ1cXqGOMnxhjHz17/cJILkuyc61AAAIe5QzTWQpI1s8t+Jcmu2evPJPmd2V0BfinJ95Z6v2pS+AGrJCcluW2McWQWY/pvp2n67JxnAgA43B2wscYY9yX51Bjjo1n8ANbvz67/XJLfSPLNJM8l+d05zOwJVgAA9KrbBgAAAC8SqwAA1BKrAADUEqsAANQSqwAA1BKrAK8yxrh4jDGNMc6Y9ywAb3ViFeC1Lkty3+zPuRhjNN4HG2DJiVWAlxljLEtyXpIrk/z27Nh7xxhfGmNsHmPsHGN8aowxZuc+McbYMcZ4aIyxfoxx5Bjj0dkTX44fY+wdY7xndu2Xxxirxhg/Msa4dYzxwBjjn8cYH5idXzvG+MwY494k98znbwCgi3+5A7zSB5J8YZqmXWOM/xxj/MLs+DuSnJXk35Pcn+SXxxjfSPKbSc6YpmkaYxw/TdPeMcYjSX4myU8n+ackq8cYX0tyyjRN/zrG+PMk907T9HuzRx8+MMb4h9n7vDPJ26dp+q+l+oUBmllZBXily5J8evb609m/FeCBaZqemKZpX5JtSU5N8r0kzyf5mzHGJVl8HGGSfCXJe2Zff5HFldpzkjw4O/+rSa4dY2xL8qUkxyT5qdm5LwpVgP2srALMjDF+PMn7kvzcGGNKcmSSKcmWJD942aV7kxw1TdP/jDHeleT8JB9M8oezn/9ykg8n+ckkf5Lkj5O8N4sRmyQjyW9N0/TIq97/3UmefUN+OYDDlJVVgP0+mOT2aZpWTtN06jRNpyR5NMnqA10829/6Y9M0fS7JR5P8/OzUA0nOTbJvmqbns7gS+wdZjNgkuTvJupfte33HG/T7ABz2xCrAfpclufNVx/4uB78rwI8m+ewY46Es3j3gmiSZpukHSb6V5Kuz674yu/ZfZt//aZKjkzw0xnh49j0ABzCmaZr3DAAAcEBWVgEAqCVWAQCoJVYBAKglVgEAqCVWAQCoJVYBAKglVgEAqCVWAQCo9b+lzUDoz9UHogAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "counts = count_digits(answers)\n", "plot_counts(counts)\n", "print(f\"Entropy: {entropy(counts)}\")" ] }, { "cell_type": "markdown", "id": "cf4cacdf-a31d-43bd-8517-eec9f656eee4", "metadata": {}, "source": [ "The entropy of the results is much lower, we say that the model is more \"certain\" of its answers. " ] }, { "cell_type": "markdown", "id": "22f31872-aab7-4a68-b9f2-d335a4f1a875", "metadata": {}, "source": [ "## How `gpt-4` compares to `gpt-4o-mini`\n", "\n", "Let us now look at how GPT4 fares on the original question:" ] }, { "cell_type": "code", "execution_count": 11, "id": "2d5ab5b8-eca5-47f5-a35c-5f3865e35755", "metadata": {}, "outputs": [], "source": [ "model = outlines.from_openai(openai.OpenAI(), \"gpt-4\")\n", "\n", "question = \"When I was 6, my sister was half the age of my brother. When I was 14, my sister was 3 years younger than my brother. Now I'm 70, how old is my sister now?\"\n", "prompt = gsm8k_prompt(question)\n", "answers = model(prompt, samples=20, max_tokens=512)" ] }, { "cell_type": "code", "execution_count": 12, "id": "d316a5f7-cebc-4b09-9b1b-aee219b2f088", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Entropy: -0.0\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAqwAAAHgCAYAAABgsD+6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQI0lEQVR4nO3dYahf9X3H8c+vua6J3Ui4hKKDOpVMxTmn1Tlwatt0rKKUziZgfJa5B2tg0TkY5EEZLepWYYITwTJ0o5Wi08ypXYdjMxG76OpkLc51jQUdU4ZiHaagREg8e5C/0WhiNWlyP7l9vSDk/s/v/Lnffx6Ed05+95wxTVMAAKDVhxZ6AAAAeC+CFQCAaoIVAIBqghUAgGqCFQCAaoIVAIBqcz9h3T2vAAA4EsaBFlxhBQCgmmAFAKCaYAV4H15++eXcdtttueyyy7Jq1aosW7Ysy5cvzwUXXJDbb789b7zxxn7f9+ijj+aSSy7J/Px8li1bljPPPDM33XRTdu/efYQ/AcDRa/yER7PawwqQ5Ktf/Wo2bNiQ448/Pp/61Kdywgkn5MUXX8y9996bHTt2ZM2aNbnnnnsyxltbsO6///6sWbMmS5cuzeWXX575+fl885vfzPbt27N27drcc889C/iJAOoccA+rYAV4H7Zs2ZJXX301l156aT70obf+c+qFF17Ieeedl+eeey6bN2/OmjVrkiQ//vGPs2rVquzYsSPbtm3LueeemyTZuXNnVq9encceeyx33nln1q1btyCfB6CQH7oCOBSrV6/OZz/72X1iNUmOO+64fOELX0iSPPzww3uPb968OS+99FLWrVu3N1aTZOnSpbnuuuuSJLfeeuvhHxxgERCsAIfomGOOSZLMzb11p8AtW7YkSS6++OJ3nX/RRRfl2GOPzaOPPprXX3/9yAwJcBQTrACHYNeuXfn617+eZN843b59e5LklFNOedd75ubmctJJJ2XXrl155plnjsygAEcxwQpwCDZt2pSnnnoql1xyST7zmc/sPb5jx44kyfLly/f7vjePv/LKK4d9RoCjnWAFOEg333xzbrzxxpx22mm54447FnocgEVLsAIchFtuuSVXX311Tj/99GzdujXz8/P7rL95BfXNK63v9ObxFStWHNY5ARYDwQrwAd10003ZuHFjzjjjjGzdujXHHXfcu8459dRTkyRPP/30u9Z27dqVZ599NnNzczn55JMP+7wARzvBCvAB3HDDDbnmmmty1llnZevWrfnoRz+63/NWr16dJHnwwQfftfbII4/ktddey/nnn58Pf/jDh3VegMVAsAK8T9dee202bdqUc845Jw899FBWrlx5wHPXrl2blStX5q677soTTzyx9/jOnTvzxS9+MUmyYcOGwz4zwGLgSVcA78PXvva1rF+/PkuWLMnGjRv3+9P/J554YtavX7/39X333Ze1a9dm6dKlWbduXebn5/PAAw/sfTTr3Xffvc+jXAF+xnk0K8Ch+NKXvpQvf/nL73nOJz7xiX2edpUk27Zty/XXX5/HHnssO3fuzKpVq3LllVfmqquuypIlSw7jxABHHcEKAEC1AwarPawAAFQTrAAAVBOsAABUm1voAQ7kxE3fWugRAAB+pvz3Vy5d6BH2yxVWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKqNaZoOvDjGg0lWHrlxABaFlUl+tNBDABxlfjRN08X7W3jPYAXggxtjPDFN07kLPQfAYmFLAAAA1QQrAADVBCvAT99fLvQAAIuJPawAAFRzhRUAgGpzCz0AwNFsjLEiyW1JzkgyJbkyyR8mOXV2yookr0zTdNaRnw5gcRCsAIfmL5I8OE3T2jHGzyU5dpqmy99cHGPcmGTHgk0HsAjYwwpwkMYYy5N8L8nJ037+Mh1jjCT/k2T1NE0/PMLjASwa9rACHLyTkryU5K/HGN8dY9w2xvjI29YvTPKiWAU4NIIV4ODNJfl4klunaTo7yatJNr1t/Yokdy7EYACLiWAFOHjPJ3l+mqbvzF5vzp6AzRhjLsnnk/zNAs0GsGgIVoCDNE3TC0meG2O8eUeATyf5/uzr30ryg2manl+Q4QAWEXcJADg0G5N8Y3aHgGeS/O7s+LrYDgDwU+EuAQAAVLMlAACAaoIVAIBqghUAgGqCFQCAaoIVAIBqghXgHcYYvzPGmMYYpy30LAAIVoD9uSLJv8x+XxCzJ2UBEMEKsI8xxs8nuSDJ72XPzf8zxvjkGOPhMcbmMcYPxhjfGGOM2dpXxhjfH2M8Ocb48zHGkjHGs2OPFWOM3WOMi2bnPjLG+OUxxkfGGH81xnh8jPHdMcbnZuvrxxgPjDG2JHloYf4EAPr4FzzAvj6X5MFpmp4eY7w8xjhndvzsJL+S5H+TbEvym2OM/0pyWZLTpmmaxhgrpmnaPcbYnuT0JCcl+fckF44xvpPkY9M0/XCM8adJtkzTdOUYY0WSx8cY/zz7Ph9PcuY0Tf93pD4wQDtXWAH2dUWSu2Zf35W3tgU8Pk3T89M0vZHke0lOTLIjyc4kt48xPp/ktdm5305y0ezXn2XPFdtfT/Jvs/XfTrJpjPG9JA8nWZrkhNnaP4lVgH25wgowM8aYT7I6ya+OMaYkS5JMSb6V5PW3nbo7ydw0TbvGGOcl+XSStUn+YPb+R5JsSPKLSf4kyR8n+WT2hGySjCRrpmna/o7v/xtJXj0sHw7gKOYKK8Bb1ia5Y5qmX5qm6cRpmj6W5NkkF+7v5Nl+1+XTNP1DkmuS/Nps6fEk5yd5Y5qmndlzRfb3sydkk+Qfk2x82z7Ysw/T5wFYFAQrwFuuSPJ37zj2tznw3QJ+IcnfjzGezJ67CvxRkkzT9HqS55L86+y8b8/O/Y/Z62uTHJPkyTHGf85eA3AAY5qmhZ4BAAAOyBVWAACqCVYAAKoJVgAAqglWAACqCVYAAKoJVgAAqglWAACqCVYAAKr9P8bb7HZA9fu3AAAAAElFTkSuQmCC", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "counts = count_digits(answers)\n", "plot_counts(counts)\n", "print(f\"Entropy: {entropy(counts)}\")" ] }, { "cell_type": "markdown", "id": "2f6c8a22-fdf5-4f30-865c-8e11927b1b7c", "metadata": {}, "source": [ "GPT4 returns the correct answer with certainty." ] }, { "cell_type": "markdown", "id": "50d4a55e-86df-46ab-8b38-302c79bc8add", "metadata": {}, "source": [ "## Conclusion\n", "\n", "When generating text completions with a language model we typically look at one output sample, trying to find the \"right\" answer. However, doing so we obscure the diversity of answers that these language models can produce. Assuming the diversity of answers reflects these models' \"uncertainty\", we can use measures such as the entropy of the answers' distribution to evaluate the quality of the answer.\n", "\n", "Which result should we be choosing once we have different samples? There is no definite answer to this question. The [self-consistency method](https://arxiv.org/abs/2203.11171) consists in choosing the result based on a majority vote. We think this choice is arbitrary and that choosing the correct answer is a [decision theory](https://en.wikipedia.org/wiki/Decision_theory) problem, which can only be solved by specifying a loss function that is adapted to the experiment's context; the majority vote being a particular case with a 0-1 loss." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 5 } ================================================ FILE: examples/self_consistency.py ================================================ import re import numpy as np import openai import outlines from outlines import Template examples = [ { "question": "There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?", "answer": "We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.", }, { "question": "If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?", "answer": "There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.", }, { "question": "Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?", "answer": "Leah had 32 chocolates and Leah’s sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.", }, { "question": "Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?", "answer": "Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.", }, { "question": "Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?", "answer": "He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.", }, { "question": "There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?", "answer": "There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.", }, { "question": "Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?", "answer": "Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.", }, { "question": "Olivia has $23. She bought five bagels for $3 each. How much money does she have left?", "answer": "She bought 5 bagels for $3 each. This means she spent 5", }, ] question = "When I was 6 my sister was half my age. Now I’m 70 how old is my sister?" few_shots = Template.from_file("prompts/self_consistency.txt") model = outlines.from_openai(openai.OpenAI(), "gpt-4o-mini") generator = outlines.Generator(model) prompt = few_shots(question=question, examples=examples) answers = generator(prompt, n=10) digits = [] for answer in answers: try: match = re.findall(r"\d+", answer)[-1] if match is not None: digit = int(match) digits.append(digit) except AttributeError: print(f"Could not parse the completion: '{answer}'") unique_digits, counts = np.unique(digits, return_counts=True) results = {int(d): int(c) for d, c in zip(unique_digits, counts)} print(results) max_count = max(results.values()) answer_value = [key for key, value in results.items() if value == max_count][0] total_count = sum(results.values()) print( f"The most likely answer is {answer_value} ({max_count / total_count * 100}% consensus)" ) ================================================ FILE: examples/simulation_based_inference.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "id": "e7c7d0bb-8d45-4139-a584-02c7196db92b", "metadata": {}, "source": [ "# Find the best few-shot examples using simulation-based inference" ] }, { "cell_type": "code", "execution_count": 1, "id": "831a76f5-c569-4174-adab-fb0245877367", "metadata": {}, "outputs": [], "source": [ "import json\n", "import random\n", "import requests\n", "import re\n", "\n", "import openai\n", "\n", "import outlines\n", "\n", "random.seed(0)" ] }, { "cell_type": "code", "execution_count": 18, "id": "ec604edc-c8b6-4088-bf17-b77ae57d05a1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "env: OPENAI_API_KEY=# your key here\n" ] } ], "source": [ "%env OPENAI_API_KEY = # your key here" ] }, { "cell_type": "markdown", "id": "aabb4db6-fd94-4c42-ab7f-97c3de45b2cc", "metadata": {}, "source": [ "In this example we will use GPT 4 mini to solve problems from the GSM-8K dataset. The state-of-the-art performance on this dataset is obtained using few-shot prompting with 5 examples. However, it is not clear how one should select these examples. Here, we will use **simulation-based inference** to try to infer which examples we should be using to get the best out of the model's abilities to solve the problem.\n", "\n", "Let's start with downloading the dataset:" ] }, { "cell_type": "code", "execution_count": 3, "id": "367f5f89-8e5d-4381-b9eb-78c60bc50f86", "metadata": {}, "outputs": [], "source": [ "result = requests.get(\n", " \"https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/train.jsonl\"\n", ")\n", "lines = result.iter_lines()" ] }, { "cell_type": "markdown", "id": "ef0f7aa9-d528-41e9-8a9d-4497f01f0692", "metadata": {}, "source": [ "We now divide the train set in two sets:\n", "- 20 problems from which we are going to sample 5 examples at random for every inference;\n", "- 500 problems which we are going to use to perform inference." ] }, { "cell_type": "code", "execution_count": 4, "id": "0667c4a8-cebe-4796-bbc9-575ee9498717", "metadata": {}, "outputs": [], "source": [ "example_set = []\n", "for _ in range(10):\n", " line = json.loads(next(lines))\n", " answer = re.findall(r\"\\d+\", line[\"answer\"])[-1]\n", " example_set.append({\"question\": line[\"question\"], \"answer\": answer})\n", "\n", "train_set = []\n", "for _ in range(500):\n", " line = json.loads(next(lines))\n", " answer = re.findall(r\"\\d+\", line[\"answer\"])[-1]\n", " train_set.append({\"question\": line[\"question\"], \"answer\": answer})" ] }, { "cell_type": "markdown", "id": "4b52b470-d818-495a-a6e3-e50a1deff13c", "metadata": {}, "source": [ "Now let's define the prompt, the model, and the sampling loop. The sampling loop consists in choosing 5 examples at random, sampling 20 model answers; if the answer is correct we keep the example ids as samples, otherwise continue:" ] }, { "cell_type": "code", "execution_count": 9, "id": "9fbebaa9-f05e-4c6b-8875-73a08273bbb5", "metadata": {}, "outputs": [], "source": [ "few_shots = outlines.Template.from_string(\n", " \"\"\"\n", " {% for example in examples %}\n", " Q: {{ example.question }}\n", " A: {{ example.answer }}\n", " {% endfor %}\n", " Q: {{ question }}\n", " A:\n", " \"\"\"\n", ")\n", "\n", "model = outlines.from_openai(openai.OpenAI(), \"gpt-4o-mini\")\n", "\n", "\n", "# TODO: This could largely benefit from vectorization in #52\n", "def one_train_example(problem, example_set):\n", " example_ids = random.choices(range(0, len(example_set)), k=5)\n", " examples = [example_set[i] for i in example_ids]\n", " prompt = few_shots(question=problem[\"question\"], examples=examples)\n", " answers_raw = model(prompt, samples=20)\n", "\n", " samples = []\n", " for answer_raw in answers_raw:\n", " try:\n", " answer = re.findall(r\"\\d+\", answer_raw)[-1]\n", " if answer == problem[\"answer\"]:\n", " samples += example_ids\n", " else:\n", " continue\n", " except IndexError:\n", " pass\n", "\n", " return samples" ] }, { "cell_type": "code", "execution_count": 10, "id": "1dae1ef2-c9e0-4c98-8686-7fbc2ff55e56", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9efc9d077af24a2eb5ea3c05fe63f298", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/500 [00:00" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import matplotlib.pylab as plt\n", "\n", "example_ids, counts = np.unique(samples, return_counts=True)\n", "\n", "fig = plt.figure(figsize=(12, 8))\n", "ax = fig.add_subplot(111)\n", "ax.bar(example_ids, counts)\n", "\n", "ax.spines[[\"top\", \"right\"]].set_visible(False)\n", "\n", "ax.set_xticks(range(10))\n", "ax.set_xlabel(\"Example #\")\n", "ax.set_ylabel(\"Counts\")" ] }, { "cell_type": "markdown", "id": "cde37e5b-377e-4872-af40-674d680bd2da", "metadata": {}, "source": [ "Looking at the distribution, our best guess for which examples we should use for benchmarking on the test set would be 0, 1, 2, 6 and 9. This method can be trivially extended to other workflows that use few-shot examples to query LLMs. Of course, simulation-based inference extends beyong choosing the \"best\" prompt, and could for instance be useful to select the structure of chains of LLMs and tools as well." ] }, { "cell_type": "code", "execution_count": 13, "id": "bddda20b-234a-4d30-b40a-90708fbaba23", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'question': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?',\n", " 'answer': '72'}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "example_set[0]" ] }, { "cell_type": "code", "execution_count": 14, "id": "fb186bf9-62b7-485f-a8ce-401f551a9e57", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'question': 'Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?',\n", " 'answer': '10'}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "example_set[1]" ] }, { "cell_type": "code", "execution_count": 15, "id": "ae427bb2-e3f4-4a96-a508-e8011a0fc553", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'question': 'Betty is saving money for a new wallet which costs $100. Betty has only half of the money she needs. Her parents decided to give her $15 for that purpose, and her grandparents twice as much as her parents. How much more money does Betty need to buy the wallet?',\n", " 'answer': '5'}" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "example_set[2]" ] }, { "cell_type": "code", "execution_count": 16, "id": "fe43ae0f-c18f-4b74-b639-8481472edf4d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'question': 'Albert is wondering how much pizza he can eat in one day. He buys 2 large pizzas and 2 small pizzas. A large pizza has 16 slices and a small pizza has 8 slices. If he eats it all, how many pieces does he eat that day?',\n", " 'answer': '48'}" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "example_set[6]" ] }, { "cell_type": "code", "execution_count": 17, "id": "19d9d936-d0f0-4927-990c-76dbbfa95b47", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'question': 'Tina makes $18.00 an hour. If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage. If she works 10 hours every day for 5 days, how much money does she make?',\n", " 'answer': '990'}" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "example_set[9]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 5 } ================================================ FILE: examples/vllm_offline_integration.py ================================================ """Example of integrating `outlines` with `vllm`.""" import vllm from pydantic import BaseModel from transformers import AutoTokenizer from outlines.models.vllm_offline import adapt_tokenizer from outlines.processors import JSONLogitsProcessor class Person(BaseModel): first_name: str surname: str MODEL_ID = "mistralai/Mistral-7B-v0.1" llm = vllm.LLM(model=MODEL_ID, max_model_len=512) tokenizer = adapt_tokenizer(AutoTokenizer.from_pretrained(MODEL_ID)) logits_processor = JSONLogitsProcessor( schema=Person, tokenizer=tokenizer, tensor_library_name="torch", whitespace_pattern=r" ?" ) result = llm.generate( ["He is Tom Jones", "She saw Linda Smith"], sampling_params=vllm.SamplingParams( temperature=0.0, max_tokens=50, logits_processors=[logits_processor], ), ) print(result) ================================================ FILE: flake.nix ================================================ { inputs.flake-utils.url = "github:numtide/flake-utils"; outputs = { self, nixpkgs, flake-utils }: flake-utils.lib.eachDefaultSystem (system: let pkgs = import nixpkgs { inherit system; config.allowUnfree = true; }; in { devShells.default = import ./shell.nix { inherit pkgs; }; }); } ================================================ FILE: llm.txt ================================================ # Outlines Codebase Reference ## Overview Outlines is a library for structured generation for type-safe LLMs. It ensures outputs conform to specified formats (JSON schemas, regex patterns, grammars) by constraining the token generation process, or calling an API that uses this process. **Core insight**: Instead of generating text and hoping it matches a format, Outlines makes it impossible for the model to generate invalid outputs by masking invalid tokens during generation. **Note**: The codebase has undergone significant refactoring. Core FSM functionality has been extracted to the `outlines-core` package. ## Usage Examples For comprehensive usage examples, see: - **README.md**: Quick start examples for JSON generation, regex constraints, and choice selection - **docs/cookbook/**: Detailed examples including: - `docs/cookbook/prompting.md`: Advanced prompting techniques - `docs/cookbook/models.md`: Working with different model providers - `docs/cookbook/humaneval.md`: Code generation examples - `docs/cookbook/qa-with-citations.md`: Question answering with structured citations - `docs/cookbook/deploy-to-servers.md`: Deployment examples with vLLM and TGI - **examples/**: Standalone example scripts - `examples/lark_grammar.py`: Grammar-based generation - `examples/math_generate_code.py`: Code generation with constraints - `examples/multiple_sglang_backend.py`: Using multiple backend servers - **tests/**: Test files contain many practical usage patterns ## Architecture ### Layer Stack ``` User API (outlines.models) ↓ Generator Classes (SteerableGenerator, BlackBoxGenerator) ↓ Type System (types/dsl.py: Pydantic → JsonSchema → Regex) ↓ FSM Compilation (outlines-core: regex → FSM via interegular) ↓ Guide System (processors/guide.py: FSM state management) ↓ Logits Processing (processors/structured.py: token masking) ↓ Model Providers (transformers, OpenAI, etc.) ``` ### Key Design Decisions 1. **FSM-based constraints**: For local models, constraints compile to finite state machines that track valid next tokens 2. **Provider abstraction**: Same constraint system works across local models (transformers) and APIs (OpenAI) 3. **Lazy compilation**: FSMs are compiled on first use and cached persistently 4. **Token-level control**: Constraints apply at the token level, not character level 5. **Type-driven API**: Python types are the primary interface for specifying constraints ## Core Components ### Models (`outlines/models/`) Base classes and implementations for different model providers: - `SteerableModel`: For models where we control logits (transformers, llama.cpp) - `BlackBoxModel`: For API models with structured output support (OpenAI, Anthropic) - Each provider has an adapter class handling input and output format conversion Key files: - `base.py`: Abstract base classes defining the model interface - `transformers.py`: Integration with HuggingFace transformers - `openai.py`: OpenAI API integration - `gemini.py`: Gemini integration - `mlxlm.py`: MLX-LM integration - `vllm_offline.py`: vLLM integration - `llamacpp.py`: llama.cpp integration - `ollama.py`: Ollama integration - `vllm.py`: Integration with vLLM servers - `tgi.py`: Integration with text-generation-inferece servers - `sglang.py`: Integration with SGLang servers ### Generation (`outlines/generator.py`) Handles the generation process: - `generator.py`: Main `Generator` class implementations (root level) - Stream functionality is now integrated into generator classes Base classes and implementations for different model providers: - `BlackBoxGenerator`: For API models with structured outputs support - `SteerableGenerator`: For modesl where we control the logits ### FSM System (`outlines/fsm/` and `outlines/processors/`) Core constraint enforcement: - `processors/guide.py`: Base `Guide` class and `RegexGuide` implementation - `fsm/parsing.py`: Lark-based CFG parsing with `PartialLark` parser - Regex to FSM compilation now uses `outlines_core.fsm` module Key concepts: - **Guide**: Manages FSM state during generation - **State transitions**: Precomputed mapping of (state, token) → next_state - **Token masking**: For each state, compute which tokens are valid ### Type System (`outlines/types/`) Type conversion pipeline: - `dsl.py`: Term DSL defining constraint language (Sequence, Choice, etc.) and JSON schema to regex conversion - `__init__.py`: Common regex types and DSL functions - Python types → Term DSL → Regex → FSM ### Logits Processors (`outlines/processors/`) Apply constraints during generation: - `structured.py`: Main `StructuredLogitsProcessor` - `base_logits_processor.py`: Abstract base class - Processors mask invalid tokens by setting their logits to -inf ## Key Algorithms ### FSM Compilation Pipeline 1. **Pattern definition**: User provides Pydantic model, regex, or grammar 2. **Schema to regex**: Convert complex types to regex patterns - JSON schemas become regex matching valid JSON - Pydantic models extract JSON schema then convert 3. **Regex to FSM**: Use interegular library to build FSM 4. **FSM to token map**: For each FSM state, compute valid tokens - Handle multi-character tokens - Account for token boundaries 5. **Guide creation**: Wrap FSM with state tracking ### Token Masking Process ```python # Simplified logits processing def process_logits(logits, current_state, guide): valid_tokens = guide.get_valid_tokens(current_state) mask = torch.full_like(logits, -float('inf')) mask[valid_tokens] = 0 return logits + mask ``` ## File Organization ``` outlines/ ├── __init__.py # Public API exports ├── generator.py # Main Generator classes ├── models/ # Model integrations │ ├── base.py # Abstract base classes │ ├── transformers.py # HuggingFace support │ └── [provider].py # Other providers (openai, anthropic, etc.) ├── fsm/ # FSM engine │ ├── __init__.py │ └── parsing.py # Grammar parsing ├── types/ # Type system │ ├── __init__.py # Common regex types and DSL exports │ ├── dsl.py # Term DSL and JSON schema conversion │ └── utils.py # Type checking utilities ├── processors/ # Logits processing and guides │ ├── guide.py # Guide implementations │ ├── structured.py # Main processor │ └── tensor_adapters/ # Framework-specific tensor handling ├── caching.py # Caching system ├── grammars/ # Grammar files (.lark) ``` ## Extension Points ### Adding a Model Provider 1. Create model class inheriting from `SteerableModel` or `BlackBoxModel` 2. Implement required methods: `generate()`, `generate_stream()` 3. Add constructor function in `outlines/__init__.py` 4. Handle provider-specific input and structured output formats with a `TypeAdapter` ### Adding a Constraint Type 1. Define new Term subclass in `types/dsl.py` 2. Implement `to_regex()` conversion 3. Register type handler for Python type conversion in `python_types_to_terms()` 4. Add tests for FSM compilation ### Custom Logits Processor 1. Inherit from `OutlinesLogitsProcessor` 2. Implement `process_logits()` method 3. Handle batch processing and state management 4. Register with generator ## Common Patterns in Codebase 1. **Factory functions**: `from_transformers()`, `from_openai()` hide complexity 2. **Abstract base classes**: Define interfaces for models, processors, guides 3. **Lazy imports**: Optional dependencies imported only when needed 5. **Type adapters**: Convert between Outlines types and provider formats ================================================ FILE: mkdocs.yml ================================================ # Site information site_name: Outlines site_author: The Outlines developers site_description: >- Structured text generation with LLMs # Repository repo_name: dottxt-ai/outlines repo_url: https://github.com/dottxt-ai/outlines # Copyright copyright: Copyright © 2023- The Outlines Developers # Documentation directory docs_dir: docs # Configuration theme: name: material palette: # Palette toggle for light mode - media: "(prefers-color-scheme: light)" scheme: default primary: white logo: assets/images/logo-square.svg favicon: assets/images/logo-square.png icon: repo: fontawesome/brands/github features: - content.code.copy - navigation.expand - navigation.tabs - navigation.sections - header.autohide - announce.dismiss font: text: Inter code: Source Code Pro # Additional configuration extra: social: - icon: fontawesome/brands/github link: https://github.com/dottxt-ai - icon: fontawesome/brands/twitter link: https://twitter.com/remilouf generator: false analytics: provider: google property: !ENV GOOGLE_ANALYTICS_KEY version: provider: mike default: latest alias: true # Extensions markdown_extensions: - admonition - def_list - attr_list - md_in_html - pymdownx.highlight: anchor_linenums: true line_spans: __span pygments_lang_class: true noclasses: True pygments_style: nord - pymdownx.superfences: custom_fences: - name: mermaid class: mermaid format: !!python/name:pymdownx.superfences.fence_code_format - pymdownx.tabbed: alternate_style: true - pymdownx.inlinehilite - pymdownx.details - pymdownx.emoji: emoji_index: !!python/name:material.extensions.emoji.twemoji emoji_generator: !!python/name:material.extensions.emoji.to_svg - pymdownx.snippets: extra_css: - stylesheets/extra.css plugins: - blog - mkdocstrings: default_handler: python handlers: python: options: docstring_style: numpy show_submodules: true - search - section-index - social: cards_layout_options: color: #173a58 - redirects: redirect_maps: "welcome.md": "index.md" - git-committers: repository: dottxt-ai/outlines branch: main - git-revision-date-localized: enable_creation_date: true type: timeago - gen-files: scripts: - scripts/gen_ref_pages.py - literate-nav: nav_file: SUMMARY.md nav: - Home: index.md - Guide: - Getting Started: guide/getting_started.md - Installation: guide/installation.md - Migrating to v1: guide/migration.md - Vision-Language Models: guide/vlm.md - Deploying with FastAPI: guide/fastapi_vllm_deployment.md - Chat Templating for Instruct Models: guide/chat_templating.md - Architecture: guide/architecture.md - Features: - Overview: features/index.md - Core: - Models: - Overview: features/models/index.md - Anthropic: features/models/anthropic.md - Dottxt: features/models/dottxt.md - Gemini: features/models/gemini.md - Llamacpp: features/models/llamacpp.md - Mlx-lm: features/models/mlxlm.md - Ollama: features/models/ollama.md - OpenAI: features/models/openai.md - OpenAI compatible API: features/models/openai_compatible.md - OpenRouter: features/models/openrouter.md - SGLang: features/models/sglang.md - TGI: features/models/tgi.md - Transformers: features/models/transformers.md - TransformersMultiModal: features/models/transformers_multimodal.md - vLLM (online server): features/models/vllm.md - vLLM (offline): features/models/vllm_offline.md - Model Inputs: features/core/inputs.md - Output Types: - Overview: features/core/output_types.md - Basic Types: features/core/output_types#basic-python-types - Multiple-Choices: features/core/output_types#multiple-choices - JSON: features/core/output_types#json-schemas - Regex: features/core/output_types#regex-patterns - Context-free Grammars: features/core/output_types#context-free-grammars - Generator: features/core/generator.md - Utilities: - Application: features/utility/application.md - Regex DSL: features/utility/regex_dsl.md - Template: features/utility/template.md - Advanced: - Logits Processors: features/advanced/logits_processors.md - Structured Generation Backends: features/advanced/backends.md - API Reference: api_reference/ - Examples: - examples/index.md - Classification: examples/classification.md - Named Entity Extraction: examples/extraction.md - Dating Profiles: examples/dating_profiles.md - Chain of Density: examples/chain_of_density.md - Playing chess: examples/models_playing_chess.md - SimTom: examples/simtom.md - Q&A with Citations: examples/qa-with-citations.md - Knowledge Graph Extraction: examples/knowledge_graph_extraction.md - Structured Generation Workflow: examples/structured_generation_workflow.md - Chain of Thought (CoT): examples/chain_of_thought.md - ReAct Agent: examples/react_agent.md - Structured Generation from PDFs: examples/read-pdfs.md - Earnings Reports to CSV: examples/earnings-reports.md - Receipt Digitization: examples/receipt-digitization.md - Extract Events Details: examples/extract_event_details.md - Run on the cloud: - BentoML: examples/deploy-using-bentoml.md - Cerebrium: examples/deploy-using-cerebrium.md - Modal: examples/deploy-using-modal.md - Community: - community/index.md - Feedback 🫶: community/feedback.md - Our Discord Server ☕: https://discord.com/invite/R9DSu34mGd - How to Contribute 🏗️: community/contribute.md - Community Projects 👏: community/examples.md - Versioning Guide 📌: community/versioning.md - Blog: https://blog.dottxt.co ================================================ FILE: outlines/__init__.py ================================================ """Outlines is a Generative Model Programming Framework.""" # re-export on top-level namespace from outlines import grammars as grammars from outlines import inputs as inputs from outlines import models as models from outlines import processors as processors from outlines import types as types from outlines.applications import Application as Application from outlines.caching import clear_cache as clear_cache from outlines.caching import disable_cache as disable_cache from outlines.caching import get_cache as get_cache from outlines.generator import Generator as Generator from outlines.inputs import Audio as Audio from outlines.inputs import Image as Image from outlines.inputs import Video as Video from outlines.models import * # noqa: F403 from outlines.templates import Template as Template from outlines.templates import Vision as Vision from outlines.types import cfg as cfg from outlines.types import json_schema as json_schema from outlines.types import regex as regex ================================================ FILE: outlines/applications.py ================================================ """Encapsulate a prompt template and an output type into a reusable object.""" from typing import Any, Callable, Dict, Optional, Union from outlines.generator import ( BlackBoxGenerator, Generator, SteerableGenerator, ) from outlines.models.base import Model from outlines.templates import Template class Application: """ Application is a class that encapsulates a prompt template and an output type. It can be called to generate a response by providing a model, the values to be substituted in the template in a dictionary and optional inference parameters. Parameters ---------- template : Union[Template, Callable] A callable that takes arguments and returns a prompt string. output_type : Any The expected output type of the generated response. Examples -------- ```python from pydantic import BaseModel from transformers import AutoModelForCausalLM, AutoTokenizer from outlines import models, Application from outlines.types import JsonType from outlines.templates import Template class OutputModel(BaseModel): result: int model = models.from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) template_string = "What is 2 times {{ num }}?" template = Template.from_string(template_string) application = Application(template, JsonType(OutputModel)) result = application(model, {"num": 3}, max_new_tokens=20) print(result) # Expected output: { "result" : 6 } ``` """ def __init__( self, template: Union[Template, Callable], output_type: Optional[Any] = None, ): """ Parameters ---------- template The template to use to build the prompt. output_type The output type provided to the generator. """ self.template = template self.output_type = output_type self.generator: Optional[Union[ BlackBoxGenerator, SteerableGenerator ]] = None self.model: Optional[Model] = None def __call__( self, model: Model, template_vars: Dict[str, Any], **inference_kwargs ) -> Any: """ Parameters ---------- model The model to use to generate the response. template_vars The variables to be substituted in the template. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Any The generated response. """ if model is None: raise ValueError("you must provide a model") # We save the generator to avoid creating a new one for each call. # If the model has changed since the last call, we create a new # generator. if model != self.model: self.model = model self.generator = Generator(model, self.output_type) # type: ignore prompt = self.template(**template_vars) assert self.generator is not None return self.generator(prompt, **inference_kwargs) ================================================ FILE: outlines/backends/__init__.py ================================================ """Module to define the backends in charge of creating logits processors.""" from outlines.backends.base import ( BaseBackend, LogitsProcessorType, ) from outlines.backends.llguidance import LLGuidanceBackend from outlines.backends.outlines_core import OutlinesCoreBackend from outlines.backends.xgrammar import XGrammarBackend from outlines.models import SteerableModel __all__ = [ "BaseBackend", "LogitsProcessorType", "LLGuidanceBackend", "OutlinesCoreBackend", "XGrammarBackend", "SteerableModel", "CFG_DEFAULT_BACKEND", "JSON_SCHEMA_DEFAULT_BACKEND", "REGEX_DEFAULT_BACKEND", "get_json_schema_logits_processor", "get_regex_logits_processor", "get_cfg_logits_processor", ] CFG_DEFAULT_BACKEND = "llguidance" JSON_SCHEMA_DEFAULT_BACKEND = "outlines_core" REGEX_DEFAULT_BACKEND = "outlines_core" def _get_backend(backend_name: str, model: SteerableModel) -> BaseBackend: """Create a Backend instance. Parameters ---------- backend_name: str The name of the backend to get. model: Model The Outlines model of the user. Returns ------- backend: BaseBackend The backend instance. """ if backend_name == "outlines_core": return OutlinesCoreBackend(model) elif backend_name == "xgrammar": return XGrammarBackend(model) elif backend_name == "llguidance": return LLGuidanceBackend(model) else: raise ValueError(f"Backend {backend_name} not supported") def get_json_schema_logits_processor( backend_name: str | None, model: SteerableModel, json_schema: str, ) -> LogitsProcessorType: """Create a logits processor from a JSON schema. Parameters ---------- backend_name: str | None The name of the backend to use. model: Model The Outlines model of the user. json_schema: str The JSON schema to create a logits processor from. Returns ------- LogitsProcessorType The logits processor. """ backend = _get_backend( backend_name or JSON_SCHEMA_DEFAULT_BACKEND, model, ) return backend.get_json_schema_logits_processor(json_schema) def get_regex_logits_processor( backend_name: str | None, model: SteerableModel, regex: str, ) -> LogitsProcessorType: """Create a logits processor from a regex. Parameters ---------- backend_name: str | None The name of the backend to use. model: Model The Outlines model of the user. regex: str The regex to create a logits processor from. Returns ------- LogitsProcessorType The logits processor. """ backend = _get_backend( backend_name or REGEX_DEFAULT_BACKEND, model, ) return backend.get_regex_logits_processor(regex) def get_cfg_logits_processor( backend_name: str | None, model: SteerableModel, grammar: str, ) -> LogitsProcessorType: """Create a logits processor from a context-free grammar. Parameters ---------- backend_name: str | None The name of the backend to use. model: Model The Outlines model of the user. grammar: str The context-free grammar to create a logits processor from. Returns ------- LogitsProcessorType The logits processor. """ backend = _get_backend( backend_name or CFG_DEFAULT_BACKEND, model, ) return backend.get_cfg_logits_processor(grammar) ================================================ FILE: outlines/backends/base.py ================================================ """Base class for all backends.""" from abc import ABC, abstractmethod from typing import Any LogitsProcessorType = Any class BaseBackend(ABC): """Base class for all backends. The subclasses must implement methods that create a logits processor from a JSON schema, regex or CFG. """ @abstractmethod def get_json_schema_logits_processor( self, json_schema: str ) -> LogitsProcessorType: """Create a logits processor from a JSON schema. Parameters ---------- json_schema: str The JSON schema to create a logits processor from. Returns ------- LogitsProcessorType The logits processor. """ ... @abstractmethod def get_regex_logits_processor(self, regex: str) -> LogitsProcessorType: """Create a logits processor from a regex. Parameters ---------- regex: str The regex to create a logits processor from. Returns ------- LogitsProcessorType The logits processor. """ ... @abstractmethod def get_cfg_logits_processor(self, grammar: str) -> LogitsProcessorType: """Create a logits processor from a context-free grammar. Parameters ---------- grammar: str The context-free grammar to create a logits processor from. Returns ------- LogitsProcessorType The logits processor. """ ... ================================================ FILE: outlines/backends/llguidance.py ================================================ """Backend class for LLGuidance.""" import warnings from typing import TYPE_CHECKING from outlines.backends.base import BaseBackend from outlines.models import LlamaCpp, MLXLM, SteerableModel, Transformers from outlines.processors.base_logits_processor import ( OutlinesLogitsProcessor, TensorType ) if TYPE_CHECKING: from llguidance import LLGTokenizer SUPPORTED_TENSOR_LIBRARIES = ["numpy", "mlx", "torch"] class LLGuidanceLogitsProcessor(OutlinesLogitsProcessor): """Logits Processor for the LLGuidance backend.""" def __init__( self, grammar: str, llg_tokenizer, tensor_library_name: str, ) -> None: """ Parameters ---------- grammar: str The grammar spec to use to create the LLMatcher llg_tokenizer: LLTokenizer The LLGuidance tokenizer tensor_library_name: str The name of the tensor library used by the model """ self.is_first_token = True self.grammar = grammar self.llg_tokenizer = llg_tokenizer self.tensor_library_name = tensor_library_name super().__init__(tensor_library_name) def reset(self): """Ensure self._setup is called again for the next generation.""" self.is_first_token = True def _setup(self, batch_size: int) -> None: """Setup the LLMatchers, the bitmask and some functions used in the `process_logits` method. This method is called when the first token is generated instead of at initialization because we need to know the batch size. Parameters ---------- batch_size: int The batch size of the input """ from llguidance import LLMatcher self.ll_matchers = [ LLMatcher(self.llg_tokenizer, self.grammar) for _ in range(batch_size) ] # we must adapt the bitmask creation and the bias function to the # tensor library used by the model if self.tensor_library_name == "torch": import llguidance.torch self.bitmask = llguidance.torch.allocate_token_bitmask(batch_size, self.llg_tokenizer.vocab_size) self._bias_logits = self._bias_logits_torch elif self.tensor_library_name == "numpy": import llguidance.numpy self.bitmask = llguidance.numpy.allocate_token_bitmask(batch_size, self.llg_tokenizer.vocab_size) self._bias_logits = self._bias_logits_numpy elif self.tensor_library_name == "mlx": # pragma: no cover import llguidance.numpy self.bitmask = llguidance.numpy.allocate_token_bitmask(batch_size, self.llg_tokenizer.vocab_size) self._bias_logits = self._bias_logits_mlx else: # pragma: no cover raise ValueError(f"Unsupported tensor library: {self.tensor_library_name}") def _bias_logits_mlx( # pragma: no cover self, input_ids: TensorType, logits: TensorType ) -> TensorType: """Bias the logits for the MLX backend.""" import llguidance.mlx import llguidance.numpy biased_logits_array = [] for i in range(self.tensor_adapter.shape(input_ids)[0]): llguidance.numpy.fill_next_token_bitmask(self.ll_matchers[i], self.bitmask, i) biased_logits = llguidance.mlx.apply_token_bitmask( logits[i], self.bitmask[i] # type: ignore ) biased_logits_array.append(biased_logits) return self.tensor_adapter.concatenate(biased_logits_array) def _bias_logits_torch( self, input_ids: TensorType, logits: TensorType ) -> TensorType: """Bias the logits for the Torch backend.""" import llguidance.torch for i in range(self.tensor_adapter.shape(input_ids)[0]): llguidance.torch.fill_next_token_bitmask(self.ll_matchers[i], self.bitmask, i) self.bitmask = self.tensor_adapter.to_device( self.bitmask, self.tensor_adapter.get_device(logits) ) llguidance.torch.apply_token_bitmask_inplace( logits[i], # type: ignore self.bitmask[i] ) self.bitmask = self.tensor_adapter.to_device( self.bitmask, "cpu" ) return logits def _bias_logits_numpy( self, input_ids: TensorType, logits: TensorType ) -> TensorType: """Bias the logits for the Numpy backend.""" import llguidance.numpy for i in range(self.tensor_adapter.shape(input_ids)[0]): llguidance.numpy.fill_next_token_bitmask(self.ll_matchers[i], self.bitmask, i) llguidance.numpy.apply_token_bitmask_inplace( logits[i], self.bitmask[i] # type: ignore ) return logits def process_logits( self, input_ids: TensorType, logits: TensorType ) -> TensorType: """Use the instances of LLMatcher to bias the logits. Parameters ---------- input_ids The ids of the tokens of the existing sequences. logits The logits for the current generation step. Returns ------- TensorType The biased logits. """ if self.is_first_token: self._setup(self.tensor_adapter.shape(input_ids)[0]) self.is_first_token = False # we do not make the matchers consume the last token during the first # generation step because no tokens have been generated yet else: for i in range(self.tensor_adapter.shape(input_ids)[0]): sequence = input_ids[i] # type: ignore last_token = sequence[-1].item() self.ll_matchers[i].consume_token(last_token) error = self.ll_matchers[i].get_error() if error: warnings.warn(f"Error in LLMatcher: {error}") return self._bias_logits(input_ids, logits) class LLGuidanceBackend(BaseBackend): """Backend for LLGuidance.""" def __init__(self, model: SteerableModel): """ Parameters ---------- model The Outlines model of the user. """ import llguidance as llg self.llg = llg self.tensor_library_name = model.tensor_library_name self.llg_tokenizer = self._create_llg_tokenizer(model) def _create_llg_tokenizer(self, model: SteerableModel) -> "LLGTokenizer": """Create an llg tokenizer from the Outlines model's tokenizer. Parameters ---------- model: Model The Outlines model. Returns ------- LLGTokenizer The llg tokenizer. """ if isinstance(model, Transformers): import llguidance.hf return llguidance.hf.from_tokenizer(model.hf_tokenizer) elif isinstance(model, LlamaCpp): import llama_cpp import llguidance.llamacpp vocab = llama_cpp.llama_model_get_vocab(model.model.model) return llguidance.llamacpp.lltokenizer_from_vocab(vocab) elif isinstance(model, MLXLM): # pragma: no cover import llguidance.hf return llguidance.hf.from_tokenizer( model.mlx_tokenizer._tokenizer ) else: # pragma: no cover raise ValueError( f"Unsupported model type: {type(model)}. " "Llguidance only supports LlamaCpp, MLXLM " "and Transformers models." ) def get_json_schema_logits_processor( self, json_schema: str ) -> LLGuidanceLogitsProcessor: """Create a logits processor from a JSON schema. Parameters ---------- json_schema: str The JSON schema to create a logits processor from. Returns ------- LogitsProcessor The logits processor to use to constrain the generation. """ grammar_spec = self.llg.grammar_from("json_schema", json_schema) return LLGuidanceLogitsProcessor( grammar_spec, self.llg_tokenizer, self.tensor_library_name ) def get_regex_logits_processor( self, regex: str ) -> LLGuidanceLogitsProcessor: """Create a logits processor from a regex. Parameters ---------- regex: str The regex to create a logits processor from. Returns ------- LogitsProcessor The logits processor to use to constrain the generation. """ grammar_spec = self.llg.grammar_from("regex", regex) return LLGuidanceLogitsProcessor( grammar_spec, self.llg_tokenizer, self.tensor_library_name ) def get_cfg_logits_processor( self, grammar: str ) -> LLGuidanceLogitsProcessor: """Create a logits processor from a context-free grammar. Parameters ---------- grammar: str The context-free grammar to create a logits processor from. Returns ------- LogitsProcessor The logits processor to use to constrain the generation. """ # We try both lark and ebnf try: grammar_spec = self.llg.grammar_from("grammar", grammar) except ValueError: grammar_spec = self.llg.grammar_from("lark", grammar) return LLGuidanceLogitsProcessor( grammar_spec, self.llg_tokenizer, self.tensor_library_name ) ================================================ FILE: outlines/backends/outlines_core.py ================================================ """Backend class for Outlines Core.""" from typing import Callable, Dict, List from outlines_core import Guide, Index, Vocabulary from outlines_core.json_schema import build_regex_from_schema from outlines.backends.base import BaseBackend from outlines.models import SteerableModel from outlines.models.llamacpp import LlamaCpp from outlines.models.mlxlm import MLXLM from outlines.models.transformers import Transformers from outlines.processors.base_logits_processor import ( OutlinesLogitsProcessor, TensorType, ) class OutlinesCoreLogitsProcessor(OutlinesLogitsProcessor): """Logits processor for Outlines Core.""" def __init__(self, index: Index, tensor_library_name: str): """ Parameters ---------- index: Index The Outlines Core `Index` instance to use to create the Outlines Core `Guide` instances that will be used to bias the logits tensor_library_name: str The tensor library name to use for the logits processor. """ self.index = index self.tensor_library_name = tensor_library_name self.is_first_token = True super().__init__(tensor_library_name) def reset(self) -> None: """Reset the logits processor.""" self.is_first_token = True def _setup(self, batch_size: int, vocab_size: int) -> None: """Set the guides, bitmasks and some functions used in the `process_logits` method. This method is called when the first token is generated instead of at initialization because we need to know the batch size and the device of the logits. Parameters ---------- batch_size: int The batch size. vocab_size: int The vocabulary size. """ if self.tensor_library_name == "torch": from outlines_core.kernels.torch import allocate_token_bitmask self.allocate_token_bitmask = allocate_token_bitmask self.bias_logits = self._bias_logits_torch elif self.tensor_library_name == "numpy": from outlines_core.kernels.numpy import allocate_token_bitmask self.allocate_token_bitmask = allocate_token_bitmask self.bias_logits = self._bias_logits_numpy elif self.tensor_library_name == "mlx": # pragma: no cover from outlines_core.kernels.mlx import allocate_token_bitmask self.allocate_token_bitmask = allocate_token_bitmask self.bias_logits = self._bias_logits_mlx else: # pragma: no cover raise ValueError(f"Unsupported tensor library: {self.tensor_library_name}") self._guides = [Guide(self.index) for _ in range(batch_size)] self._bitmasks = [ self.allocate_token_bitmask(vocab_size) for _ in range(batch_size) ] def _bias_logits_mlx( # pragma: no cover self, batch_size: int, logits: TensorType ) -> TensorType: """Bias the logits for MLX tensors.""" from outlines_core.kernels.mlx import ( apply_token_bitmask, fill_next_token_bitmask, ) biased_logits_array = [] for i in range(batch_size): fill_next_token_bitmask(self._guides[i], self._bitmasks[i]) biased_logits = apply_token_bitmask( self.tensor_adapter.unsqueeze(logits[i]), # type: ignore self._bitmasks[i], # type: ignore ) biased_logits_array.append(biased_logits) return self.tensor_adapter.concatenate(biased_logits_array) def _bias_logits_torch(self, batch_size: int, logits: TensorType) -> TensorType: """Bias the logits for Torch tensors.""" from outlines_core.kernels.torch import ( apply_token_bitmask_inplace, fill_next_token_bitmask, ) for i in range(batch_size): fill_next_token_bitmask(self._guides[i], self._bitmasks[i]) self._bitmasks[i] = self.tensor_adapter.to_device( self._bitmasks[i], self.tensor_adapter.get_device(logits) ) apply_token_bitmask_inplace( self.tensor_adapter.unsqueeze(logits[i]), # type: ignore self._bitmasks[i], ) self._bitmasks[i] = self.tensor_adapter.to_device(self._bitmasks[i], "cpu") return logits def _bias_logits_numpy(self, batch_size: int, logits: TensorType) -> TensorType: """Bias the logits for Numpy tensors.""" from outlines_core.kernels.numpy import ( apply_token_bitmask_inplace, fill_next_token_bitmask, ) for i in range(batch_size): fill_next_token_bitmask(self._guides[i], self._bitmasks[i]) apply_token_bitmask_inplace( self.tensor_adapter.unsqueeze(logits[i]), # type: ignore self._bitmasks[i], ) return logits def process_logits(self, input_ids: TensorType, logits: TensorType) -> TensorType: """Use the guides to bias the logits. Parameters ---------- input_ids The ids of the tokens of the existing sequences. logits The logits for the current generation step. Returns ------- TensorType The biased logits. """ batch_size = self.tensor_adapter.shape(input_ids)[0] vocab_size = self.tensor_adapter.shape(logits)[1] if self.is_first_token: self._setup(batch_size, vocab_size) self.is_first_token = False else: for i in range(batch_size): last_token_id = self.tensor_adapter.to_scalar(input_ids[i][-1]) # type: ignore # This circumvents issue #227 in outlines_core # Ideally, we would be able to advance all the times as the final # state would accept the eos token leading to itself if not self._guides[i].is_finished() or self._guides[i].accepts_tokens( [last_token_id] ): self._guides[i].advance(token_id=last_token_id, return_tokens=False) return self.bias_logits(batch_size, logits) class OutlinesCoreBackend(BaseBackend): """Backend for Outlines Core.""" def __init__(self, model: SteerableModel): """ Parameters ---------- model The Outlines model of the user. """ if isinstance(model, Transformers): tokenizer = model.tokenizer vocabulary = tokenizer.get_vocab() eos_token_id = tokenizer.eos_token_id eos_token = tokenizer.eos_token token_to_str = tokenizer.convert_token_to_string elif isinstance(model, LlamaCpp): tokenizer = model.tokenizer # type: ignore vocabulary = tokenizer.vocabulary eos_token_id = tokenizer.eos_token_id eos_token = tokenizer.eos_token token_to_str = tokenizer.convert_token_to_string elif isinstance(model, MLXLM): # pragma: no cover tokenizer = model.mlx_tokenizer # type: ignore vocabulary = tokenizer.get_vocab() eos_token_id = tokenizer.eos_token_id eos_token = tokenizer.eos_token token_to_str = lambda token: tokenizer.convert_tokens_to_string([token]) # type: ignore else: # pragma: no cover raise ValueError(f"Unsupported model type: {type(model)}") self.eos_token_id = eos_token_id self.vocabulary = self.create_outlines_core_vocabulary( vocabulary, eos_token_id, eos_token, token_to_str ) self.tensor_library_name = model.tensor_library_name def get_json_schema_logits_processor(self, json_schema: str): """Create a logits processor from a JSON schema. Parameters ---------- json_schema: str The JSON schema to create a logits processor from. Returns ------- LogitsProcessor The logits processor to use to constrain the generation. """ regex = build_regex_from_schema(json_schema) return self.get_regex_logits_processor(regex) def get_regex_logits_processor(self, regex: str): """Create a logits processor from a regex. Parameters ---------- regex: str The regex to create a logits processor from. Returns ------- LogitsProcessor The logits processor to use to constrain the generation. """ index = Index(regex, self.vocabulary) return OutlinesCoreLogitsProcessor(index, self.tensor_library_name) def get_cfg_logits_processor(self, grammar): raise NotImplementedError( "Outlines Core does not support context-free grammar." ) @staticmethod def create_outlines_core_vocabulary( vocab: Dict[str, int], eos_token_id: int, eos_token: str, token_to_str: Callable[[str], str], ) -> Vocabulary: """Create an Outlines Core Vocabulary instance. Parameters ---------- vocab: Dict[str, int] The vocabulary to create an Outlines Core vocabulary from. eos_token_id: int The EOS token ID. eos_token: str The EOS token. token_to_str: Callable[[str], str] The function to convert a token to a string. Returns ------- Vocabulary The Outlines Core Vocabulary instance. """ formatted_vocab: Dict[str, List[int]] = {} for token, token_id in vocab.items(): # This step is necessary to transform special tokens into their # string representation, in particular for spacing. We need those # string representations as outlines core first builds an FSM from # the regex provided that only contains regular strings. token_as_str = token_to_str(token) formatted_vocab.setdefault(token_as_str, []).append(token_id) formatted_vocab.pop(eos_token) return Vocabulary(eos_token_id, formatted_vocab) ================================================ FILE: outlines/backends/xgrammar.py ================================================ """Backend class for XGrammar.""" from outlines.backends.base import BaseBackend from outlines.models import SteerableModel from outlines.models.mlxlm import MLXLM from outlines.models.transformers import Transformers from outlines.processors.base_logits_processor import ( OutlinesLogitsProcessor, TensorType ) class XGrammarLogitsProcessor(OutlinesLogitsProcessor): """Logits processor for XGrammar.""" def __init__(self, compiled_grammar: str, tensor_library_name: str,): """ Parameters ---------- compiled_grammar: str The compiled grammar to use to create the logits processor. tensor_library_name: str The name of the tensor library used by the model """ import xgrammar as xgr self.xgr = xgr self.is_first_token = True self.compiled_grammar = compiled_grammar self.tensor_library_name = tensor_library_name super().__init__(tensor_library_name) def reset(self): """Ensure self._setup is called again for the next generation.""" self.is_first_token = True def _setup(self, batch_size: int, vocab_size: int) -> None: """Setup the logits processor for a new generation.""" if self.tensor_library_name == "torch": self._bias_logits = self._bias_logits_torch elif self.tensor_library_name == "mlx": # pragma: no cover self._bias_logits = self._bias_logits_mlx else: # pragma: no cover raise ValueError( f"Unsupported tensor library: {self.tensor_library_name}" ) self._matchers = [ self.xgr.GrammarMatcher(self.compiled_grammar) for _ in range(batch_size) ] self._bitmask = self.xgr.allocate_token_bitmask(batch_size, vocab_size) def _bias_logits_torch( self, input_ids: TensorType, logits: TensorType ) -> TensorType: """Bias the logits for Torch tensors.""" for i in range(self.tensor_adapter.shape(input_ids)[0]): if not self._matchers[i].is_terminated(): self._matchers[i].fill_next_token_bitmask(self._bitmask, i) self._bitmask = self.tensor_adapter.to_device( self._bitmask, self.tensor_adapter.get_device(logits) ) self.xgr.apply_token_bitmask_inplace(logits, self._bitmask) self._bitmask = self.tensor_adapter.to_device( self._bitmask, "cpu" ) return logits def _bias_logits_mlx( # pragma: no cover self, input_ids: TensorType, logits: TensorType ) -> TensorType: """Bias the logits for MLX tensors.""" import mlx.core as mx from xgrammar.kernels.apply_token_bitmask_mlx import apply_token_bitmask_mlx for i in range(self.tensor_adapter.shape(input_ids)[0]): if not self._matchers[i].is_terminated(): self._matchers[i].fill_next_token_bitmask(self._bitmask, i) biased_logits = apply_token_bitmask_mlx( mx.array(self._bitmask.numpy()), logits, self.tensor_adapter.shape(logits)[1] ) return biased_logits def process_logits( self, input_ids: TensorType, logits: TensorType ) -> TensorType: """Use the XGrammar matchers to bias the logits.""" batch_size = self.tensor_adapter.shape(input_ids)[0] vocab_size = self.tensor_adapter.shape(logits)[1] if self.is_first_token: self._setup(batch_size, vocab_size) self.is_first_token = False else: for i in range(batch_size): if not self._matchers[i].is_terminated(): # pragma: no cover last_token_id = self.tensor_adapter.to_scalar( input_ids[i][-1] # type: ignore ) assert self._matchers[i].accept_token(last_token_id) return self._bias_logits(input_ids, logits) class XGrammarBackend(BaseBackend): """Backend for XGrammar.""" def __init__(self, model: SteerableModel): """ Parameters ---------- model The Outlines model of the user. """ import xgrammar as xgr if isinstance(model, Transformers): tokenizer = model.hf_tokenizer elif isinstance(model, MLXLM): # pragma: no cover tokenizer = model.mlx_tokenizer._tokenizer else: # pragma: no cover raise ValueError( "The xgrammar backend only supports Transformers and " + "MLXLM models" ) tokenizer_info = xgr.TokenizerInfo.from_huggingface( tokenizer, vocab_size=len(tokenizer.get_vocab()) ) self.grammar_compiler = xgr.GrammarCompiler(tokenizer_info) self.tensor_library_name = model.tensor_library_name def get_json_schema_logits_processor( self, json_schema: str ) -> XGrammarLogitsProcessor: """Create a logits processor from a JSON schema. Parameters ---------- json_schema: str The JSON schema to create a logits processor from. Returns ------- LogitsProcessor The logits processor to use to constrain the generation. """ compiled_grammar = self.grammar_compiler.compile_json_schema( json_schema ) return XGrammarLogitsProcessor( compiled_grammar, self.tensor_library_name ) def get_regex_logits_processor( self, regex: str ) -> XGrammarLogitsProcessor: """Create a logits processor from a regex. Parameters ---------- regex: str The regex to create a logits processor from. Returns ------- LogitsProcessor The logits processor to use to constrain the generation. """ compiled_grammar = self.grammar_compiler.compile_regex(regex) return XGrammarLogitsProcessor( compiled_grammar, self.tensor_library_name ) def get_cfg_logits_processor( self, grammar: str ) -> XGrammarLogitsProcessor: """Create a logits processor from a context-free grammar. Parameters ---------- grammar: str The context-free grammar to create a logits processor from. Returns ------- LogitsProcessor The logits processor to use to constrain the generation. """ compiled_grammar = self.grammar_compiler.compile_grammar(grammar) return XGrammarLogitsProcessor( compiled_grammar, self.tensor_library_name ) ================================================ FILE: outlines/caching.py ================================================ """Caching and memoization of function calls.""" import asyncio import contextlib import functools import os import tempfile from typing import Callable, Optional import cloudpickle from diskcache import Cache, Disk from diskcache.core import ENOVAL, UNKNOWN, args_to_key, full_name _caching_enabled = True class CloudpickleDisk(Disk): # pragma: no cover def __init__(self, directory, compress_level=1, **kwargs): self.compress_level = compress_level super().__init__(directory, **kwargs) def put(self, key): data = cloudpickle.dumps(key) return super().put(data) def get(self, key, raw): data = super().get(key, raw) return cloudpickle.loads(data) def store(self, value, read, key=UNKNOWN): if not read: value = cloudpickle.dumps(value) return super().store(value, read, key=key) def fetch(self, mode, filename, value, read): data = super().fetch(mode, filename, value, read) if not read: data = cloudpickle.loads(data) return data @functools.lru_cache(1) def get_cache(): """Get the context object that contains previously-computed return values. The cache is used to avoid unnecessary computations and API calls, which can be long and expensive for large models. The cache directory defaults to `HOMEDIR/.cache/outlines`, but this choice can be overridden by the user by setting the value of the `OUTLINES_CACHE_DIR` environment variable. """ from outlines._version import __version__ as outlines_version # type: ignore outlines_cache_dir = os.environ.get("OUTLINES_CACHE_DIR") xdg_cache_home = os.environ.get("XDG_CACHE_HOME") home_dir = os.path.normpath(os.path.expanduser("~")) if outlines_cache_dir: # OUTLINES_CACHE_DIR takes precedence cache_dir = outlines_cache_dir elif xdg_cache_home: # pragma: no cover cache_dir = os.path.join(xdg_cache_home, "outlines") elif home_dir != "/": # pragma: no cover cache_dir = os.path.join(home_dir, ".cache", "outlines") else: # pragma: no cover # home_dir may be / inside a docker container without existing user tempdir = tempfile.gettempdir() cache_dir = os.path.join(tempdir, ".cache", "outlines") memory = Cache( cache_dir, eviction_policy="none", cull_limit=0, disk=CloudpickleDisk, ) # ensure if version upgrade occurs, old cache is pruned if outlines_version != memory.get("__version__"): memory.clear() memory["__version__"] = outlines_version return memory def cache(expire: Optional[float] = None, typed=False, ignore=()): """Caching decorator for memoizing function calls. The cache key is created based on the values returned by the key_function callable if provided or based on the arguments of the decorated function directly otherwise This is based on `diskcache`'s `memoize`. Parameters ---------- expire Seconds until arguments expire. typed Cache different types separately. ignore Positional or keyword arguments to ignore. Returns ------- A decorator function that can be applied to other functions. """ def decorator(cached_function: Callable): memory = get_cache() base = (full_name(cached_function),) if asyncio.iscoroutinefunction(cached_function): # pragma: no cover async def wrapper(*args, **kwargs): if not _caching_enabled: return await cached_function(*args, **kwargs) cache_key = wrapper.__cache_key__(*args, **kwargs) result = wrapper.__memory__.get(cache_key, default=ENOVAL, retry=True) if result is ENOVAL: result = await cached_function(*args, **kwargs) wrapper.__memory__.set(cache_key, result, expire, retry=True) return result else: def wrapper(*args, **kwargs): if not _caching_enabled: return cached_function(*args, **kwargs) cache_key = wrapper.__cache_key__(*args, **kwargs) result = wrapper.__memory__.get(cache_key, default=ENOVAL, retry=True) if result is ENOVAL: result = cached_function(*args, **kwargs) wrapper.__memory__.set(cache_key, result, expire, retry=True) return result def __cache_key__(*args, **kwargs): """Make key for cache given function arguments.""" return args_to_key(base, args, kwargs, typed, ignore) wrapper.__cache_key__ = __cache_key__ # type: ignore wrapper.__memory__ = memory # type: ignore wrapper.__wrapped__ = cached_function # type: ignore return wrapper return decorator def disable_cache(): """Disable the cache for this session. Generative models output different results each time they are called when sampling. This can be a desirable property for some workflows, in which case one can call `outlines.call.disable` to disable the cache for the session. This function does not delete the cache, call `outlines.cache.clear` instead. It also does not overwrite the cache with the values returned during the session. Example ------- `outlines.cache.disable` should be called right after importing outlines: >>> import outlines.caching as cache >>> cache.disable_cache() """ global _caching_enabled _caching_enabled = False def clear_cache(): """Erase the cache completely.""" memory = get_cache() memory.clear() @contextlib.contextmanager def cache_disabled(): # outlines.caching._caching_enabled global _caching_enabled original_state = _caching_enabled _caching_enabled = False try: yield finally: _caching_enabled = original_state ================================================ FILE: outlines/generator.py ================================================ """Encapsulate a model and an output type into a reusable object.""" from typing import ( Any, AsyncIterator, Iterator, List, Optional, Union, ) from outlines.models import ( AsyncBlackBoxModel, BlackBoxModel, SteerableModel, ) from outlines.models.base import AsyncModel, Model from outlines.backends import ( get_cfg_logits_processor, get_json_schema_logits_processor, get_regex_logits_processor, ) from outlines.backends.base import LogitsProcessorType from outlines.types import CFG, JsonSchema from outlines.types.dsl import python_types_to_terms, to_regex class BlackBoxGenerator: """Synchronous generator for which we don't control constrained generation. The output type provided is not compiled into a logits processor, but is instead directly passed on to the model. """ output_type: Optional[Any] def __init__(self, model: BlackBoxModel, output_type: Optional[Any]): """ Parameters ---------- model An instance of an Outlines model. output_type The output type that will be used to constrain the generation. """ self.model = model self.output_type = output_type def __call__(self, prompt: Any, **inference_kwargs) -> Any: """Generate a response from the model. Parameters ---------- prompt The prompt to use to generate a response. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Any The response generated by the model. """ return self.model.generate( prompt, self.output_type, **inference_kwargs ) def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]: """Generate a batch of responses from the model. Parameters ---------- prompts The list of prompts to use to generate a batch of responses. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- List[Any] The list of responses generated by the model. """ return self.model.generate_batch( prompts, self.output_type, **inference_kwargs ) def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]: """Generate a stream of responses from the model. Parameters ---------- prompt The prompt to use to generate a response. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Any The response generated by the model. """ return self.model.generate_stream( prompt, self.output_type, **inference_kwargs ) class AsyncBlackBoxGenerator: """Asynchronous generator for which we don't control constrained generation. The output type provided is not compiled into a logits processor, but is instead directly passed on to the model. """ output_type: Optional[Any] def __init__(self, model: AsyncBlackBoxModel, output_type: Optional[Any]): """ Parameters ---------- model An instance of an Outlines model. output_type The output type that will be used to constrain the generation. """ self.model = model self.output_type = output_type async def __call__(self, prompt: Any, **inference_kwargs) -> Any: """Generate a response from the model. Parameters ---------- prompt The prompt to use to generate a response. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Any The response generated by the model. """ return await self.model.generate( prompt, self.output_type, **inference_kwargs ) async def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]: """Generate a batch of responses from the model. Parameters ---------- prompts The list of prompts to use to generate a batch of responses. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- List[Any] The list of responses generated by the model. """ return await self.model.generate_batch( prompts, self.output_type, **inference_kwargs ) async def stream(self, prompt: Any, **inference_kwargs) -> AsyncIterator[Any]: """Generate a stream of responses from the model. Parameters ---------- prompt The prompt to use to generate a response. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Any The response generated by the model. """ async for chunk in self.model.generate_stream( # pragma: no cover prompt, self.output_type, **inference_kwargs ): yield chunk class SteerableGenerator: """Represents a generator for which we control constrained generation. The generator is responsible for building and storing the logits processor (which can be quite expensive to build), and then passing it to the model when the generator is called. The argument defining constrained generation can be of 2 types associated to different methods to create an instance of the generator: - `output_type` (through `__init__`): an output type as defined in the `outlines.types` module - `processor` (through `from_processor`): an already built logits processor as defined in the `outlines.processors` module The 2 parameters are mutually exclusive. """ logits_processor: Optional[LogitsProcessorType] def __init__( self, model: SteerableModel, output_type: Optional[Any], backend_name: Optional[str] = None, ): """ Parameters ---------- model An instance of an Outlines model. output_type The output type expressed as a Python type backend_name The name of the backend to use to create the logits processor. """ self.model = model if output_type is None: self.logits_processor = None else: term = python_types_to_terms(output_type) if isinstance(term, CFG): cfg_string = term.definition self.logits_processor = get_cfg_logits_processor( backend_name, model, cfg_string, ) elif isinstance(term, JsonSchema): self.logits_processor = get_json_schema_logits_processor( backend_name, model, term.schema, ) else: regex_string = to_regex(term) self.logits_processor = get_regex_logits_processor( backend_name, model, regex_string, ) @classmethod def from_processor( cls, model: SteerableModel, processor: LogitsProcessorType ): """Create a generator from a logits processor. Parameters ---------- model An instance of an Outlines model. processor An instance of a logits processor. """ instance = cls.__new__(cls) instance.model = model instance.logits_processor = processor return instance def __call__(self, prompt: Any, **inference_kwargs) -> Any: """Generate a response from the model. Parameters ---------- prompt The prompt to use to generate a response. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Any The response generated by the model. """ if self.logits_processor is not None: self.logits_processor.reset() return self.model.generate( prompt, self.logits_processor, **inference_kwargs ) def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]: """Generate a batch of responses from the model. Parameters ---------- prompts The list of prompts to use to generate a batch of responses. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- List[Any] The list of responses generated by the model. """ if self.logits_processor is not None: self.logits_processor.reset() return self.model.generate_batch( prompts, self.logits_processor, **inference_kwargs ) def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]: """Generate a stream of responses from the model. Parameters ---------- prompt The prompt to use to generate a response. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Any The response generated by the model. """ if self.logits_processor is not None: self.logits_processor.reset() return self.model.generate_stream( prompt, self.logits_processor, **inference_kwargs ) def Generator( model: Union[Model, AsyncModel], output_type: Optional[Any] = None, backend: Optional[str] = None, *, processor: Optional[LogitsProcessorType] = None, ) -> Union[SteerableGenerator, BlackBoxGenerator, AsyncBlackBoxGenerator]: """Create a generator for the given model and output parameters. The 2 parameters output_type and processor are mutually exclusive. The parameters processor is only supported for SteerableModel instances (typically local models) and is intended to be only used by advanced users. Parameters ---------- model An instance of an Outlines model. output_type The output type expressed as a Python type or a type defined in the outlines.types.dsl module. backend The name of the backend to use to create the logits processor. Only used for steerable models if there is an output type and `processor` is not provided. processor An instance of a logits processor. Returns ------- Union[SteerableGenerator, BlackBoxGenerator, AsyncBlackBoxGenerator] A generator instance. """ provided_output_params = sum( param is not None for param in [output_type, processor] ) if provided_output_params > 1: raise ValueError( "At most one of output_type or processor can be provided" ) if isinstance(model, SteerableModel): # type: ignore if processor is not None: return SteerableGenerator.from_processor(model, processor) # type: ignore else: return SteerableGenerator(model, output_type, backend) # type: ignore else: if processor is not None: raise NotImplementedError( "This model does not support logits processors" ) if isinstance(model, AsyncBlackBoxModel): # type: ignore return AsyncBlackBoxGenerator(model, output_type) # type: ignore elif isinstance(model, BlackBoxModel): # type: ignore return BlackBoxGenerator(model, output_type) # type: ignore else: raise ValueError( "The model argument must be an instance of " "SteerableModel, BlackBoxModel or AsyncBlackBoxModel" ) ================================================ FILE: outlines/grammars/arithmetic.lark ================================================ ?start: sum ?sum: product | sum "+" product -> add | sum "-" product -> sub ?product: atom | product "*" atom -> mul | product "/" atom -> div ?atom: NUMBER -> number | "-" atom -> neg | "(" sum ")" %import common.NUMBER %import common.WS_INLINE %ignore WS_INLINE ================================================ FILE: outlines/grammars/common.lark ================================================ // Adapted from https://github.com/lark-parser/lark/blob/master/lark/grammars/common.lark // Lark License: // Copyright © 2017 Erez Shinan // // Permission is hereby granted, free of charge, to any person obtaining a copy of // this software and associated documentation files (the "Software"), to deal in // the Software without restriction, including without limitation the rights to // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of // the Software, and to permit persons to whom the Software is furnished to do so, // subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // Basic terminals for common use // // Numbers // DIGIT: "0".."9" HEXDIGIT: "a".."f"|"A".."F"|DIGIT INT: DIGIT+ SIGNED_INT: ["+"|"-"] INT DECIMAL: INT "." INT? | "." INT // float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/ _EXP: ("e"|"E") SIGNED_INT FLOAT: INT _EXP | DECIMAL _EXP? SIGNED_FLOAT: ["+"|"-"] FLOAT NUMBER: FLOAT | INT SIGNED_NUMBER: ["+"|"-"] NUMBER UNESCAPED_STRING: /\"[^"]*\"/ // based on `outlines/fsm/json_schema.py` _NON_CONTROL_CHAR: /([^"\\\x00-\x1F\x7F-\x9F])/ _ESCAPED_CHAR: /\\/ (_NON_CONTROL_CHAR | /\\/ | /"/) ESCAPED_STRING_INNER: _NON_CONTROL_CHAR | _ESCAPED_CHAR ESCAPED_STRING: /"/ ESCAPED_STRING_INNER* /"/ // // Names (Variables) // LCASE_LETTER: "a".."z" UCASE_LETTER: "A".."Z" LETTER: UCASE_LETTER | LCASE_LETTER WORD: LETTER+ CNAME: ("_"|LETTER) ("_"|LETTER|DIGIT)* // // Whitespace // WS_INLINE: (" "|/\t/)+ WS: /[ \t\f\r\n]/+ CR : /\r/ LF : /\n/ NEWLINE: (CR? LF)+ // Comments SH_COMMENT: /#[^\n]*/ CPP_COMMENT: /\/\/[^\n]*/ C_COMMENT: "/*" /(.|\n)*?/ "*/" SQL_COMMENT: /--[^\n]*/ ================================================ FILE: outlines/grammars/json.lark ================================================ ?start: value ?value: object | array | ESCAPED_STRING | SIGNED_NUMBER -> number | "true" -> true | "false" -> false | "null" -> null array : "[" [value ("," value)*] "]" object : "{" [pair ("," pair)*] "}" pair : ESCAPED_STRING ":" value %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS %ignore WS ================================================ FILE: outlines/grammars.py ================================================ """A few common Lark grammars.""" from pathlib import Path GRAMMAR_PATH = Path(__file__).parent / "grammars" def read_grammar( grammar_file_name: str, base_grammar_path: Path = GRAMMAR_PATH, ) -> str: """Read grammar file from default grammar path. Parameters ---------- grammar_file_name The name of the grammar file to read. base_grammar_path The path to the directory containing the grammar file. Returns ------- str The contents of the grammar file. """ full_path = base_grammar_path / grammar_file_name with open(full_path) as file: return file.read() arithmetic = read_grammar("arithmetic.lark") json = read_grammar("json.lark") ================================================ FILE: outlines/inputs.py ================================================ """Contain classes used to define the inputs of a model.""" import base64 from dataclasses import dataclass from io import BytesIO from typing import Any, Dict, List, Optional from PIL import Image as PILImage @dataclass class Image: """Contains an image that can be passed to a multimodal model. Provide one or several instances of this class along with a text prompt in a list as the `model_input` argument to a model that supports vision. Parameters ---------- image The image to use in the text generation. """ image: PILImage.Image def __post_init__(self): image = self.image if not image.format: raise TypeError( "Could not read the format of the image passed to the model." ) buffer = BytesIO() image.save(buffer, format=image.format) self.image_str = base64.b64encode(buffer.getvalue()).decode("utf-8") self.image_format = f"image/{image.format.lower()}" @dataclass class Video: """Contains a video that can be passed to a multimodal model. Provide one or several instances of this class along with a text prompt in a list as the `model_input` argument to a model that supports video processing. Parameters ---------- video The video to use in the text generation. """ video: Any @dataclass class Audio: """Contains an audio that can be passed to a multimodal model. Provide one or several instances of this class along with a text prompt in a list as the `model_input` argument to a model that supports audio processing. Parameters ---------- audio The audio to use in the text generation. """ audio: Any @dataclass class Chat: """Contains the input for a chat model. Provide an instance of this class as the `model_input` argument to a model that supports chat. Each message contained in the messages list must be a dict with 'role' and 'content' keys. The role can be 'user', 'assistant', or 'system'. The content supports either: - a text string, - a list containing text and assets (e.g., ["Describe...", Image(...)]), - only for HuggingFace transformers models, a list of dict items with explicit types (e.g., [{"type": "text", "text": "Describe..."}, {"type": "image", "image": Image(...)}]) Examples -------- ```python # Initialize the chat with a system message. chat_prompt = Chat([ {"role": "system", "content": "You are a helpful assistant."}, ]) # Add a user message with an image and call the model (not shown here). chat_prompt.add_user_message(["Describe the image below", Image(image)]) # Add as an assistant message the response from the model. chat_prompt.add_assistant_message("There is a black cat sitting on a couch.") ``` Parameters ---------- messages The list of messages that will be provided to the model. """ messages: List[Dict[str, Any]] = None # type: ignore def __post_init__(self): if self.messages is None: self.messages = [] def append(self, message: Dict[str, Any]): """Add a message to the chat. Parameters ---------- message The message to add to the chat. """ self.messages.append(message) def extend(self, messages: List[Dict[str, Any]]): """Add a list of messages to the chat. Parameters ---------- messages The list of messages to add to the chat. """ self.messages.extend(messages) def pop(self) -> Dict[str, Any]: """Remove the last message from the chat. Returns ------- message The removed message. """ return self.messages.pop() def add_system_message(self, content: str | List[Any]): """Add a system message to the chat. Parameters ---------- content The content of the system message. """ self.messages.append({"role": "system", "content": content}) def add_user_message(self, content: str | List[Any]): """Add a user message to the chat. Parameters ---------- content The content of the user message. """ self.messages.append({"role": "user", "content": content}) def add_assistant_message(self, content: str | List[Any]): """Add an assistant message to the chat. Parameters ---------- content The content of the assistant message. """ self.messages.append({"role": "assistant", "content": content}) def __str__(self): return "\n".join(str(message) for message in self.messages) def __repr__(self): return f"Chat(messages={self.messages})" ================================================ FILE: outlines/models/__init__.py ================================================ """Module that contains all the models integrated in outlines. We group the models in submodules by provider instead of theme (completion, chat completion, diffusers, etc.) and use routing functions everywhere else in the codebase. """ from typing import Union from .anthropic import Anthropic, from_anthropic from .base import Model, ModelTypeAdapter from .dottxt import Dottxt, from_dottxt from .gemini import Gemini, from_gemini from .llamacpp import LlamaCpp, from_llamacpp from .lmstudio import AsyncLMStudio, LMStudio, from_lmstudio from .mistral import AsyncMistral, Mistral, from_mistral from .mlxlm import MLXLM, from_mlxlm from .ollama import AsyncOllama, Ollama, from_ollama from .openai import AsyncOpenAI, OpenAI, from_openai from .sglang import AsyncSGLang, SGLang, from_sglang from .tgi import TGI, AsyncTGI, from_tgi from .transformers import ( Transformers, TransformersMultiModal, TransformerTokenizer, from_transformers, ) from .vllm import VLLM, AsyncVLLM, from_vllm from .vllm_offline import VLLMOffline, from_vllm_offline SteerableModel = Union[LlamaCpp, MLXLM, Transformers] BlackBoxModel = Union[ Anthropic, Dottxt, Gemini, LMStudio, Ollama, OpenAI, Mistral, SGLang, TGI, VLLM, VLLMOffline, ] AsyncBlackBoxModel = Union[ AsyncLMStudio, AsyncMistral, AsyncOllama, AsyncOpenAI, AsyncTGI, AsyncSGLang, AsyncVLLM, ] __all__ = [ "Anthropic", "from_anthropic", "Model", "ModelTypeAdapter", "Dottxt", "from_dottxt", "Gemini", "from_gemini", "LlamaCpp", "from_llamacpp", "AsyncLMStudio", "LMStudio", "from_lmstudio", "AsyncMistral", "Mistral", "from_mistral", "MLXLM", "from_mlxlm", "AsyncOllama", "Ollama", "from_ollama", "AsyncOpenAI", "OpenAI", "from_openai", "AsyncSGLang", "SGLang", "from_sglang", "AsyncTGI", "TGI", "from_tgi", "Transformers", "TransformerTokenizer", "TransformersMultiModal", "from_transformers", "VLLMOffline", "from_vllm_offline", "AsyncVLLM", "VLLM", "from_vllm", "SteerableModel", "BlackBoxModel", "AsyncBlackBoxModel", ] ================================================ FILE: outlines/models/anthropic.py ================================================ """Integration with Anthropic's API.""" from functools import singledispatchmethod from typing import TYPE_CHECKING, Any, Iterator, Optional, Union from outlines.inputs import Chat, Image from outlines.models.base import Model, ModelTypeAdapter if TYPE_CHECKING: from anthropic import Anthropic as AnthropicClient __all__ = ["Anthropic", "from_anthropic"] class AnthropicTypeAdapter(ModelTypeAdapter): """Type adapter for the `Anthropic` model. `AnthropicTypeAdapter` is responsible for preparing the arguments to Anthropic's `messages.create` method: the input (prompt and possibly image). Anthropic does not support defining the output type, so `format_output_type` is not implemented. """ @singledispatchmethod def format_input(self, model_input): """Generate the `messages` argument to pass to the client. Parameters ---------- model_input The input provided by the user. Returns ------- dict The `messages` argument to pass to the client. """ raise TypeError( f"The input type {type(model_input)} is not available with " "Anthropic. The only available types are `str`, `list` and `Chat` " "(containing a prompt and images)." ) @format_input.register(str) def format_str_model_input(self, model_input: str) -> dict: return { "messages": [self._create_message("user", model_input)] } @format_input.register(list) def format_list_model_input(self, model_input: list) -> dict: return { "messages": [ self._create_message("user", model_input) ] } @format_input.register(Chat) def format_chat_model_input(self, model_input: Chat) -> dict: """Generate the `messages` argument to pass to the client when the user passes a Chat instance. """ return { "messages": [ self._create_message(message["role"], message["content"]) for message in model_input.messages ] } def _create_message(self, role: str, content: str | list) -> dict: """Create a message.""" if isinstance(content, str): return { "role": role, "content": content, } elif isinstance(content, list): prompt = content[0] images = content[1:] if not all(isinstance(image, Image) for image in images): raise ValueError("All assets provided must be of type Image") image_content_messages = [ { "type": "image", "source": { "type": "base64", "media_type": image.image_format, "data": image.image_str, }, } for image in images ] return { "role": role, "content": [ *image_content_messages, {"type": "text", "text": prompt}, ], } else: raise ValueError( f"Invalid content type: {type(content)}. " "The content must be a string or a list containing a string " "and a list of images." ) def format_output_type(self, output_type): """Not implemented for Anthropic.""" if output_type is None: return {} else: raise NotImplementedError( f"The output type {output_type} is not available with " "Anthropic." ) class Anthropic(Model): """Thin wrapper around the `anthropic.Anthropic` client. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `anthropic.Anthropic` client. """ def __init__( self, client: "AnthropicClient", model_name: Optional[str] = None ): """ Parameters ---------- client An `anthropic.Anthropic` client. model_name The name of the model to use. """ self.client = client self.model_name = model_name self.type_adapter = AnthropicTypeAdapter() def generate( self, model_input: Union[Chat, list, str], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> str: """Generate text using Anthropic. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type As structured generation is not supported by Anthropic, the value of this argument must be `None`. Otherwise, an error will be raised at runtime. **inference_kwargs Additional keyword arguments to pass to the client. Returns ------- str The response generated by the model. """ messages = self.type_adapter.format_input(model_input) if output_type is not None: raise NotImplementedError( f"The type {output_type} is not available with Anthropic." ) if ( "model" not in inference_kwargs and self.model_name is not None ): inference_kwargs["model"] = self.model_name completion = self.client.messages.create( **messages, **inference_kwargs, ) return completion.content[0].text def generate_batch( self, model_input, output_type = None, **inference_kwargs, ): raise NotImplementedError( "Anthropic does not support batch generation." ) def generate_stream( self, model_input: Union[Chat, list, str], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> Iterator[str]: """Stream text using Anthropic. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type As structured generation is not supported by Anthropic, the value of this argument must be `None`. Otherwise, an error will be raised at runtime. **inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Iterator[str] An iterator that yields the text generated by the model. """ messages = self.type_adapter.format_input(model_input) if output_type is not None: raise NotImplementedError( f"The type {output_type} is not available with Anthropic." ) if ( "model" not in inference_kwargs and self.model_name is not None ): inference_kwargs["model"] = self.model_name stream = self.client.messages.create( **messages, stream=True, **inference_kwargs, ) for chunk in stream: if ( chunk.type == "content_block_delta" and chunk.delta.type == "text_delta" ): yield chunk.delta.text def from_anthropic( client: "AnthropicClient", model_name: Optional[str] = None ) -> Anthropic: """Create an Outlines `Anthropic` model instance from an `anthropic.Anthropic` client instance. Parameters ---------- client An `anthropic.Anthropic` client instance. model_name The name of the model to use. Returns ------- Anthropic An Outlines `Anthropic` model instance. """ return Anthropic(client, model_name) ================================================ FILE: outlines/models/base.py ================================================ """Base classes for all models and model type adapters.""" from abc import ABC, abstractmethod from typing import Any, AsyncIterator, Iterator, List, Optional class ModelTypeAdapter(ABC): """Base class for all model type adapters. A type adapter instance must be given as a value to the `type_adapter` attribute when instantiating a model. The type adapter is responsible for formatting the input and output types passed to the model to match the specific format expected by the associated model. """ @abstractmethod def format_input(self, model_input: Any) -> Any: """Format the user input to the expected format of the model. For API-based models, it typically means creating the `messages` argument passed to the client. For local models, it can mean casting the input from str to list for instance. This method is also used to validate that the input type provided by the user is supported by the model. Parameters ---------- model_input The input provided by the user. Returns ------- Any The formatted input to be passed to the model. """ ... @abstractmethod def format_output_type(self, output_type: Optional[Any] = None) -> Any: """Format the output type to the expected format of the model. For black-box models, this typically means creating a `response_format` argument. For steerable models, it means formatting the logits processor to create the object type expected by the model. Parameters ---------- output_type The output type provided by the user. Returns ------- Any The formatted output type to be passed to the model. """ ... class Model(ABC): """Base class for all synchronous models. This class defines shared `__call__`, `batch` and `stream` methods that can be used to call the model directly. The `generate`, `generate_batch`, and `generate_stream` methods must be implemented by the subclasses. All models inheriting from this class must define a `type_adapter` attribute of type `ModelTypeAdapter`. The methods of the `type_adapter` attribute are used in the `generate`, `generate_batch`, and `generate_stream` methods to format the input and output types received by the model. Additionally, steerable models must define a `tensor_library_name` attribute. """ type_adapter: ModelTypeAdapter tensor_library_name: str def __call__( self, model_input: Any, output_type: Optional[Any] = None, backend: Optional[str] = None, **inference_kwargs: Any ) -> Any: """Call the model. Users can call the model directly, in which case we will create a generator instance with the output type provided and call it. Thus, those commands are equivalent: ```python generator = Generator(model, Foo) generator("prompt") ``` and ```python model("prompt", Foo) ``` Parameters ---------- model_input The input provided by the user. output_type The output type provided by the user. backend The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Any The response generated by the model. """ from outlines.generator import Generator return Generator(self, output_type, backend)(model_input, **inference_kwargs) def batch( self, model_input: List[Any], output_type: Optional[Any] = None, backend: Optional[str] = None, **inference_kwargs: Any ) -> List[Any]: """Make a batch call to the model (several inputs at once). Users can use the `batch` method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its `batch` method. Thus, those commands are equivalent: ```python generator = Generator(model, Foo) generator.batch(["prompt1", "prompt2"]) ``` and ```python model.batch(["prompt1", "prompt2"], Foo) ``` Parameters ---------- model_input The list of inputs provided by the user. output_type The output type provided by the user. backend The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- List[Any] The list of responses generated by the model. """ from outlines import Generator generator = Generator(self, output_type, backend) return generator.batch(model_input, **inference_kwargs) # type: ignore def stream( self, model_input: Any, output_type: Optional[Any] = None, backend: Optional[str] = None, **inference_kwargs: Any ) -> Iterator[Any]: """Stream a response from the model. Users can use the `stream` method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its `stream` method. Thus, those commands are equivalent: ```python generator = Generator(model, Foo) for chunk in generator("prompt"): print(chunk) ``` and ```python for chunk in model.stream("prompt", Foo): print(chunk) ``` Parameters ---------- model_input The input provided by the user. output_type The output type provided by the user. backend The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Iterator[Any] A stream of responses from the model. """ from outlines import Generator generator = Generator(self, output_type, backend) return generator.stream(model_input, **inference_kwargs) # type: ignore @abstractmethod def generate( self, model_input: Any, output_type: Optional[Any] = None, **inference_kwargs: Any ) -> Any: """Generate a response from the model. The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users. Parameters ---------- model_input The input provided by the user. output_type The output type provided by the user. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Any The response generated by the model. """ ... @abstractmethod def generate_batch( self, model_input: List[Any], output_type: Optional[Any] = None, **inference_kwargs: Any ) -> List[Any]: """Generate a batch of responses from the model. The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users. Parameters ---------- model_input The list of inputs provided by the user. output_type The output type provided by the user. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- List[Any] The list of responses generated by the model. """ ... @abstractmethod def generate_stream( self, model_input: Any, output_type: Optional[Any] = None, **inference_kwargs: Any ) -> Iterator[Any]: """Generate a stream of responses from the model. The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users. Parameters ---------- model_input The input provided by the user. output_type The output type provided by the user. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Iterator[Any] A stream of responses from the model. """ ... class AsyncModel(ABC): """Base class for all asynchronous models. This class defines shared `__call__`, `batch` and `stream` methods that can be used to call the model directly. The `generate`, `generate_batch`, and `generate_stream` methods must be implemented by the subclasses. All models inheriting from this class must define a `type_adapter` attribute of type `ModelTypeAdapter`. The methods of the `type_adapter` attribute are used in the `generate`, `generate_batch`, and `generate_stream` methods to format the input and output types received by the model. Additionally, steerable models must define a `tensor_library_name` attribute. """ type_adapter: ModelTypeAdapter tensor_library_name: str async def __call__( self, model_input: Any, output_type: Optional[Any] = None, backend: Optional[str] = None, **inference_kwargs: Any ) -> Any: """Call the model. Users can call the model directly, in which case we will create a generator instance with the output type provided and call it. Thus, those commands are equivalent: ```python generator = Generator(model, Foo) await generator("prompt") ``` and ```python await model("prompt", Foo) ``` Parameters ---------- model_input The input provided by the user. output_type The output type provided by the user. backend The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Any The response generated by the model. """ from outlines import Generator generator = Generator(self, output_type, backend) return await generator(model_input, **inference_kwargs) async def batch( self, model_input: List[Any], output_type: Optional[Any] = None, backend: Optional[str] = None, **inference_kwargs: Any ) -> List[Any]: """Make a batch call to the model (several inputs at once). Users can use the `batch` method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its `batch` method. Thus, those commands are equivalent: ```python generator = Generator(model, Foo) await generator.batch(["prompt1", "prompt2"]) ``` and ```python await model.batch(["prompt1", "prompt2"], Foo) ``` Parameters ---------- model_input The list of inputs provided by the user. output_type The output type provided by the user. backend The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- List[Any] The list of responses generated by the model. """ from outlines import Generator generator = Generator(self, output_type, backend) return await generator.batch(model_input, **inference_kwargs) # type: ignore async def stream( self, model_input: Any, output_type: Optional[Any] = None, backend: Optional[str] = None, **inference_kwargs: Any ) -> AsyncIterator[Any]: """Stream a response from the model. Users can use the `stream` method from the model directly, in which case we will create a generator instance with the output type provided and then invoke its `stream` method. Thus, those commands are equivalent: ```python generator = Generator(model, Foo) async for chunk in generator("prompt"): print(chunk) ``` and ```python async for chunk in model.stream("prompt", Foo): print(chunk) ``` Parameters ---------- model_input The input provided by the user. output_type The output type provided by the user. backend The name of the backend to use to create the logits processor that will be used to generate the response. Only used for steerable models if `output_type` is provided. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- AsyncIterator[Any] A stream of responses from the model. """ from outlines import Generator generator = Generator(self, output_type, backend) async for chunk in generator.stream(model_input, **inference_kwargs): # type: ignore yield chunk @abstractmethod async def generate( self, model_input: Any, output_type: Optional[Any] = None, **inference_kwargs: Any ) -> Any: """Generate a response from the model. The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users. Parameters ---------- model_input The input provided by the user. output_type The output type provided by the user. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- Any The response generated by the model. """ ... @abstractmethod async def generate_batch( self, model_input: List[Any], output_type: Optional[Any] = None, **inference_kwargs: Any ) -> List[Any]: """Generate a batch of responses from the model. The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users. Parameters ---------- model_input The list of inputs provided by the user. output_type The output type provided by the user. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- List[Any] The list of responses generated by the model. """ ... @abstractmethod async def generate_stream( self, model_input: Any, output_type: Optional[Any] = None, **inference_kwargs: Any ) -> AsyncIterator[Any]: """Generate a stream of responses from the model. The output_type argument contains a logits processor for steerable models while it contains a type (Json, Enum...) for black-box models. This method is not intended to be used directly by end users. Parameters ---------- model_input The input provided by the user. output_type The output type provided by the user. **inference_kwargs Additional keyword arguments to pass to the model. Returns ------- AsyncIterator[Any] A coroutine that will produce an async iterator of responses from the model. """ ... ================================================ FILE: outlines/models/dottxt.py ================================================ """Integration with Dottxt's API.""" from typing import TYPE_CHECKING, Any, Optional, cast from outlines.models.base import Model, ModelTypeAdapter from outlines.types import CFG, JsonSchema, Regex if TYPE_CHECKING: from dottxt import Dottxt as DottxtClient __all__ = ["Dottxt", "from_dottxt"] class DottxtTypeAdapter(ModelTypeAdapter): """Type adapter for the `Dottxt` model.""" def format_input(self, model_input: str) -> str: """Format the prompt to pass to the client. Parameters ---------- model_input The input provided by the user. Returns ------- str The input to pass to the client. """ if isinstance(model_input, str): return model_input raise TypeError( f"The input type {model_input} is not available with Dottxt. " "The only available type is `str`." ) def format_output_type(self, output_type: Optional[Any] = None) -> str: """Format the output type to pass to the client. Parameters ---------- output_type The output type provided by the user. Returns ------- str The output type to pass to the client. """ # Unsupported languages if output_type is None: raise TypeError( "You must provide an output type. Dottxt only supports " "constrained generation." ) elif isinstance(output_type, Regex): raise TypeError( "Regex-based structured outputs will soon be available with " "Dottxt. Use an open source model in the meantime." ) elif isinstance(output_type, CFG): raise TypeError( "CFG-based structured outputs will soon be available with " "Dottxt. Use an open source model in the meantime." ) elif JsonSchema.is_json_schema(output_type): return cast(str, JsonSchema.convert_to(output_type, ["str"])) else: type_name = getattr(output_type, "__name__", output_type) raise TypeError( f"The type `{type_name}` is not supported by Dottxt. " "Consider using a local mode instead." ) class Dottxt(Model): """Thin wrapper around the `dottxt.client.Dottxt` client. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `dottxt.client.Dottxt` client. """ def __init__( self, client: "DottxtClient", model_name: Optional[str] = None, model_revision: Optional[str] = None, ): """ Parameters ---------- client A `dottxt.Dottxt` client. model_name The name of the model to use. model_revision The revision of the model to use. """ self.client = client self.model_name = model_name self.model_revision = model_revision self.type_adapter = DottxtTypeAdapter() def generate( self, model_input: str, output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> str: """Generate text using Dottxt. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema. **inference_kwargs Additional keyword arguments to pass to the client. Returns ------- str The text generated by the model. """ prompt = self.type_adapter.format_input(model_input) json_schema = self.type_adapter.format_output_type(output_type) if ( "model_name" not in inference_kwargs and self.model_name is not None ): inference_kwargs["model_name"] = self.model_name if ( "model_revision" not in inference_kwargs and self.model_revision is not None ): inference_kwargs["model_revision"] = self.model_revision completion = self.client.json( prompt, json_schema, **inference_kwargs, ) return completion.data def generate_batch( self, model_input, output_type = None, **inference_kwargs, ): raise NotImplementedError( "Dottxt does not support batch generation." ) def generate_stream( self, model_input, output_type=None, **inference_kwargs, ): """Not available for Dottxt.""" raise NotImplementedError( "Dottxt does not support streaming. Call the model/generator for " + "regular generation instead." ) def from_dottxt( client: "DottxtClient", model_name: Optional[str] = None, model_revision: Optional[str] = None, ) -> Dottxt: """Create an Outlines `Dottxt` model instance from a `dottxt.Dottxt` client instance. Parameters ---------- client A `dottxt.Dottxt` client instance. model_name The name of the model to use. model_revision The revision of the model to use. Returns ------- Dottxt An Outlines `Dottxt` model instance. """ return Dottxt(client, model_name, model_revision) ================================================ FILE: outlines/models/gemini.py ================================================ """Integration with Gemini's API.""" from functools import singledispatchmethod from typing import ( TYPE_CHECKING, Any, Iterator, Optional, Union, get_args, ) from outlines.inputs import Image, Chat from outlines.models.base import Model, ModelTypeAdapter from outlines.types import CFG, Choice, JsonSchema, Regex from outlines.types.utils import ( is_enum, get_enum_from_choice, get_enum_from_literal, is_genson_schema_builder, is_literal, is_typing_list, ) if TYPE_CHECKING: from google.genai import Client __all__ = ["Gemini", "from_gemini"] class GeminiTypeAdapter(ModelTypeAdapter): """Type adapter for the `Gemini` model. `GeminiTypeAdapter` is responsible for preparing the arguments to Gemini's client `models.generate_content` method: the input (prompt and possibly image), as well as the output type (either JSON or multiple choice). """ @singledispatchmethod def format_input(self, model_input): """Generate the `contents` argument to pass to the client. Parameters ---------- model_input The input provided by the user. Returns ------- dict The `contents` argument to pass to the client. """ raise TypeError( f"The input type {type(model_input)} is not available with " "Gemini. The only available types are `str`, `list` and `Chat` " "(containing a prompt and images)." ) @format_input.register(str) def format_str_model_input(self, model_input: str) -> dict: return {"contents": [self._create_text_part(model_input)]} @format_input.register(list) def format_list_model_input(self, model_input: list) -> dict: return { "contents": [ self._create_message("user", model_input) ] } @format_input.register(Chat) def format_chat_model_input(self, model_input: Chat) -> dict: """Generate the `contents` argument to pass to the client when the user passes a Chat instance. """ return { "contents": [ self._create_message(message["role"], message["content"]) for message in model_input.messages ] } def _create_message(self, role: str, content: str | list) -> dict: """Create a message.""" # Gemini uses "model" instead of "assistant" if role == "assistant": role = "model" if isinstance(content, str): return { "role": role, "parts": [self._create_text_part(content)], } elif isinstance(content, list): prompt = content[0] images = content[1:] if not all(isinstance(image, Image) for image in images): raise ValueError("All assets provided must be of type Image") image_parts = [ self._create_img_part(image) for image in images ] return { "role": role, "parts": [ self._create_text_part(prompt), *image_parts, ], } else: raise ValueError( f"Invalid content type: {type(content)}. " "The content must be a string or a list containing a string " "and a list of images." ) return {"contents": [prompt, *image_parts]} def _create_text_part(self, text: str) -> dict: """Create a text input part for a message.""" return { "text": text, } def _create_img_part(self, image: Image) -> dict: """Create an image input part for a message.""" return { "inline_data": { "mime_type": image.image_format, "data": image.image_str, } } def format_output_type(self, output_type: Optional[Any] = None) -> dict: """Generate the `generation_config` argument to pass to the client. Parameters ---------- output_type The output type provided by the user. Returns ------- dict The `generation_config` argument to pass to the client. """ # Unsupported output pytes if isinstance(output_type, Regex): raise TypeError( "Neither regex-based structured outputs nor the `pattern` " "keyword in Json Schema are available with Gemini. Use an " "open source model or dottxt instead." ) elif isinstance(output_type, CFG): raise TypeError( "CFG-based structured outputs are not available with Gemini. " "Use an open source model or dottxt instead." ) if output_type is None: return {} # JSON schema types elif JsonSchema.is_json_schema(output_type): return self.format_json_output_type( JsonSchema.convert_to( output_type, ["dataclass", "typeddict", "pydantic"] ) ) # List of structured types elif is_typing_list(output_type): return self.format_list_output_type(output_type) # Multiple choice types elif is_enum(output_type): return self.format_enum_output_type(output_type) elif is_literal(output_type): enum = get_enum_from_literal(output_type) return self.format_enum_output_type(enum) elif isinstance(output_type, Choice): enum = get_enum_from_choice(output_type) return self.format_enum_output_type(enum) else: type_name = getattr(output_type, "__name__", output_type) raise TypeError( f"The type `{type_name}` is not supported by Gemini. " "Consider using a local model or dottxt instead." ) def format_enum_output_type(self, output_type: Optional[Any]) -> dict: return { "response_mime_type": "text/x.enum", "response_schema": output_type, } def format_json_output_type(self, output_type: Optional[Any]) -> dict: return { "response_mime_type": "application/json", "response_schema": output_type, } def format_list_output_type(self, output_type: Optional[Any]) -> dict: args = get_args(output_type) if len(args) == 1: item_type = args[0] if JsonSchema.is_json_schema(item_type): return { "response_mime_type": "application/json", "response_schema": list[ # type: ignore JsonSchema.convert_to( item_type, ["dataclass", "typeddict", "pydantic"] ) ], } else: raise TypeError( "The list items output type must contain a JSON schema " "type." ) raise TypeError( f"Gemini only supports homogeneous lists: " "list[BaseModel], list[TypedDict] or list[dataclass]. " f"Got {output_type} instead." ) class Gemini(Model): """Thin wrapper around the `google.genai.Client` client. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `google.genai.Client` client. """ def __init__(self, client: "Client", model_name: Optional[str] = None): """ Parameters ---------- client A `google.genai.Client` instance. model_name The name of the model to use. """ self.client = client self.model_name = model_name self.type_adapter = GeminiTypeAdapter() def generate( self, model_input: Union[Chat, list, str], output_type: Optional[Any] = None, **inference_kwargs, ) -> str: """Generate a response from the model. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema, a list of such types, or a multiple choice type. **inference_kwargs Additional keyword arguments to pass to the client. Returns ------- str The response generated by the model. """ contents = self.type_adapter.format_input(model_input) generation_config = self.type_adapter.format_output_type(output_type) completion = self.client.models.generate_content( **contents, model=inference_kwargs.pop("model", self.model_name), config={**generation_config, **inference_kwargs} ) return completion.text def generate_batch( self, model_input, output_type = None, **inference_kwargs, ): raise NotImplementedError( "Gemini does not support batch generation." ) def generate_stream( self, model_input: Union[Chat, list, str], output_type: Optional[Any] = None, **inference_kwargs, ) -> Iterator[str]: """Generate a stream of responses from the model. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema, a list of such types, or a multiple choice type. **inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Iterator[str] An iterator that yields the text generated by the model. """ contents = self.type_adapter.format_input(model_input) generation_config = self.type_adapter.format_output_type(output_type) stream = self.client.models.generate_content_stream( **contents, model=inference_kwargs.pop("model", self.model_name), config={**generation_config, **inference_kwargs}, ) for chunk in stream: if hasattr(chunk, "text") and chunk.text: yield chunk.text def from_gemini(client: "Client", model_name: Optional[str] = None) -> Gemini: """Create an Outlines `Gemini` model instance from a `google.genai.Client` instance. Parameters ---------- client A `google.genai.Client` instance. model_name The name of the model to use. Returns ------- Gemini An Outlines `Gemini` model instance. """ return Gemini(client, model_name) ================================================ FILE: outlines/models/llamacpp.py ================================================ """Integration with the `llama-cpp-python` library.""" import ctypes from functools import singledispatchmethod from typing import ( TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Set, Tuple, Union, ) from outlines.inputs import Chat from outlines.models.base import Model, ModelTypeAdapter from outlines.models.tokenizer import Tokenizer from outlines.processors import OutlinesLogitsProcessor if TYPE_CHECKING: from llama_cpp import Llama, LogitsProcessorList __all__ = ["LlamaCpp", "from_llamacpp"] class LlamaCppTokenizer(Tokenizer): def __init__(self, model: "Llama"): self.tokenizer = model.tokenizer() self.special_tokens: Set[str] = set() self.vocabulary: Dict[str, int] = dict() # TODO: Remove when https://github.com/ggerganov/llama.cpp/pull/5613 # is resolved self._hf_tokenizer = None if ( hasattr(model, "tokenizer_") and hasattr(model.tokenizer_, "hf_tokenizer") ): self._hf_tokenizer = model.tokenizer_.hf_tokenizer self.eos_token_id = self._hf_tokenizer.eos_token_id self.eos_token = self._hf_tokenizer.eos_token self.vocabulary = self._hf_tokenizer.get_vocab() else: from llama_cpp import ( llama_model_get_vocab, llama_token_to_piece, ) self.eos_token_id = model.token_eos() size = 32 buffer = (ctypes.c_char * size)() vocab = llama_model_get_vocab(model.model) for i in range(model.n_vocab()): n = llama_token_to_piece( vocab, i, buffer, size, 0, True ) # n < 0 is an error return from llama_token_to_piece; # skip invalid tokens so they don't pollute the vocabulary. if n < 0: continue # n > size means the piece was truncated; retry with a # larger buffer so distinct tokens are not collapsed. if n > size: big = (ctypes.c_char * n)() llama_token_to_piece(vocab, i, big, n, 0, True) token_piece = big[:n].decode("utf-8", errors="replace") # type: ignore else: token_piece = buffer[:n].decode("utf-8", errors="replace") # type: ignore self.vocabulary[token_piece] = i if i == self.eos_token_id: self.eos_token = token_piece self.pad_token_id = self.eos_token_id # ensure stable ordering of vocabulary self.vocabulary = { tok: tok_id for tok, tok_id in sorted(self.vocabulary.items(), key=lambda x: x[1]) } self._hash = None def decode(self, token_ids: List[int]) -> List[str]: decoded_bytes = self.tokenizer.detokenize(token_ids) return [decoded_bytes.decode("utf-8", errors="ignore")] def encode( self, prompt: Union[str, List[str]], add_bos: bool = True, special: bool = True, ) -> Tuple[List[int], List[int]]: if isinstance(prompt, list): raise NotImplementedError( "llama-cpp-python tokenizer doesn't support batch tokenization" ) token_ids = self.tokenizer.tokenize( prompt.encode("utf-8", errors="ignore"), add_bos=add_bos, special=special, ) # generate attention mask, missing from llama-cpp-python. # For a single (non-batched) prompt there is no real padding, so # every token — including EOS when it appears inside the prompt — # should be attended. We therefore always set the mask to 1. attention_mask = [1] * len(token_ids) return token_ids, attention_mask def convert_token_to_string(self, token: str) -> str: if self._hf_tokenizer is not None: from transformers.file_utils import SPIECE_UNDERLINE token_str = self._hf_tokenizer.convert_tokens_to_string([token]) if ( token.startswith(SPIECE_UNDERLINE) or token == "<0x20>" ): # pragma: no cover token_str = " " + token_str return token_str else: return token def __eq__(self, other): if not isinstance(other, LlamaCppTokenizer): return False return self.__getstate__() == other.__getstate__() def __hash__(self): # We create a custom hash as pickle.dumps(self) is not stable if self._hash is None: self._hash = hash(( tuple(sorted(self.vocabulary.items())), self.eos_token_id, self.eos_token, self.pad_token_id, tuple(sorted(self.special_tokens)), )) return self._hash def __getstate__(self): """Create a stable representation for outlines.caching""" return ( self.vocabulary, self.eos_token_id, self.eos_token, self.pad_token_id, sorted(self.special_tokens), ) def __setstate__(self, state): raise NotImplementedError("Cannot load a pickled llamacpp tokenizer") class LlamaCppTypeAdapter(ModelTypeAdapter): """Type adapter for the `LlamaCpp` model. `LlamaCppTypeAdapter` is responsible for preparing the arguments to the `Llama` object text generation methods. """ def __init__(self, has_chat_template: bool = False): """ Parameters ---------- has_chat_template Whether the model has a chat template defined. """ self.has_chat_template = has_chat_template @singledispatchmethod def format_input(self, model_input): """Generate the prompt argument to pass to the model. Parameters ---------- model_input The input provided by the user. Returns ------- str The formatted input to be passed to the model. """ raise NotImplementedError( f"The input type {type(model_input)} is not available with " "LlamaCpp. The only available types are `str` and `Chat`." ) @format_input.register(str) def format_str_input(self, model_input: str) -> str | list: if self.has_chat_template: return [{"role": "user", "content": model_input}] return model_input @format_input.register(Chat) def format_chat_input(self, model_input: Chat) -> list: if not all( isinstance(message["content"], str) for message in model_input.messages ): raise ValueError( "LlamaCpp does not support multi-modal messages." + "The content of each message must be a string." ) return [ { "role": message["role"], "content": message["content"], } for message in model_input.messages ] def format_output_type( self, output_type: Optional[OutlinesLogitsProcessor] = None, ) -> Optional["LogitsProcessorList"]: """Generate the logits processor argument to pass to the model. Parameters ---------- output_type The logits processor provided. Returns ------- LogitsProcessorList The logits processor to pass to the model. """ from llama_cpp import LogitsProcessorList if output_type is not None: return LogitsProcessorList([output_type]) return None class LlamaCpp(Model): """Thin wrapper around the `llama_cpp.Llama` model. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `llama_cpp.Llama` model. """ tensor_library_name = "numpy" def __init__(self, model: "Llama", chat_mode: bool = True): """ Parameters ---------- model A `llama_cpp.Llama` model instance. chat_mode Whether to enable chat mode. If `False`, the model will regard all `str` inputs as plain text prompts. If `True`, the model will regard all `str` inputs as user messages in a chat conversation. """ self.model = model self.tokenizer = LlamaCppTokenizer(self.model) # Note: llama-cpp-python provides a default chat-template fallback even when # the user hasn't explicitly configured one: # https://github.com/abetlen/llama-cpp-python/blob/c37132b/llama_cpp/llama.py#L540-L545 # We keep the default as True because the upstream library generally favors chat-style usage. self.type_adapter = LlamaCppTypeAdapter(has_chat_template=chat_mode) def generate( self, model_input: Union[Chat, str], output_type: Optional[OutlinesLogitsProcessor] = None, **inference_kwargs: Any, ) -> str: """Generate text using `llama-cpp-python`. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The logits processor the model will use to constrain the format of the generated text. **inference_kwargs Additional keyword arguments to pass to the `Llama.__call__` method of the `llama-cpp-python` library. Returns ------- str The text generated by the model. """ prompt = self.type_adapter.format_input(model_input) if isinstance(prompt, str): completion = self.model( prompt, logits_processor=self.type_adapter.format_output_type(output_type), **inference_kwargs, ) result = completion["choices"][0]["text"] elif isinstance(prompt, list): completion = self.model.create_chat_completion( prompt, logits_processor=self.type_adapter.format_output_type(output_type), **inference_kwargs, ) result = completion["choices"][0]["message"]["content"] else: # Never reached # pragma: no cover raise ValueError("Unexpected prompt type.") self.model.reset() return result def generate_batch( self, model_input, output_type = None, **inference_kwargs, ): raise NotImplementedError("LlamaCpp does not support batch generation.") def generate_stream( self, model_input: Union[Chat, str], output_type: Optional[OutlinesLogitsProcessor] = None, **inference_kwargs: Any, ) -> Iterator[str]: """Stream text using `llama-cpp-python`. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The logits processor the model will use to constrain the format of the generated text. **inference_kwargs Additional keyword arguments to pass to the `Llama.__call__` method of the `llama-cpp-python` library. Returns ------- Iterator[str] An iterator that yields the text generated by the model. """ prompt = self.type_adapter.format_input(model_input) if isinstance(prompt, str): generator = self.model( prompt, logits_processor=self.type_adapter.format_output_type(output_type), stream=True, **inference_kwargs, ) for chunk in generator: yield chunk["choices"][0]["text"] elif isinstance(prompt, list): generator = self.model.create_chat_completion( prompt, logits_processor=self.type_adapter.format_output_type(output_type), stream=True, **inference_kwargs, ) for chunk in generator: yield chunk["choices"][0]["delta"].get("content", "") else: # Never reached # pragma: no cover raise ValueError("Unexpected prompt type.") def from_llamacpp(model: "Llama", chat_mode: bool = True) -> LlamaCpp: """Create an Outlines `LlamaCpp` model instance from a `llama_cpp.Llama` instance. Parameters ---------- model A `llama_cpp.Llama` instance. chat_mode Whether to enable chat mode. If `False`, the model will regard all `str` inputs as plain text prompts. If `True`, the model will regard all `str` inputs as user messages in a chat conversation. Returns ------- LlamaCpp An Outlines `LlamaCpp` model instance. """ return LlamaCpp(model, chat_mode=chat_mode) ================================================ FILE: outlines/models/lmstudio.py ================================================ """Integration with the `lmstudio` library.""" from functools import singledispatchmethod from typing import ( TYPE_CHECKING, Any, AsyncIterator, Iterator, Optional, Union, cast, ) from outlines.inputs import Chat, Image from outlines.models.base import AsyncModel, Model, ModelTypeAdapter from outlines.types import CFG, JsonSchema, Regex if TYPE_CHECKING: from lmstudio import AsyncClient, Chat as LMStudioChat, Client __all__ = ["LMStudio", "AsyncLMStudio", "from_lmstudio"] class LMStudioTypeAdapter(ModelTypeAdapter): """Type adapter for the `LMStudio` model.""" def _prepare_lmstudio_image(self, image: Image): """Convert Outlines Image to LMStudio image handle. LMStudio's SDK only accepts file paths, raw bytes, or binary IO objects. Unlike Ollama which accepts base64 directly, we must decode from base64. """ import base64 import lmstudio as lms image_bytes = base64.b64decode(image.image_str) return lms.prepare_image(image_bytes) @singledispatchmethod def format_input(self, model_input): """Format input for LMStudio model. Parameters ---------- model_input The input provided by the user. Returns ------- str | LMStudioChat The formatted input to be passed to the model. """ raise TypeError( f"The input type {type(model_input)} is not available with " "LMStudio. The only available types are `str`, `list` and `Chat`." ) @format_input.register(str) def format_str_model_input(self, model_input: str) -> str: """Pass through string input directly to LMStudio.""" return model_input @format_input.register(list) def format_list_model_input(self, model_input: list) -> "LMStudioChat": """Handle list input containing prompt and images.""" from lmstudio import Chat as LMSChat prompt = model_input[0] images = model_input[1:] if not all(isinstance(img, Image) for img in images): raise ValueError("All assets provided must be of type Image") chat = LMSChat() image_handles = [self._prepare_lmstudio_image(img) for img in images] chat.add_user_message(prompt, images=image_handles) return chat @format_input.register(Chat) def format_chat_model_input(self, model_input: Chat) -> "LMStudioChat": """Convert Outlines Chat to LMStudio Chat with image support.""" from lmstudio import Chat as LMSChat system_prompt = None messages = model_input.messages if messages and messages[0]["role"] == "system": system_prompt = messages[0]["content"] messages = messages[1:] chat = LMSChat(system_prompt) if system_prompt else LMSChat() for message in messages: role = message["role"] content = message["content"] if role == "user": if isinstance(content, str): chat.add_user_message(content) elif isinstance(content, list): prompt = content[0] images = content[1:] if not all(isinstance(img, Image) for img in images): raise ValueError("All assets provided must be of type Image") image_handles = [self._prepare_lmstudio_image(img) for img in images] chat.add_user_message(prompt, images=image_handles) else: raise ValueError( f"Invalid content type: {type(content)}. " "The content must be a string or a list containing a string " "and a list of images." ) elif role == "assistant": chat.add_assistant_response(content) else: raise ValueError(f"Unsupported role: {role}") return chat def format_output_type( self, output_type: Optional[Any] = None ) -> Optional[dict]: """Format the output type to pass to the model. Parameters ---------- output_type The output type provided by the user. Returns ------- Optional[dict] The formatted output type (JSON schema) to be passed to the model. """ if output_type is None: return None elif isinstance(output_type, Regex): raise TypeError( "Regex-based structured outputs are not supported by LMStudio. " "Use an open source model in the meantime." ) elif isinstance(output_type, CFG): raise TypeError( "CFG-based structured outputs are not supported by LMStudio. " "Use an open source model in the meantime." ) elif JsonSchema.is_json_schema(output_type): return cast(dict, JsonSchema.convert_to(output_type, ["dict"])) else: type_name = getattr(output_type, "__name__", output_type) raise TypeError( f"The type `{type_name}` is not supported by LMStudio. " "Consider using a local model instead." ) class LMStudio(Model): """Thin wrapper around a `lmstudio.Client` client. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the LMStudio client. """ def __init__(self, client: "Client", model_name: Optional[str] = None): """ Parameters ---------- client A LMStudio Client instance obtained via `lmstudio.Client()` or `lmstudio.get_default_client()`. model_name The name of the model to use. If not provided, uses the default loaded model in LMStudio. """ self.client = client self.model_name = model_name self.type_adapter = LMStudioTypeAdapter() def generate( self, model_input: Chat | str | list, output_type: Optional[Any] = None, **kwargs: Any, ) -> str: """Generate text using LMStudio. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema. **kwargs Additional keyword arguments to pass to the model. Returns ------- str The text generated by the model. """ if "model" not in kwargs and self.model_name is not None: kwargs["model"] = self.model_name model_key = kwargs.pop("model", None) model = self.client.llm.model(model_key) if model_key else self.client.llm.model() formatted_input = self.type_adapter.format_input(model_input) response_format = self.type_adapter.format_output_type(output_type) if response_format is not None: kwargs["response_format"] = response_format result = model.respond(formatted_input, **kwargs) return result.content def generate_batch( self, model_input, output_type=None, **kwargs, ): raise NotImplementedError( "The `lmstudio` library does not support batch inference." ) def generate_stream( self, model_input: Chat | str | list, output_type: Optional[Any] = None, **kwargs: Any, ) -> Iterator[str]: """Stream text using LMStudio. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema. **kwargs Additional keyword arguments to pass to the model. Returns ------- Iterator[str] An iterator that yields the text generated by the model. """ if "model" not in kwargs and self.model_name is not None: kwargs["model"] = self.model_name model_key = kwargs.pop("model", None) model = self.client.llm.model(model_key) if model_key else self.client.llm.model() formatted_input = self.type_adapter.format_input(model_input) response_format = self.type_adapter.format_output_type(output_type) if response_format is not None: kwargs["response_format"] = response_format stream = model.respond_stream(formatted_input, **kwargs) for fragment in stream: yield fragment.content class AsyncLMStudio(AsyncModel): """Thin wrapper around a `lmstudio.AsyncClient` client. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the LMStudio async client. """ def __init__( self, client: "AsyncClient", model_name: Optional[str] = None ): """ Parameters ---------- client A LMStudio AsyncClient instance. model_name The name of the model to use. If not provided, uses the default loaded model in LMStudio. """ self.client = client self.model_name = model_name self.type_adapter = LMStudioTypeAdapter() self._context_entered = False async def close(self) -> None: """Close the async client and release resources.""" if self._context_entered: await self.client.__aexit__(None, None, None) self._context_entered = False async def generate( self, model_input: Chat | str | list, output_type: Optional[Any] = None, **kwargs: Any, ) -> str: """Generate text using LMStudio asynchronously. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema. **kwargs Additional keyword arguments to pass to the model. Returns ------- str The text generated by the model. """ if not self._context_entered: await self.client.__aenter__() self._context_entered = True if "model" not in kwargs and self.model_name is not None: kwargs["model"] = self.model_name model_key = kwargs.pop("model", None) model = await self.client.llm.model(model_key) if model_key else await self.client.llm.model() formatted_input = self.type_adapter.format_input(model_input) response_format = self.type_adapter.format_output_type(output_type) if response_format is not None: kwargs["response_format"] = response_format result = await model.respond(formatted_input, **kwargs) return result.content async def generate_batch( self, model_input, output_type=None, **kwargs, ): raise NotImplementedError( "The `lmstudio` library does not support batch inference." ) async def generate_stream( # type: ignore self, model_input: Chat | str | list, output_type: Optional[Any] = None, **kwargs: Any, ) -> AsyncIterator[str]: """Stream text using LMStudio asynchronously. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema. **kwargs Additional keyword arguments to pass to the model. Returns ------- AsyncIterator[str] An async iterator that yields the text generated by the model. """ if not self._context_entered: await self.client.__aenter__() self._context_entered = True if "model" not in kwargs and self.model_name is not None: kwargs["model"] = self.model_name model_key = kwargs.pop("model", None) model = await self.client.llm.model(model_key) if model_key else await self.client.llm.model() formatted_input = self.type_adapter.format_input(model_input) response_format = self.type_adapter.format_output_type(output_type) if response_format is not None: kwargs["response_format"] = response_format stream = await model.respond_stream(formatted_input, **kwargs) async for fragment in stream: yield fragment.content def from_lmstudio( client: Union["Client", "AsyncClient"], model_name: Optional[str] = None, ) -> Union[LMStudio, AsyncLMStudio]: """Create an Outlines `LMStudio` model instance from a `lmstudio.Client` or `lmstudio.AsyncClient` instance. Parameters ---------- client A `lmstudio.Client` or `lmstudio.AsyncClient` instance. model_name The name of the model to use. Returns ------- Union[LMStudio, AsyncLMStudio] An Outlines `LMStudio` or `AsyncLMStudio` model instance. """ from lmstudio import AsyncClient, Client if isinstance(client, Client): return LMStudio(client, model_name) elif isinstance(client, AsyncClient): return AsyncLMStudio(client, model_name) else: raise ValueError( "Invalid client type, the client must be an instance of " "`lmstudio.Client` or `lmstudio.AsyncClient`." ) ================================================ FILE: outlines/models/mistral.py ================================================ """Integration with Mistral AI API.""" import json from functools import singledispatchmethod from typing import ( TYPE_CHECKING, Any, Iterator, List, Dict, Optional, Union, ) from pydantic import TypeAdapter from outlines.inputs import Chat, Image from outlines.models.base import AsyncModel, Model, ModelTypeAdapter from outlines.models.utils import set_additional_properties_false_json_schema from outlines.types import JsonSchema, Regex, CFG from outlines.types.utils import ( is_dataclass, is_genson_schema_builder, is_native_dict, is_pydantic_model, is_typed_dict, ) if TYPE_CHECKING: from mistralai import Mistral as MistralClient __all__ = ["AsyncMistral", "Mistral", "from_mistral"] class MistralTypeAdapter(ModelTypeAdapter): """Type adapter for the `Mistral` model. Prepares arguments for Mistral's client `chat.complete`, `chat.complete_async`, or `chat.stream` methods. Handles input (prompt or chat messages) and output type (JSON schema types). """ @singledispatchmethod def format_input(self, model_input): """Generate the `messages` argument to pass to the client. Parameters ---------- model_input The input provided by the user. Returns ------- list The `messages` argument to pass to the client. """ raise TypeError( f"The input type {type(model_input)} is not available with " "Mistral. The only available types are `str`, `list` and `Chat`." ) @format_input.register(str) def format_str_model_input(self, model_input: str) -> list: """Format a string input into a list of messages. Parameters ---------- model_input : str The input string prompt. Returns ------- list A list of Mistral message objects. """ from mistralai import UserMessage return [UserMessage(content=model_input)] @format_input.register(list) def format_list_model_input(self, model_input: list) -> list: """Format a list input into a list of messages. Parameters ---------- model_input : list The input list, containing a string prompt and optionally Image objects (vision models only). Returns ------- list A list of Mistral message objects. """ from mistralai import UserMessage return [UserMessage(content=self._create_message_content(model_input))] @format_input.register(Chat) def format_chat_model_input(self, model_input: Chat) -> list: """Format a Chat input into a list of messages. Parameters ---------- model_input : Chat The Chat object containing a list of message dictionaries. Returns ------- list A list of Mistral message objects. """ from mistralai import UserMessage, AssistantMessage, SystemMessage messages = [] for message in model_input.messages: role = message["role"] content = message["content"] if role == "user": messages.append( UserMessage(content=self._create_message_content(content)) ) elif role == "assistant": messages.append(AssistantMessage(content=content)) elif role == "system": messages.append(SystemMessage(content=content)) else: raise ValueError(f"Unsupported role: {role}") return messages def _create_message_content( self, content: Union[str, list] ) -> Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]: """Create message content from an input. Parameters ---------- content : Union[str, list] The content to format, either a string or a list containing a string and optionally Image objects. Returns ------- Union[str, List[Dict[str, Union[str, Dict[str, str]]]]] The formatted content, either a string or a list of content parts (text and image URLs). """ if isinstance(content, str): return content elif isinstance(content, list): if not content: raise ValueError("Content list cannot be empty.") if not isinstance(content[0], str): raise ValueError( "The first item in the list should be a string." ) if len(content) == 1: return content[0] content_parts: List[Dict[str, Union[str, Dict[str, str]]]] = [ {"type": "text", "text": content[0]} ] for item in content[1:]: if isinstance(item, Image): data_url = f"data:{item.image_format};base64,{item.image_str}" content_parts.append({ "type": "image_url", "image_url": {"url": data_url} }) else: raise ValueError( f"Invalid item type in content list: {type(item)}. " + "Expected Image objects after the first string." ) return content_parts else: raise TypeError( f"Invalid content type: {type(content)}. " + "Content must be a string or a list starting with a string " + "followed by optional Image objects." ) def format_output_type(self, output_type: Optional[Any] = None) -> dict: """Generate the `response_format` argument to pass to the client. Parameters ---------- output_type : Optional[Any] The desired output type provided by the user. Returns ------- dict The `response_format` dict to pass to the client. """ if output_type is None: return {} # JSON schema types elif is_pydantic_model(output_type): schema = output_type.model_json_schema() return self.format_json_schema_type(schema, output_type.__name__) elif is_dataclass(output_type): schema = TypeAdapter(output_type).json_schema() return self.format_json_schema_type(schema, output_type.__name__) elif is_typed_dict(output_type): schema = TypeAdapter(output_type).json_schema() return self.format_json_schema_type(schema, output_type.__name__) elif is_genson_schema_builder(output_type): schema = json.loads(output_type.to_json()) return self.format_json_schema_type(schema) elif isinstance(output_type, JsonSchema): return self.format_json_schema_type(json.loads(output_type.schema)) # Json mode elif is_native_dict(output_type): return {"type": "json_object"} # Unsupported types elif isinstance(output_type, Regex): raise TypeError( "Regex-based structured outputs are not available with " "Mistral." ) elif isinstance(output_type, CFG): raise TypeError( "CFG-based structured outputs are not available with Mistral." ) else: type_name = getattr(output_type, "__name__", str(output_type)) raise TypeError( f"The type {type_name} is not available with Mistral." ) def format_json_schema_type( self, schema: dict, schema_name: str = "default" ) -> dict: """Create the `response_format` argument to pass to the client from a JSON schema dictionary. Parameters ---------- schema : dict The JSON schema to format. schema_name : str The name of the schema. Returns ------- dict The value of the `response_format` argument to pass to the client. """ schema = set_additional_properties_false_json_schema(schema) return { "type": "json_schema", "json_schema": { "schema": schema, "name": schema_name.lower(), "strict": True } } class Mistral(Model): """Thin wrapper around the `mistralai.Mistral` client. Converts input and output types to arguments for the `mistralai.Mistral` client's `chat.complete` or `chat.stream` methods. """ def __init__( self, client: "MistralClient", model_name: Optional[str] = None ): """ Parameters ---------- client : MistralClient A mistralai.Mistral client instance. model_name : Optional[str] The name of the model to use. """ self.client = client self.model_name = model_name self.type_adapter = MistralTypeAdapter() def generate( self, model_input: Union[Chat, list, str], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> Union[str, list[str]]: """Generate a response from the model. Parameters ---------- model_input : Union[Chat, list, str] The prompt or chat messages to generate a response from. output_type : Optional[Any] The desired format of the response (e.g., JSON schema). **inference_kwargs : Any Additional keyword arguments to pass to the client. Returns ------- Union[str, list[str]] The response generated by the model as text. """ messages = self.type_adapter.format_input(model_input) response_format = self.type_adapter.format_output_type(output_type) if "model" not in inference_kwargs and self.model_name is not None: inference_kwargs["model"] = self.model_name try: result = self.client.chat.complete( messages=messages, response_format=response_format, **inference_kwargs, ) except Exception as e: if "schema" in str(e).lower() or "json_schema" in str(e).lower(): raise TypeError( f"Mistral does not support your schema: {e}. " "Try a local model or dottxt instead." ) else: raise RuntimeError(f"Mistral API error: {e}") from e outputs = [choice.message for choice in result.choices] if len(outputs) == 1: return outputs[0].content else: return [m.content for m in outputs] def generate_batch( self, model_input, output_type=None, **inference_kwargs, ): raise NotImplementedError( "The `mistralai` library does not support batch inference." ) def generate_stream( self, model_input: Union[Chat, list, str], output_type: Optional[Any] = None, **inference_kwargs, ) -> Iterator[str]: """Generate a stream of responses from the model. Parameters ---------- model_input : Union[Chat, list, str] The prompt or chat messages to generate a response from. output_type : Optional[Any] The desired format of the response (e.g., JSON schema). **inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Iterator[str] An iterator that yields the text chunks generated by the model. """ messages = self.type_adapter.format_input(model_input) response_format = self.type_adapter.format_output_type(output_type) if "model" not in inference_kwargs and self.model_name is not None: inference_kwargs["model"] = self.model_name try: stream = self.client.chat.stream( messages=messages, response_format=response_format, **inference_kwargs ) except Exception as e: if "schema" in str(e).lower() or "json_schema" in str(e).lower(): raise TypeError( f"Mistral does not support your schema: {e}. " "Try a local model or dottxt instead." ) else: raise RuntimeError(f"Mistral API error: {e}") from e for chunk in stream: if ( hasattr(chunk, "data") and chunk.data.choices and chunk.data.choices[0].delta.content is not None ): yield chunk.data.choices[0].delta.content class AsyncMistral(AsyncModel): """Async thin wrapper around the `mistralai.Mistral` client. Converts input and output types to arguments for the `mistralai.Mistral` client's async methods (`chat.complete_async` or `chat.stream_async`). """ def __init__( self, client: "MistralClient", model_name: Optional[str] = None ): """ Parameters ---------- client : MistralClient A mistralai.Mistral client instance. model_name : Optional[str] The name of the model to use. """ self.client = client self.model_name = model_name self.type_adapter = MistralTypeAdapter() async def generate( self, model_input: Union[Chat, list, str], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> Union[str, list[str]]: """Generate a response from the model asynchronously. Parameters ---------- model_input : Union[Chat, list, str] The prompt or chat messages to generate a response from. output_type : Optional[Any] The desired format of the response (e.g., JSON schema). **inference_kwargs : Any Additional keyword arguments to pass to the client. Returns ------- Union[str, list[str]] The response generated by the model as text. """ messages = self.type_adapter.format_input(model_input) response_format = self.type_adapter.format_output_type(output_type) if "model" not in inference_kwargs and self.model_name is not None: inference_kwargs["model"] = self.model_name try: result = await self.client.chat.complete_async( messages=messages, response_format=response_format, stream=False, **inference_kwargs, ) except Exception as e: if "schema" in str(e).lower() or "json_schema" in str(e).lower(): raise TypeError( f"Mistral does not support your schema: {e}. " "Try a local model or dottxt instead." ) else: raise RuntimeError(f"Mistral API error: {e}") from e outputs = [choice.message for choice in result.choices] if len(outputs) == 1: return outputs[0].content else: return [m.content for m in outputs] async def generate_batch( self, model_input, output_type=None, **inference_kwargs, ): raise NotImplementedError( "The mistralai library does not support batch inference." ) async def generate_stream( self, model_input, output_type=None, **inference_kwargs, ): """Generate text from the model as an async stream of chunks. Parameters ---------- model_input str, list, or chat input to generate from. output_type Optional type for structured output. **inference_kwargs Extra kwargs like "model" name. Yields ------ str Chunks of text as they are streamed. """ messages = self.type_adapter.format_input(model_input) response_format = self.type_adapter.format_output_type(output_type) if "model" not in inference_kwargs and self.model_name is not None: inference_kwargs["model"] = self.model_name try: response = await self.client.chat.stream_async( messages=messages, response_format=response_format, **inference_kwargs ) except Exception as e: if "schema" in str(e).lower() or "json_schema" in str(e).lower(): raise TypeError( f"Mistral does not support your schema: {e}. " "Try a local model or dottxt instead." ) else: raise RuntimeError(f"Mistral API error: {e}") from e async for chunk in response: if ( hasattr(chunk, "data") and chunk.data.choices and len(chunk.data.choices) > 0 and hasattr(chunk.data.choices[0], "delta") and chunk.data.choices[0].delta.content is not None ): yield chunk.data.choices[0].delta.content def from_mistral( client: "MistralClient", model_name: Optional[str] = None, async_client: bool = False, ) -> Union[Mistral, AsyncMistral]: """Create an Outlines Mistral model instance from a mistralai.Mistral client. Parameters ---------- client : MistralClient A mistralai.Mistral client instance. model_name : Optional[str] The name of the model to use. async_client : bool If True, return an AsyncMistral instance; otherwise, return a Mistral instance. Returns ------- Union[Mistral, AsyncMistral] An Outlines Mistral or AsyncMistral model instance. """ from mistralai import Mistral as MistralClient if not isinstance(client, MistralClient): raise ValueError( "Invalid client type. The client must be an instance of " "`mistralai.Mistral`." ) if async_client: return AsyncMistral(client, model_name) else: return Mistral(client, model_name) ================================================ FILE: outlines/models/mlxlm.py ================================================ """Integration with the `mlx_lm` library.""" from functools import singledispatchmethod from typing import TYPE_CHECKING, Iterator, List, Optional from outlines.inputs import Chat from outlines.models.base import Model, ModelTypeAdapter from outlines.models.tokenizer import _check_hf_chat_template from outlines.models.transformers import TransformerTokenizer from outlines.processors import OutlinesLogitsProcessor if TYPE_CHECKING: import mlx.nn as nn from transformers import PreTrainedTokenizer __all__ = ["MLXLM", "from_mlxlm"] class MLXLMTypeAdapter(ModelTypeAdapter): """Type adapter for the `MLXLM` model.""" def __init__(self, tokenizer: "PreTrainedTokenizer", has_chat_template: bool = False): self.tokenizer = tokenizer self.has_chat_template = has_chat_template @singledispatchmethod def format_input(self, model_input): """Generate the prompt argument to pass to the model. Parameters ---------- model_input The input provided by the user. Returns ------- str The formatted input to be passed to the model. """ raise NotImplementedError( f"The input type {type(model_input)} is not available with " "mlx-lm. The available types are `str` and `Chat`." ) @format_input.register(str) def format_str_input(self, model_input: str) -> str: if self.has_chat_template: return self.format_chat_input(Chat([{"role": "user", "content": model_input}])) return model_input @format_input.register(Chat) def format_chat_input(self, model_input: Chat) -> str: if not all( isinstance(message["content"], str) for message in model_input.messages ): raise ValueError( "mlx-lm does not support multi-modal messages." + "The content of each message must be a string." ) return self.tokenizer.apply_chat_template( model_input.messages, tokenize=False, add_generation_prompt=True, ) def format_output_type( self, output_type: Optional[OutlinesLogitsProcessor] = None, ) -> Optional[List[OutlinesLogitsProcessor]]: """Generate the logits processor argument to pass to the model. Parameters ---------- output_type The logits processor provided. Returns ------- Optional[list[OutlinesLogitsProcessor]] The logits processor argument to be passed to the model. """ if not output_type: return None return [output_type] class MLXLM(Model): """Thin wrapper around an `mlx_lm` model. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `mlx_lm` library. """ tensor_library_name = "mlx" def __init__( self, model: "nn.Module", tokenizer: "PreTrainedTokenizer", ): """ Parameters ---------- model An instance of an `mlx_lm` model. tokenizer An instance of an `mlx_lm` tokenizer or of a compatible `transformers` tokenizer. """ self.model = model # self.mlx_tokenizer is used by the mlx-lm in its generate function self.mlx_tokenizer = tokenizer # self.tokenizer is used by the logits processor self.tokenizer = TransformerTokenizer(tokenizer._tokenizer) self.type_adapter = MLXLMTypeAdapter( tokenizer=tokenizer, has_chat_template=_check_hf_chat_template(tokenizer) ) def generate( self, model_input: str, output_type: Optional[OutlinesLogitsProcessor] = None, **kwargs, ) -> str: """Generate text using `mlx-lm`. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The logits processor the model will use to constrain the format of the generated text. kwargs Additional keyword arguments to pass to the `mlx-lm` library. Returns ------- str The text generated by the model. """ from mlx_lm import generate return generate( self.model, self.mlx_tokenizer, self.type_adapter.format_input(model_input), logits_processors=self.type_adapter.format_output_type(output_type), **kwargs, ) def generate_batch( self, model_input: list[str], output_type: Optional[OutlinesLogitsProcessor] = None, **kwargs, ) -> list[str]: """Generate a batch of text using `mlx-lm`. Parameters ---------- model_input The list of prompts based on which the model will generate a response. output_type The logits processor the model will use to constrain the format of the generated text. kwargs Additional keyword arguments to pass to the `mlx-lm` library. Returns ------- list[str] The list of text generated by the model. """ from mlx_lm import batch_generate if output_type: raise NotImplementedError( "mlx-lm does not support constrained generation with batching." + "You cannot provide an `output_type` with this method." ) model_input = [self.type_adapter.format_input(item) for item in model_input] # Contrarily to the other generate methods, batch_generate requires # tokenized prompts add_special_tokens = [ ( self.mlx_tokenizer.bos_token is None or not prompt.startswith(self.mlx_tokenizer.bos_token) ) for prompt in model_input ] tokenized_model_input = [ self.mlx_tokenizer.encode( model_input[i], add_special_tokens=add_special_tokens[i] ) for i in range(len(model_input)) ] response = batch_generate( self.model, self.mlx_tokenizer, tokenized_model_input, **kwargs, ) return response.texts def generate_stream( self, model_input: str, output_type: Optional[OutlinesLogitsProcessor] = None, **kwargs, ) -> Iterator[str]: """Stream text using `mlx-lm`. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The logits processor the model will use to constrain the format of the generated text. kwargs Additional keyword arguments to pass to the `mlx-lm` library. Returns ------- Iterator[str] An iterator that yields the text generated by the model. """ from mlx_lm import stream_generate for gen_response in stream_generate( self.model, self.mlx_tokenizer, self.type_adapter.format_input(model_input), logits_processors=self.type_adapter.format_output_type(output_type), **kwargs, ): yield gen_response.text def from_mlxlm(model: "nn.Module", tokenizer: "PreTrainedTokenizer") -> MLXLM: """Create an Outlines `MLXLM` model instance from an `mlx_lm` model and a tokenizer. Parameters ---------- model An instance of an `mlx_lm` model. tokenizer An instance of an `mlx_lm` tokenizer or of a compatible transformers tokenizer. Returns ------- MLXLM An Outlines `MLXLM` model instance. """ return MLXLM(model, tokenizer) ================================================ FILE: outlines/models/ollama.py ================================================ """Integration with the `ollama` library.""" from functools import singledispatchmethod from typing import ( TYPE_CHECKING, Any, AsyncIterator, Iterator, Optional, Union, cast, ) from outlines.inputs import Chat, Image from outlines.models.base import AsyncModel, Model, ModelTypeAdapter from outlines.types import CFG, JsonSchema, Regex if TYPE_CHECKING: from ollama import Client from ollama import AsyncClient __all__ = ["AsyncOllama", "Ollama", "from_ollama"] class OllamaTypeAdapter(ModelTypeAdapter): """Type adapter for the `Ollama` model.""" @singledispatchmethod def format_input(self, model_input): """Generate the value of the `messages` argument to pass to the client. Parameters ---------- model_input The input provided by the user. Returns ------- list The formatted value of the `messages` argument to be passed to the client. """ raise TypeError( f"The input type {type(model_input)} is not available with " "Ollama. The only available types are `str`, `list` and `Chat`." ) @format_input.register(str) def format_str_model_input(self, model_input: str) -> list: """Generate the value of the `messages` argument to pass to the client when the user only passes a prompt. """ return [ self._create_message("user", model_input) ] @format_input.register(list) def format_list_model_input(self, model_input: list) -> list: """Generate the value of the `messages` argument to pass to the client when the user passes a prompt and images. """ return [ self._create_message("user", model_input) ] @format_input.register(Chat) def format_chat_model_input(self, model_input: Chat) -> list: """Generate the value of the `messages` argument to pass to the client when the user passes a Chat instance. """ return [ self._create_message(message["role"], message["content"]) for message in model_input.messages ] def _create_message(self, role: str, content: str | list) -> dict: """Create a message.""" if isinstance(content, str): return { "role": role, "content": content, } elif isinstance(content, list): prompt = content[0] images = content[1:] if not all(isinstance(image, Image) for image in images): raise ValueError("All assets provided must be of type Image") return { "role": role, "content": prompt, "images": [image.image_str for image in images], } else: raise ValueError( f"Invalid content type: {type(content)}. " "The content must be a string or a list containing a string " "and a list of images." ) def format_output_type( self, output_type: Optional[Any] = None ) -> Optional[dict]: """Format the output type to pass to the client. Parameters ---------- output_type The output type provided by the user. Returns ------- Optional[str] The formatted output type to be passed to the model. """ if output_type is None: return None elif isinstance(output_type, Regex): raise TypeError( "Regex-based structured outputs are not supported by Ollama. " "Use an open source model in the meantime." ) elif isinstance(output_type, CFG): raise TypeError( "CFG-based structured outputs are not supported by Ollama. " "Use an open source model in the meantime." ) elif JsonSchema.is_json_schema(output_type): return cast(dict, JsonSchema.convert_to(output_type, ["dict"])) else: type_name = getattr(output_type, "__name__", output_type) raise TypeError( f"The type `{type_name}` is not supported by Ollama. " "Consider using a local model instead." ) class Ollama(Model): """Thin wrapper around the `ollama.Client` client. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `ollama.Client` client. """ def __init__(self, client: "Client", model_name: Optional[str] = None): """ Parameters ---------- client The `ollama.Client` client. model_name The name of the model to use. """ self.client = client self.model_name = model_name self.type_adapter = OllamaTypeAdapter() def generate(self, model_input: Chat | str | list, output_type: Optional[Any] = None, **kwargs: Any, ) -> str: """Generate text using Ollama. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema. **kwargs Additional keyword arguments to pass to the client. Returns ------- str The text generated by the model. """ if "model" not in kwargs and self.model_name is not None: kwargs["model"] = self.model_name print(self.type_adapter.format_input(model_input)) response = self.client.chat( messages=self.type_adapter.format_input(model_input), format=self.type_adapter.format_output_type(output_type), **kwargs, ) return response.message.content def generate_batch( self, model_input, output_type = None, **kwargs, ): raise NotImplementedError( "The `ollama` library does not support batch inference." ) def generate_stream( self, model_input: Chat | str | list, output_type: Optional[Any] = None, **kwargs: Any, ) -> Iterator[str]: """Stream text using Ollama. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema. **kwargs Additional keyword arguments to pass to the client. Returns ------- Iterator[str] An iterator that yields the text generated by the model. """ if "model" not in kwargs and self.model_name is not None: kwargs["model"] = self.model_name response = self.client.chat( messages=self.type_adapter.format_input(model_input), format=self.type_adapter.format_output_type(output_type), stream=True, **kwargs, ) for chunk in response: yield chunk.message.content class AsyncOllama(AsyncModel): """Thin wrapper around the `ollama.AsyncClient` client. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `ollama.AsyncClient` client. """ def __init__( self,client: "AsyncClient", model_name: Optional[str] = None, ): """ Parameters ---------- client The `ollama.Client` client. model_name The name of the model to use. """ self.client = client self.model_name = model_name self.type_adapter = OllamaTypeAdapter() async def generate(self, model_input: Chat | str | list, output_type: Optional[Any] = None, **kwargs: Any, ) -> str: """Generate text using Ollama. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema. **kwargs Additional keyword arguments to pass to the client. Returns ------- str The text generated by the model. """ if "model" not in kwargs and self.model_name is not None: kwargs["model"] = self.model_name response = await self.client.chat( messages=self.type_adapter.format_input(model_input), format=self.type_adapter.format_output_type(output_type), **kwargs, ) return response.message.content async def generate_batch( self, model_input, output_type = None, **kwargs, ): raise NotImplementedError( "The `ollama` library does not support batch inference." ) async def generate_stream( # type: ignore self, model_input: Chat | str | list, output_type: Optional[Any] = None, **kwargs: Any, ) -> AsyncIterator[str]: """Stream text using Ollama. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema. **kwargs Additional keyword arguments to pass to the client. Returns ------- Iterator[str] An iterator that yields the text generated by the model. """ if "model" not in kwargs and self.model_name is not None: kwargs["model"] = self.model_name stream = await self.client.chat( messages=self.type_adapter.format_input(model_input), format=self.type_adapter.format_output_type(output_type), stream=True, **kwargs, ) async for chunk in stream: yield chunk.message.content def from_ollama( client: Union["Client", "AsyncClient"], model_name: Optional[str] = None ) -> Union[Ollama, AsyncOllama]: """Create an Outlines `Ollama` model instance from an `ollama.Client` or `ollama.AsyncClient` instance. Parameters ---------- client A `ollama.Client` or `ollama.AsyncClient` instance. model_name The name of the model to use. Returns ------- Union[Ollama, AsyncOllama] An Outlines `Ollama` or `AsyncOllama` model instance. """ from ollama import AsyncClient, Client if isinstance(client, Client): return Ollama(client, model_name) elif isinstance(client, AsyncClient): return AsyncOllama(client, model_name) else: raise ValueError( "Invalid client type, the client must be an instance of " "`ollama.Client` or `ollama.AsyncClient`." ) ================================================ FILE: outlines/models/openai.py ================================================ """Integration with OpenAI's API.""" from typing import ( TYPE_CHECKING, Any, AsyncIterator, Iterator, Optional, Union, cast, ) from functools import singledispatchmethod from pydantic import BaseModel from outlines.inputs import Chat, Image from outlines.models.base import AsyncModel, Model, ModelTypeAdapter from outlines.models.utils import set_additional_properties_false_json_schema from outlines.types import JsonSchema, Regex, CFG from outlines.types.utils import is_native_dict if TYPE_CHECKING: from openai import ( OpenAI as OpenAIClient, AsyncOpenAI as AsyncOpenAIClient, AzureOpenAI as AzureOpenAIClient, AsyncAzureOpenAI as AsyncAzureOpenAIClient, ) __all__ = ["AsyncOpenAI", "OpenAI", "from_openai"] class OpenAITypeAdapter(ModelTypeAdapter): """Type adapter for the `OpenAI` model. `OpenAITypeAdapter` is responsible for preparing the arguments to OpenAI's `completions.create` methods: the input (prompt and possibly image), as well as the output type (only JSON). """ @singledispatchmethod def format_input(self, model_input): """Generate the `messages` argument to pass to the client. Parameters ---------- model_input The input provided by the user. Returns ------- dict The formatted input to be passed to the client. """ raise TypeError( f"The input type {type(model_input)} is not available with " "OpenAI. The only available types are `str`, `list` and `Chat`." ) @format_input.register(str) def format_str_model_input(self, model_input: str) -> list: """Generate the value of the `messages` argument to pass to the client when the user only passes a prompt. """ return [ self._create_message("user", model_input) ] @format_input.register(list) def format_list_model_input(self, model_input: list) -> list: """Generate the value of the `messages` argument to pass to the client when the user passes a prompt and images. """ return [ self._create_message("user", model_input) ] @format_input.register(Chat) def format_chat_model_input(self, model_input: Chat) -> list: """Generate the value of the `messages` argument to pass to the client when the user passes a Chat instance. """ return [ self._create_message(message["role"], message["content"]) for message in model_input.messages ] def _create_message(self, role: str, content: str | list) -> dict: """Create a message.""" if isinstance(content, str): return { "role": role, "content": content, } elif isinstance(content, list): prompt = content[0] images = content[1:] if not all(isinstance(image, Image) for image in images): raise ValueError("All assets provided must be of type Image") image_parts = [ self._create_img_content(image) for image in images ] return { "role": role, "content": [ {"type": "text", "text": prompt}, *image_parts, ], } else: raise ValueError( f"Invalid content type: {type(content)}. " "The content must be a string or a list containing a string " "and a list of images." ) def _create_img_content(self, image: Image) -> dict: """Create the content for an image input.""" return { "type": "image_url", "image_url": { "url": f"data:{image.image_format};base64,{image.image_str}" # noqa: E702 }, } def format_output_type(self, output_type: Optional[Any] = None) -> dict: """Generate the `response_format` argument to the client based on the output type specified by the user. Parameters ---------- output_type The output type provided by the user. Returns ------- dict The formatted output type to be passed to the client. """ # Unsupported languages if isinstance(output_type, Regex): raise TypeError( "Neither regex-based structured outputs nor the `pattern` keyword " "in Json Schema are available with OpenAI. Use an open source " "model or dottxt instead." ) elif isinstance(output_type, CFG): raise TypeError( "CFG-based structured outputs are not available with OpenAI. " "Use an open source model or dottxt instead." ) if output_type is None: return {} elif is_native_dict(output_type): return self.format_json_mode_type() elif JsonSchema.is_json_schema(output_type): return self.format_json_output_type( cast(dict, JsonSchema.convert_to(output_type, ["dict"])) ) else: type_name = getattr(output_type, "__name__", output_type) raise TypeError( f"The type `{type_name}` is not available with OpenAI. " "Use an open source model or dottxt instead." ) def format_json_output_type(self, schema: dict) -> dict: """Generate the `response_format` argument to the client when the user specified a `Json` output type. """ # OpenAI requires `additionalProperties` to be set to False schema = set_additional_properties_false_json_schema(schema) return { "response_format": { "type": "json_schema", "json_schema": { "name": "default", "strict": True, "schema": schema, }, } } def format_json_mode_type(self) -> dict: """Generate the `response_format` argument to the client when the user specified the output type should be a JSON but without specifying the schema (also called "JSON mode"). """ return {"response_format": {"type": "json_object"}} class OpenAI(Model): """Thin wrapper around the `openai.OpenAI` client. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `openai.OpenAI` client. """ def __init__( self, client: Union["OpenAIClient", "AzureOpenAIClient"], model_name: Optional[str] = None, ): """ Parameters ---------- client The `openai.OpenAI` client. model_name The name of the model to use. """ self.client = client self.model_name = model_name self.type_adapter = OpenAITypeAdapter() def generate( self, model_input: Union[Chat, list, str], output_type: Optional[Union[type[BaseModel], str]] = None, **inference_kwargs: Any, ) -> Union[str, list[str]]: """Generate text using OpenAI. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary. **inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Union[str, list[str]] The text generated by the model. """ import openai messages = self.type_adapter.format_input(model_input) response_format = self.type_adapter.format_output_type(output_type) if "model" not in inference_kwargs and self.model_name is not None: inference_kwargs["model"] = self.model_name try: result = self.client.chat.completions.create( messages=messages, **response_format, **inference_kwargs, ) except openai.BadRequestError as e: if e.body["message"].startswith("Invalid schema"): raise TypeError( f"OpenAI does not support your schema: {e.body['message']}. " "Try a local model or dottxt instead." ) else: raise e messages = [choice.message for choice in result.choices] for message in messages: if message.refusal is not None: raise ValueError( f"OpenAI refused to answer the request: {message.refusal}" ) if len(messages) == 1: return messages[0].content else: return [message.content for message in messages] def generate_batch( self, model_input, output_type = None, **inference_kwargs, ): raise NotImplementedError( "The `openai` library does not support batch inference." ) def generate_stream( self, model_input: Union[Chat, list, str], output_type: Optional[Union[type[BaseModel], str]] = None, **inference_kwargs, ) -> Iterator[str]: """Stream text using OpenAI. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary. **inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Iterator[str] An iterator that yields the text generated by the model. """ import openai messages = self.type_adapter.format_input(model_input) response_format = self.type_adapter.format_output_type(output_type) if "model" not in inference_kwargs and self.model_name is not None: inference_kwargs["model"] = self.model_name try: stream = self.client.chat.completions.create( stream=True, messages=messages, **response_format, **inference_kwargs ) except openai.BadRequestError as e: if e.body["message"].startswith("Invalid schema"): raise TypeError( f"OpenAI does not support your schema: {e.body['message']}. " "Try a local model or dottxt instead." ) else: raise e for chunk in stream: if chunk.choices and chunk.choices[0].delta.content is not None: yield chunk.choices[0].delta.content class AsyncOpenAI(AsyncModel): """Thin wrapper around the `openai.AsyncOpenAI` client. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `openai.AsyncOpenAI` client. """ def __init__( self, client: Union["AsyncOpenAIClient", "AsyncAzureOpenAIClient"], model_name: Optional[str] = None, ): """ Parameters ---------- client The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client. model_name The name of the model to use. """ self.client = client self.model_name = model_name self.type_adapter = OpenAITypeAdapter() async def generate( self, model_input: Union[Chat, list, str], output_type: Optional[Union[type[BaseModel], str]] = None, **inference_kwargs: Any, ) -> Union[str, list[str]]: """Generate text using OpenAI. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary. **inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Union[str, list[str]] The text generated by the model. """ import openai messages = self.type_adapter.format_input(model_input) response_format = self.type_adapter.format_output_type(output_type) if "model" not in inference_kwargs and self.model_name is not None: inference_kwargs["model"] = self.model_name try: result = await self.client.chat.completions.create( messages=messages, **response_format, **inference_kwargs, ) except openai.BadRequestError as e: if e.body["message"].startswith("Invalid schema"): raise TypeError( f"OpenAI does not support your schema: {e.body['message']}. " "Try a local model or dottxt instead." ) else: raise e messages = [choice.message for choice in result.choices] for message in messages: if message.refusal is not None: raise ValueError( f"OpenAI refused to answer the request: {message.refusal}" ) if len(messages) == 1: return messages[0].content else: return [message.content for message in messages] async def generate_batch( self, model_input, output_type = None, **inference_kwargs, ): raise NotImplementedError( "The `openai` library does not support batch inference." ) async def generate_stream( # type: ignore self, model_input: Union[Chat, list, str], output_type: Optional[Union[type[BaseModel], str]] = None, **inference_kwargs, ) -> AsyncIterator[str]: """Stream text using OpenAI. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. The output type must be of a type that can be converted to a JSON schema or an empty dictionary. **inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Iterator[str] An iterator that yields the text generated by the model. """ import openai messages = self.type_adapter.format_input(model_input) response_format = self.type_adapter.format_output_type(output_type) if "model" not in inference_kwargs and self.model_name is not None: inference_kwargs["model"] = self.model_name try: stream = await self.client.chat.completions.create( stream=True, messages=messages, **response_format, **inference_kwargs ) except openai.BadRequestError as e: if e.body["message"].startswith("Invalid schema"): raise TypeError( f"OpenAI does not support your schema: {e.body['message']}. " "Try a local model or dottxt instead." ) else: raise e async for chunk in stream: if chunk.choices and chunk.choices[0].delta.content is not None: yield chunk.choices[0].delta.content def from_openai( client: Union[ "OpenAIClient", "AsyncOpenAIClient", "AzureOpenAIClient", "AsyncAzureOpenAIClient", ], model_name: Optional[str] = None, ) -> Union[OpenAI, AsyncOpenAI]: """Create an Outlines `OpenAI` or `AsyncOpenAI` model instance from an `openai.OpenAI` or `openai.AsyncOpenAI` client. Parameters ---------- client An `openai.OpenAI`, `openai.AsyncOpenAI`, `openai.AzureOpenAI` or `openai.AsyncAzureOpenAI` client instance. model_name The name of the model to use. Returns ------- OpenAI An Outlines `OpenAI` or `AsyncOpenAI` model instance. """ import openai if isinstance(client, openai.OpenAI): return OpenAI(client, model_name) elif isinstance(client, openai.AsyncOpenAI): return AsyncOpenAI(client, model_name) else: raise ValueError( "Invalid client type. The client must be an instance of " "+ `openai.OpenAI` or `openai.AsyncOpenAI`." ) ================================================ FILE: outlines/models/sglang.py ================================================ """Integration with an SGLang server.""" import json import warnings from typing import ( TYPE_CHECKING, Any, AsyncIterator, Iterator, Optional, Union ) from outlines.inputs import Chat from outlines.models.base import AsyncModel, Model, ModelTypeAdapter from outlines.models.openai import OpenAITypeAdapter from outlines.types.dsl import ( CFG, JsonSchema, python_types_to_terms, to_regex, ) if TYPE_CHECKING: from openai import AsyncOpenAI, OpenAI __all__ = ["AsyncSGLang", "SGLang", "from_sglang"] class SGLangTypeAdapter(ModelTypeAdapter): """Type adapter for the `SGLang` and `AsyncSGLang` models.""" def format_input(self, model_input: Union[Chat, list, str]) -> list: """Generate the value of the messages argument to pass to the client. We rely on the OpenAITypeAdapter to format the input as the sglang server expects input in the same format as OpenAI. Parameters ---------- model_input The input passed by the user. Returns ------- list The formatted input to be passed to the client. """ return OpenAITypeAdapter().format_input(model_input) def format_output_type(self, output_type: Optional[Any] = None) -> dict: """Generate the structured output argument to pass to the client. Parameters ---------- output_type The structured output type provided. Returns ------- dict The formatted output type to be passed to the client. """ if output_type is None: return {} term = python_types_to_terms(output_type) if isinstance(term, CFG): warnings.warn( "SGLang grammar-based structured outputs expects an EBNF " "grammar instead of a Lark grammar as is generally used in " "Outlines. The grammar cannot be used as a structured output " "type with an outlines backend, it is only compatible with " "the sglang and llguidance backends." ) return {"extra_body": {"ebnf": term.definition}} elif isinstance(term, JsonSchema): return OpenAITypeAdapter().format_json_output_type( json.loads(term.schema) ) else: return {"extra_body": {"regex": to_regex(term)}} class SGLang(Model): """Thin wrapper around the `openai.OpenAI` client used to communicate with an SGLang server. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `openai.OpenAI` client for the SGLang server. """ def __init__(self, client, model_name: Optional[str] = None): """ Parameters ---------- client An `openai.OpenAI` client instance. model_name The name of the model to use. """ self.client = client self.model_name = model_name self.type_adapter = SGLangTypeAdapter() def generate( self, model_input: Union[Chat, list, str], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> Union[str, list[str]]: """Generate text using SGLang. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them. inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Union[str, list[str]] The text generated by the model. """ client_args = self._build_client_args( model_input, output_type, **inference_kwargs, ) response = self.client.chat.completions.create(**client_args) messages = [choice.message for choice in response.choices] for message in messages: if message.refusal is not None: # pragma: no cover raise ValueError( f"The SGLang server refused to answer the request: " f"{message.refusal}" ) if len(messages) == 1: return messages[0].content else: return [message.content for message in messages] def generate_batch( self, model_input, output_type = None, **inference_kwargs, ): raise NotImplementedError( "SGLang does not support batch inference." ) def generate_stream( self, model_input: Union[Chat, list, str], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> Iterator[str]: """Stream text using SGLang. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them. inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Iterator[str] An iterator that yields the text generated by the model. """ client_args = self._build_client_args( model_input, output_type, **inference_kwargs, ) stream = self.client.chat.completions.create( **client_args, stream=True, ) for chunk in stream: # pragma: no cover if chunk.choices and chunk.choices[0].delta.content is not None: yield chunk.choices[0].delta.content def _build_client_args( self, model_input: Union[Chat, str, list], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> dict: """Build the arguments to pass to the SGLang client.""" messages = self.type_adapter.format_input(model_input) output_type_args = self.type_adapter.format_output_type(output_type) inference_kwargs.update(output_type_args) if "model" not in inference_kwargs and self.model_name is not None: inference_kwargs["model"] = self.model_name client_args = { "messages": messages, **inference_kwargs, } return client_args class AsyncSGLang(AsyncModel): """Thin async wrapper around the `openai.OpenAI` client used to communicate with an SGLang server. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `openai.OpenAI` client for the SGLang server. """ def __init__(self, client, model_name: Optional[str] = None): """ Parameters ---------- client An `openai.AsyncOpenAI` client instance. model_name The name of the model to use. Parameters ---------- client An `openai.AsyncOpenAI` client instance. """ self.client = client self.model_name = model_name self.type_adapter = SGLangTypeAdapter() async def generate( self, model_input: Union[Chat, str, list], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> Union[str, list[str]]: """Generate text using `sglang`. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them. inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Union[str, list[str]] The text generated by the model. """ client_args = self._build_client_args( model_input, output_type, **inference_kwargs, ) response = await self.client.chat.completions.create(**client_args) messages = [choice.message for choice in response.choices] for message in messages: if message.refusal is not None: # pragma: no cover raise ValueError( f"The sglang server refused to answer the request: " f"{message.refusal}" ) if len(messages) == 1: return messages[0].content else: return [message.content for message in messages] async def generate_batch( self, model_input, output_type = None, **inference_kwargs, ): raise NotImplementedError( "SGLang does not support batch inference." ) async def generate_stream( # type: ignore self, model_input: Union[Chat, str, list], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> AsyncIterator[str]: """Return a text generator. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them. inference_kwargs Additional keyword arguments to pass to the client. Returns ------- AsyncIterator[str] An async iterator that yields the text generated by the model. """ client_args = self._build_client_args( model_input, output_type, **inference_kwargs, ) stream = await self.client.chat.completions.create( **client_args, stream=True, ) async for chunk in stream: # pragma: no cover if chunk.choices and chunk.choices[0].delta.content is not None: yield chunk.choices[0].delta.content def _build_client_args( self, model_input: Union[Chat, str, list], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> dict: """Build the arguments to pass to the SGLang client.""" messages = self.type_adapter.format_input(model_input) output_type_args = self.type_adapter.format_output_type(output_type) inference_kwargs.update(output_type_args) if "model" not in inference_kwargs and self.model_name is not None: inference_kwargs["model"] = self.model_name client_args = { "messages": messages, **inference_kwargs, } return client_args def from_sglang( client: Union["OpenAI", "AsyncOpenAI"], model_name: Optional[str] = None, ) -> Union[SGLang, AsyncSGLang]: """Create a `SGLang` or `AsyncSGLang` instance from an `openai.OpenAI` or `openai.AsyncOpenAI` instance. Parameters ---------- client An `openai.OpenAI` or `openai.AsyncOpenAI` instance. model_name The name of the model to use. Returns ------- Union[SGLang, AsyncSGLang] An Outlines `SGLang` or `AsyncSGLang` model instance. """ from openai import AsyncOpenAI, OpenAI if isinstance(client, OpenAI): return SGLang(client, model_name) elif isinstance(client, AsyncOpenAI): return AsyncSGLang(client, model_name) else: raise ValueError( f"Unsupported client type: {type(client)}.\n" "Please provide an OpenAI or AsyncOpenAI instance." ) ================================================ FILE: outlines/models/tgi.py ================================================ """Integration with a TGI server.""" import json from functools import singledispatchmethod from typing import ( TYPE_CHECKING, Any, AsyncIterator, Iterator, Optional, Union, ) from outlines.models.base import AsyncModel,Model, ModelTypeAdapter from outlines.types.dsl import python_types_to_terms, to_regex, JsonSchema, CFG if TYPE_CHECKING: from huggingface_hub import AsyncInferenceClient, InferenceClient __all__ = ["AsyncTGI", "TGI", "from_tgi"] class TGITypeAdapter(ModelTypeAdapter): """Type adapter for the `TGI` and `AsyncTGI` models.""" @singledispatchmethod def format_input(self, model_input): """Generate the prompt argument to pass to the client. Argument -------- model_input The input passed by the user. Returns ------- str The formatted input to be passed to the model. """ raise NotImplementedError( f"The input type {input} is not available with TGI. " + "The only available type is `str`." ) @format_input.register(str) def format_str_input(self, model_input: str) -> str: return model_input def format_output_type(self, output_type: Optional[Any] = None) -> dict: """Generate the structured output argument to pass to the client. Argument -------- output_type The structured output type provided. Returns ------- dict The structured output argument to pass to the client. """ if output_type is None: return {} term = python_types_to_terms(output_type) if isinstance(term, CFG): raise NotImplementedError( "TGI does not support CFG-based structured outputs." ) elif isinstance(term, JsonSchema): return { "grammar": { "type": "json", "value": json.loads(term.schema), } } else: return { "grammar": { "type": "regex", "value": to_regex(term), } } class TGI(Model): """Thin wrapper around a `huggingface_hub.InferenceClient` client used to communicate with a `TGI` server. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `huggingface_hub.InferenceClient` client. """ def __init__(self, client): """ Parameters ---------- client A huggingface `InferenceClient` client instance. """ self.client = client self.type_adapter = TGITypeAdapter() def generate( self, model_input: str, output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> str: """Generate text using TGI. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. All output types except `CFG` are supported provided your server uses a backend that supports them. inference_kwargs Additional keyword arguments to pass to the client. Returns ------- str The text generated by the model. """ client_args = self._build_client_args( model_input, output_type, **inference_kwargs, ) return self.client.text_generation(**client_args) def generate_batch( self, model_input, output_type = None, **inference_kwargs, ): raise NotImplementedError("TGI does not support batch inference.") def generate_stream( self, model_input: str, output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> Iterator[str]: """Stream text using TGI. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. All output types except `CFG` are supported provided your server uses a backend that supports them. inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Iterator[str] An iterator that yields the text generated by the model. """ client_args = self._build_client_args( model_input, output_type, **inference_kwargs, ) stream = self.client.text_generation( **client_args, stream=True, ) for chunk in stream: # pragma: no cover yield chunk def _build_client_args( self, model_input: str, output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> dict: """Build the arguments to pass to the TGI client.""" prompt = self.type_adapter.format_input(model_input) output_type_args = self.type_adapter.format_output_type(output_type) inference_kwargs.update(output_type_args) client_args = { "prompt": prompt, **inference_kwargs, } return client_args class AsyncTGI(AsyncModel): """Thin async wrapper around a `huggingface_hub.AsyncInferenceClient` client used to communicate with a `TGI` server. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `huggingface_hub.AsyncInferenceClient` client. """ def __init__(self, client): """ Parameters ---------- client A huggingface `AsyncInferenceClient` client instance. """ self.client = client self.type_adapter = TGITypeAdapter() async def generate( self, model_input: str, output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> str: """Generate text using TGI. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. All output types except `CFG` are supported provided your server uses a backend that supports them. inference_kwargs Additional keyword arguments to pass to the client. Returns ------- str The text generated by the model. """ client_args = self._build_client_args( model_input, output_type, **inference_kwargs, ) response = await self.client.text_generation(**client_args) return response async def generate_batch( self, model_input, output_type = None, **inference_kwargs, ): raise NotImplementedError("TGI does not support batch inference.") async def generate_stream( # type: ignore self, model_input: str, output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> AsyncIterator[str]: """Stream text using TGI. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. All output types except `CFG` are supported provided your server uses a backend that supports them. inference_kwargs Additional keyword arguments to pass to the client. Returns ------- AsyncIterator[str] An async iterator that yields the text generated by the model. """ client_args = self._build_client_args( model_input, output_type, **inference_kwargs, ) stream = await self.client.text_generation( **client_args, stream=True ) async for chunk in stream: # pragma: no cover yield chunk def _build_client_args( self, model_input: str, output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> dict: """Build the arguments to pass to the TGI client.""" prompt = self.type_adapter.format_input(model_input) output_type_args = self.type_adapter.format_output_type(output_type) inference_kwargs.update(output_type_args) client_args = { "prompt": prompt, **inference_kwargs, } return client_args def from_tgi( client: Union["InferenceClient", "AsyncInferenceClient"], ) -> Union[TGI, AsyncTGI]: """Create an Outlines `TGI` or `AsyncTGI` model instance from an `huggingface_hub.InferenceClient` or `huggingface_hub.AsyncInferenceClient` instance. Parameters ---------- client An `huggingface_hub.InferenceClient` or `huggingface_hub.AsyncInferenceClient` instance. Returns ------- Union[TGI, AsyncTGI] An Outlines `TGI` or `AsyncTGI` model instance. """ from huggingface_hub import AsyncInferenceClient, InferenceClient if isinstance(client, InferenceClient): return TGI(client) elif isinstance(client, AsyncInferenceClient): return AsyncTGI(client) else: raise ValueError( f"Unsupported client type: {type(client)}.\n" + "Please provide an HuggingFace InferenceClient " + "or AsyncInferenceClient instance." ) ================================================ FILE: outlines/models/tokenizer.py ================================================ from typing import Dict, Hashable, List, Protocol, Set, Tuple, Union, TYPE_CHECKING if TYPE_CHECKING: import numpy as np from numpy.typing import NDArray from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast class Tokenizer(Hashable, Protocol): eos_token: str eos_token_id: int pad_token_id: int vocabulary: Dict[str, int] special_tokens: Set[str] def encode( self, prompt: Union[str, List[str]] ) -> "Tuple['NDArray[np.int64]', 'NDArray[np.int64]']": """Translate the input prompts into arrays of token ids and attention mask.""" ... def decode(self, token_ids: "NDArray[np.int64]") -> List[str]: """Translate an array of token ids to a string or list of strings.""" ... def convert_token_to_string(self, token: str) -> str: """Convert a token to its equivalent string. This is for instance useful for BPE tokenizers where whitespaces are represented by the special characted `Ġ`. This prevents matching a raw token that includes `Ġ` with a string. """ ... def _check_hf_chat_template(tokenizer: "PreTrainedTokenizer | PreTrainedTokenizerFast") -> bool: """Check if the HuggingFace tokenizer has a chat template.""" try: tokenizer.get_chat_template() return True except ValueError: return False ================================================ FILE: outlines/models/transformers.py ================================================ """Integration with the `transformers` library. """ import warnings from collections import defaultdict from functools import singledispatchmethod from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union from outlines.inputs import Audio, Chat, Image, Video from outlines.models.base import Model, ModelTypeAdapter from outlines.models.tokenizer import Tokenizer, _check_hf_chat_template from outlines.processors import OutlinesLogitsProcessor if TYPE_CHECKING: import torch from transformers import ( PreTrainedTokenizer, PreTrainedModel, ProcessorMixin, LogitsProcessorList, ) __all__ = ["Transformers", "TransformersMultiModal", "from_transformers"] def get_llama_tokenizer_types(): """Get all the Llama tokenizer types/classes that need work-arounds. When they can't be imported, a dummy class is created. """ try: from transformers.models.llama import LlamaTokenizer except ImportError: # pragma: no cover class LlamaTokenizer: # type: ignore pass try: from transformers.models.llama import LlamaTokenizerFast except ImportError: # pragma: no cover class LlamaTokenizerFast: # type: ignore pass try: from transformers.models.code_llama import CodeLlamaTokenizer except ImportError: # pragma: no cover class CodeLlamaTokenizer: # type: ignore pass try: from transformers.models.code_llama import CodeLlamaTokenizerFast except ImportError: # pragma: no cover class CodeLlamaTokenizerFast: # type: ignore pass return ( LlamaTokenizer, LlamaTokenizerFast, CodeLlamaTokenizer, CodeLlamaTokenizerFast, ) class TransformerTokenizer(Tokenizer): """Represents a tokenizer for models in the `transformers` library.""" def __init__(self, tokenizer: "PreTrainedTokenizer", **kwargs): self.tokenizer = tokenizer self.eos_token_id = self.tokenizer.eos_token_id self.eos_token = self.tokenizer.eos_token self.get_vocab = self.tokenizer.get_vocab if self.tokenizer.pad_token_id is None: self.tokenizer.pad_token_id = self.tokenizer.eos_token_id self.pad_token_id = self.eos_token_id else: self.pad_token_id = self.tokenizer.pad_token_id self.pad_token = self.tokenizer.pad_token self.special_tokens = set(self.tokenizer.all_special_tokens) self.vocabulary = self.tokenizer.get_vocab() self.is_llama = isinstance(self.tokenizer, get_llama_tokenizer_types()) def encode( self, prompt: Union[str, List[str]], **kwargs ) -> Tuple["torch.LongTensor", "torch.LongTensor"]: kwargs["padding"] = True kwargs["return_tensors"] = "pt" output = self.tokenizer(prompt, **kwargs) return output["input_ids"], output["attention_mask"] def decode(self, token_ids: "torch.LongTensor") -> List[str]: text = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True) return text def convert_token_to_string(self, token: str) -> str: from transformers.file_utils import SPIECE_UNDERLINE string = self.tokenizer.convert_tokens_to_string([token]) if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>": return " " + string return string def __eq__(self, other): if isinstance(other, type(self)): if hasattr(self, "model_name") and hasattr(self, "kwargs"): return ( other.model_name == self.model_name and other.kwargs == self.kwargs ) else: return other.tokenizer == self.tokenizer return NotImplemented def __hash__(self): from datasets.fingerprint import Hasher return hash(Hasher.hash(self.tokenizer)) def __getstate__(self): state = {"tokenizer": self.tokenizer} return state def __setstate__(self, state): self.__init__(state["tokenizer"]) class TransformersTypeAdapter(ModelTypeAdapter): """Type adapter for the `Transformers` model.""" def __init__(self, tokenizer: "PreTrainedTokenizer", has_chat_template: bool = False): self.tokenizer = tokenizer self.has_chat_template = has_chat_template @singledispatchmethod def format_input(self, model_input): """Generate the prompt argument to pass to the model. Parameters ---------- model_input The input passed by the user. Returns ------- str The formatted input to be passed to the model. """ raise TypeError( f"The input type {type(model_input)} is not available." "The only available types are `str` and `Chat`." ) @format_input.register(str) def format_str_input(self, model_input: str) -> str: if self.has_chat_template: return self.format_chat_input(Chat([{"role": "user", "content": model_input}])) return model_input @format_input.register(Chat) def format_chat_input(self, model_input: Chat) -> str: return self.tokenizer.apply_chat_template( model_input.messages, tokenize=False, add_generation_prompt=True, ) def format_output_type( self, output_type: Optional[OutlinesLogitsProcessor] = None, ) -> Optional["LogitsProcessorList"]: """Generate the logits processor argument to pass to the model. Parameters ---------- output_type The logits processor provided. Returns ------- Optional[LogitsProcessorList] The logits processor to pass to the model. """ from transformers import LogitsProcessorList if output_type is not None: return LogitsProcessorList([output_type]) return None class Transformers(Model): """Thin wrapper around a `transformers` model and a `transformers` tokenizer. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `transformers` model and tokenizer. """ def __init__( self, model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer", *, device_dtype: Optional["torch.dtype"] = None, ): """ Parameters: ---------- model A `PreTrainedModel`, or any model that is compatible with the `transformers` API for models. tokenizer A `PreTrainedTokenizer`, or any tokenizer that is compatible with the `transformers` API for tokenizers. device_dtype The dtype to use for the model. If not provided, the model will use the default dtype. """ # We need to handle the cases in which jax/flax or tensorflow # is not available in the environment. try: from transformers import FlaxPreTrainedModel except ImportError: # pragma: no cover FlaxPreTrainedModel = None try: from transformers import TFPreTrainedModel except ImportError: # pragma: no cover TFPreTrainedModel = None tokenizer.padding_side = "left" self.model = model self.hf_tokenizer = tokenizer self.tokenizer = TransformerTokenizer(tokenizer) self.device_dtype = device_dtype self.type_adapter = TransformersTypeAdapter( tokenizer=tokenizer, has_chat_template=_check_hf_chat_template(tokenizer) ) if ( FlaxPreTrainedModel is not None and isinstance(model, FlaxPreTrainedModel) ): # pragma: no cover self.tensor_library_name = "jax" warnings.warn(""" Support for `jax` has been deprecated and will be removed in version 1.4.0 of Outlines. Please use `torch` instead. Transformers models using `jax` do not support structured generation. """, DeprecationWarning, stacklevel=2, ) elif ( TFPreTrainedModel is not None and isinstance(model, TFPreTrainedModel) ): # pragma: no cover self.tensor_library_name = "tensorflow" warnings.warn(""" Support for `tensorflow` has been deprecated and will be removed in version 1.4.0 of Outlines. Please use `torch` instead. Transformers models using `tensorflow` do not support structured generation. """, DeprecationWarning, stacklevel=2, ) else: self.tensor_library_name = "torch" def _prepare_model_inputs( self, model_input, is_batch: bool = False, ) -> Tuple[Union[str, List[str]], dict]: """Turn the user input into arguments to pass to the model""" # Format validation if is_batch: prompts = [ self.type_adapter.format_input(item) for item in model_input ] else: prompts = self.type_adapter.format_input(model_input) input_ids, attention_mask = self.tokenizer.encode(prompts) inputs = { "input_ids": input_ids.to(self.model.device), "attention_mask": ( attention_mask.to(self.model.device, dtype=self.device_dtype) if self.device_dtype is not None else attention_mask.to(self.model.device) ), } return prompts, inputs def generate( self, model_input: Union[str, dict, Chat], output_type: Optional[OutlinesLogitsProcessor] = None, **inference_kwargs: Any, ) -> Union[str, List[str]]: """Generate text using `transformers`. Parameters ---------- model_input The prompt based on which the model will generate a response. For multi-modal models, the input should be a dictionary containing the `text` key with a value of type `Union[str, List[str]]` and the other keys required by the model. output_type The logits processor the model will use to constrain the format of the generated text. inference_kwargs Additional keyword arguments to pass to the `generate` method of the `transformers` model. Returns ------- Union[str, List[str]] The text generated by the model. """ prompts, inputs = self._prepare_model_inputs(model_input, False) logits_processor = self.type_adapter.format_output_type(output_type) generated_ids = self._generate_output_seq( prompts, inputs, logits_processor=logits_processor, **inference_kwargs, ) # required for multi-modal models that return a 2D tensor even when # num_return_sequences is 1 num_samples = inference_kwargs.get("num_return_sequences", 1) if num_samples == 1 and len(generated_ids.shape) == 2: generated_ids = generated_ids.squeeze(0) return self._decode_generation(generated_ids) def generate_batch( self, model_input: List[Union[str, dict, Chat]], output_type: Optional[OutlinesLogitsProcessor] = None, **inference_kwargs: Any, ) -> List[Union[str, List[str]]]: """""" prompts, inputs = self._prepare_model_inputs(model_input, True) # type: ignore logits_processor = self.type_adapter.format_output_type(output_type) generated_ids = self._generate_output_seq( prompts, inputs, logits_processor=logits_processor, **inference_kwargs ) # if there are multiple samples per input, convert generated_id to 3D num_samples = inference_kwargs.get("num_return_sequences", 1) if num_samples > 1: generated_ids = generated_ids.view(len(model_input), num_samples, -1) return self._decode_generation(generated_ids) def generate_stream(self, model_input, output_type, **inference_kwargs): """Not available for `transformers` models. TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810 """ raise NotImplementedError( "Streaming is not implemented for Transformers models." ) def _generate_output_seq(self, prompts, inputs, **inference_kwargs): input_ids = inputs["input_ids"] output_ids = self.model.generate( **inputs, **inference_kwargs, ) # encoder-decoder returns output_ids only, decoder-only returns full seq ids if self.model.config.is_encoder_decoder: generated_ids = output_ids else: generated_ids = output_ids[:, input_ids.shape[1] :] return generated_ids def _decode_generation(self, generated_ids: "torch.Tensor"): if len(generated_ids.shape) == 1: return self.tokenizer.decode([generated_ids])[0] elif len(generated_ids.shape) == 2: return self.tokenizer.decode(generated_ids) elif len(generated_ids.shape) == 3: return [ self.tokenizer.decode(generated_ids[i]) for i in range(len(generated_ids)) ] else: # pragma: no cover raise TypeError( "Generated outputs aren't 1D, 2D or 3D, but instead are " f"{generated_ids.shape}" ) class TransformersMultiModalTypeAdapter(ModelTypeAdapter): """Type adapter for `TransformersMultiModal` model.""" def __init__(self, **kwargs): self.tokenizer = kwargs.get("tokenizer") @singledispatchmethod def format_input(self, model_input): """Fomat the prompt arguments to pass to the model. Argument -------- model_input The input passed by the user. Returns ------- dict The formatted input. """ raise TypeError( f"The input type {type(model_input)} is not available. Please " + "provide a list containing a text prompt and assets " + "(`Image`, `Audio` or `Video` instances) supported by your " + "model or a `Chat` instance." ) @format_input.register(Chat) def format_chat_input(self, model_input: Chat) -> dict: conversation = [] assets = [] # process each message, convert if needed to standardized multimodal chat template format # and collect assets for HF processor for message in model_input.messages: processed_message, message_assets = self._prepare_message( message["role"], message["content"] ) conversation.append(processed_message) assets.extend(message_assets) formatted_prompt = self.tokenizer.apply_chat_template( conversation, tokenize=False, add_generation_prompt=True ) # use the formatted prompt and the assets to format the input return self.format_list_input([formatted_prompt, *assets]) def _prepare_message(self, role: str, content: str | list) -> tuple[dict, list]: """Create a message.""" if isinstance(content, str): return {"role": role, "content": content}, [] elif isinstance(content, list): if all(isinstance(item, dict) for item in content): # HF multimodal chat template return {"role": role, "content": content}, self._extract_assets_from_content(content) else: # list of string + assets prompt = content[0] assets = content[1:] assets_dict = [self._format_asset_for_template(asset) for asset in assets] return {"role": role, "content": [ {"type": "text", "text": prompt}, *assets_dict ]}, assets else: raise ValueError( f"Invalid content type: {type(content)}. " + "The content must be a string or a list containing text and assets " + "or a list of dict items with explicit types." ) def _extract_assets_from_content(self, content: list) -> list: """Process a list of dict items.""" assets = [] for item in content: if len(item) > 2: raise ValueError( f"Found item with multiple keys: {item}. " + "Each item in the content list must be a dictionary with a 'type' key and a single asset key. " + "To include multiple assets, use separate dictionary items. " + "For example: [{{'type': 'image', 'image': image1}}, {{'type': 'image', 'image': image2}}]. " ) if "type" not in item: raise ValueError( "Each item in the content list must be a dictionary with a 'type' key. " + "Valid types are 'text', 'image', 'video', or 'audio'. " + "For instance {{'type': 'text', 'text': 'your message'}}. " + f"Found item without 'type' key: {item}" ) if item["type"] == "text": continue elif item["type"] in ["image", "video", "audio"]: asset_key = item["type"] if asset_key not in item: raise ValueError( f"Item with type '{asset_key}' must contain a '{asset_key}' key. " + f"Found item: {item}" ) if isinstance(item[asset_key], (Image, Video, Audio)): assets.append(item[asset_key]) else: raise ValueError( "Assets must be of type `Image`, `Video` or `Audio`. " + f"Unsupported asset type: {type(item[asset_key])}" ) else: raise ValueError( "Content must be 'text', 'image', 'video' or 'audio'. " + f"Unsupported content type: {item['type']}") return assets def _format_asset_for_template(self, asset: Image | Video | Audio) -> dict: """Process an asset.""" if isinstance(asset, Image): return {"type": "image", "image": asset} elif isinstance(asset, Video): return {"type": "video", "video": asset} elif isinstance(asset, Audio): return {"type": "audio", "audio": asset} else: raise ValueError( "Assets must be of type `Image`, `Video` or `Audio`. " + f"Unsupported asset type: {type(asset)}" ) @format_input.register(list) def format_list_input(self, model_input: list) -> dict: prompt = model_input[0] assets = model_input[1:] if not assets: # handle empty assets case return {"text": prompt} asset_types = set(type(asset) for asset in assets) if len(asset_types) > 1: raise ValueError( "All assets must be of the same type. " + f"Found types: {asset_types}" ) asset_type = asset_types.pop() if asset_type == Image: return { "text": prompt, "images": [asset.image for asset in assets] } elif asset_type == Audio: # pragma: no cover return { "text": prompt, "audio": [asset.audio for asset in assets] } elif asset_type == Video: # pragma: no cover return { "text": prompt, "videos": [asset.video for asset in assets] } else: raise ValueError(f"Unsupported asset type: {asset_type}") def format_output_type( self, output_type: Optional[OutlinesLogitsProcessor] = None, ) -> Optional["LogitsProcessorList"]: """Generate the logits processor argument to pass to the model. Argument -------- output_type The logits processor provided. Returns ------- Optional[LogitsProcessorList] The logits processor to pass to the model. """ from transformers import LogitsProcessorList if output_type is not None: return LogitsProcessorList([output_type]) return None class TransformersMultiModal(Transformers): """Thin wrapper around a `transformers` model and a `transformers` processor. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `transformers` model and processor. """ def __init__( self, model: "PreTrainedModel", processor, *, device_dtype: Optional["torch.dtype"] = None, ): """Create a TransformersMultiModal model instance We rely on the `__init__` method of the `Transformers` class to handle most of the initialization and then add elements specific to multimodal models. Parameters ---------- model A `PreTrainedModel`, or any model that is compatible with the `transformers` API for models. processor A `ProcessorMixin` instance. device_dtype The dtype to use for the model. If not provided, the model will use the default dtype. """ self.processor = processor self.processor.padding_side = "left" self.processor.pad_token = "[PAD]" tokenizer: "PreTrainedTokenizer" = self.processor.tokenizer super().__init__(model, tokenizer, device_dtype=device_dtype) self.type_adapter = TransformersMultiModalTypeAdapter( tokenizer=tokenizer ) def _prepare_model_inputs( self, model_input, is_batch: bool = False, ) -> Tuple[Union[str, List[str]], dict]: """Turn the user input into arguments to pass to the model""" if is_batch: prompts = [ self.type_adapter.format_input(item) for item in model_input ] else: prompts = self.type_adapter.format_input(model_input) # The expected format is a single dict if is_batch: merged_prompts = defaultdict(list) for d in prompts: for key, value in d.items(): if key == "text": merged_prompts[key].append(value) else: merged_prompts[key].extend(value) else: merged_prompts = prompts # type: ignore inputs = self.processor( **merged_prompts, padding=True, return_tensors="pt" ) if self.device_dtype is not None: inputs = inputs.to(self.model.device, dtype=self.device_dtype) else: inputs = inputs.to(self.model.device) return merged_prompts["text"], inputs def from_transformers( model: "PreTrainedModel", tokenizer_or_processor: Union["PreTrainedTokenizer", "ProcessorMixin"], *, device_dtype: Optional["torch.dtype"] = None, ) -> Union[Transformers, TransformersMultiModal]: """Create an Outlines `Transformers` or `TransformersMultiModal` model instance from a `PreTrainedModel` instance and a `PreTrainedTokenizer` or `ProcessorMixin` instance. `outlines` supports `PreTrainedModelForCausalLM`, `PreTrainedMambaForCausalLM`, `PreTrainedModelForSeq2Seq` and any model that implements the `transformers` model API. Parameters ---------- model A `transformers.PreTrainedModel` instance. tokenizer_or_processor A `transformers.PreTrainedTokenizer` or `transformers.ProcessorMixin` instance. device_dtype The dtype to use for the model. If not provided, the model will use the default dtype. Returns ------- Union[Transformers, TransformersMultiModal] An Outlines `Transformers` or `TransformersMultiModal` model instance. """ from transformers import ( PreTrainedTokenizer, PreTrainedTokenizerFast, ProcessorMixin) if isinstance( tokenizer_or_processor, (PreTrainedTokenizer, PreTrainedTokenizerFast) ): tokenizer = tokenizer_or_processor return Transformers(model, tokenizer, device_dtype=device_dtype) elif isinstance(tokenizer_or_processor, ProcessorMixin): processor = tokenizer_or_processor return TransformersMultiModal(model, processor, device_dtype=device_dtype) else: raise ValueError( "We could determine whether the model passed to `from_transformers`" + " is a text-2-text or a multi-modal model. Please provide a " + "a transformers tokenizer or processor." ) ================================================ FILE: outlines/models/utils.py ================================================ import jsonpath_ng def set_additional_properties_false_json_schema(schema: dict) -> dict: """Set additionalProperties to False to all objects in the schema using jsonpath. Parameters ---------- schema The JSON schema to modify Returns ------- dict The modified schema with additionalProperties set to False """ # Get all nodes jsonpath_expr = jsonpath_ng.parse('$..*') matches = jsonpath_expr.find(schema) # Go over all nodes and set additionalProperties to False if it's an object for match in matches: if match.value == 'object': if 'additionalProperties' not in match.context.value: match.context.value['additionalProperties'] = False return schema ================================================ FILE: outlines/models/vllm.py ================================================ """Integration with a vLLM server.""" import json from typing import TYPE_CHECKING, Any, AsyncIterator, Iterator, Optional, Union from outlines.inputs import Chat from outlines.models.base import AsyncModel,Model, ModelTypeAdapter from outlines.models.openai import OpenAITypeAdapter from outlines.types.dsl import CFG, JsonSchema, python_types_to_terms, to_regex if TYPE_CHECKING: from openai import AsyncOpenAI, OpenAI __all__ = ["VLLM", "AsyncVLLM", "from_vllm"] class VLLMTypeAdapter(ModelTypeAdapter): """Type adapter for the `VLLM` and `AsyncVLLM` models.""" def format_input(self, model_input: Union[Chat, str, list]) -> list: """Generate the value of the messages argument to pass to the client. We rely on the OpenAITypeAdapter to format the input as the vLLM server expects input in the same format as OpenAI. Parameters ---------- model_input The input passed by the user. Returns ------- list The formatted input to be passed to the model. """ return OpenAITypeAdapter().format_input(model_input) def format_output_type(self, output_type: Optional[Any] = None) -> dict: """Generate the structured output argument to pass to the client. Parameters ---------- output_type The structured output type provided. Returns ------- dict The structured output argument to pass to the model. """ if output_type is None: return {} term = python_types_to_terms(output_type) if isinstance(term, CFG): return {"guided_grammar": term.definition} elif isinstance(term, JsonSchema): extra_body = {"guided_json": json.loads(term.schema)} if term.whitespace_pattern: extra_body["whitespace_pattern"] = term.whitespace_pattern return extra_body else: return {"guided_regex": to_regex(term)} class VLLM(Model): """Thin wrapper around the `openai.OpenAI` client used to communicate with a `vllm` server. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `openai.OpenAI` client for the `vllm` server. """ def __init__( self, client: "OpenAI", model_name: Optional[str] = None, ): """ Parameters ---------- client An `openai.OpenAI` client instance. """ self.client = client self.model_name = model_name self.type_adapter = VLLMTypeAdapter() def generate( self, model_input: Union[Chat, str, list], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> Union[str, list[str]]: """Generate text using vLLM. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them. inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Union[str, list[str]] The text generated by the model. """ client_args = self._build_client_args( model_input, output_type, **inference_kwargs, ) response = self.client.chat.completions.create(**client_args) messages = [choice.message for choice in response.choices] for message in messages: if message.refusal is not None: # pragma: no cover raise ValueError( f"The vLLM server refused to answer the request: " f"{message.refusal}" ) if len(messages) == 1: return messages[0].content else: return [message.content for message in messages] def generate_batch( self, model_input, output_type = None, **inference_kwargs, ): raise NotImplementedError("VLLM does not support batch inference.") def generate_stream( self, model_input: Union[Chat, str, list], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> Iterator[str]: """Stream text using vLLM. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them. inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Iterator[str] An iterator that yields the text generated by the model. """ client_args = self._build_client_args( model_input, output_type, **inference_kwargs, ) stream = self.client.chat.completions.create( **client_args, stream=True, ) for chunk in stream: # pragma: no cover if chunk.choices and chunk.choices[0].delta.content is not None: yield chunk.choices[0].delta.content def _build_client_args( self, model_input: Union[Chat, str, list], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> dict: """Build the arguments to pass to the OpenAI client.""" messages = self.type_adapter.format_input(model_input) output_type_args = self.type_adapter.format_output_type(output_type) extra_body = inference_kwargs.pop("extra_body", {}) extra_body.update(output_type_args) if "model" not in inference_kwargs and self.model_name is not None: inference_kwargs["model"] = self.model_name client_args = { "messages": messages, **inference_kwargs, } if extra_body: client_args["extra_body"] = extra_body return client_args class AsyncVLLM(AsyncModel): """Thin async wrapper around the `openai.OpenAI` client used to communicate with a `vllm` server. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `openai.OpenAI` client for the `vllm` server. """ def __init__( self, client: "AsyncOpenAI", model_name: Optional[str] = None, ): """ Parameters ---------- client An `openai.AsyncOpenAI` client instance. """ self.client = client self.model_name = model_name self.type_adapter = VLLMTypeAdapter() async def generate( self, model_input: Union[Chat, str, list], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> Union[str, list[str]]: """Generate text using vLLM. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them. inference_kwargs Additional keyword arguments to pass to the client. Returns ------- Union[str, list[str]] The text generated by the model. """ client_args = self._build_client_args( model_input, output_type, **inference_kwargs, ) response = await self.client.chat.completions.create(**client_args) messages = [choice.message for choice in response.choices] for message in messages: if message.refusal is not None: # pragma: no cover raise ValueError( f"The vLLM server refused to answer the request: " f"{message.refusal}" ) if len(messages) == 1: return messages[0].content else: return [message.content for message in messages] async def generate_batch( self, model_input, output_type = None, **inference_kwargs, ): raise NotImplementedError("VLLM does not support batch inference.") async def generate_stream( # type: ignore self, model_input: Union[Chat, str, list], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> AsyncIterator[str]: """Stream text using vLLM. Parameters ---------- model_input The prompt based on which the model will generate a response. output_type The desired format of the response generated by the model. All output types available in Outlines are supported provided your server uses a structured generation backend that supports them. inference_kwargs Additional keyword arguments to pass to the client. Returns ------- AsyncIterator[str] An async iterator that yields the text generated by the model. """ client_args = self._build_client_args( model_input, output_type, **inference_kwargs, ) stream = await self.client.chat.completions.create( **client_args, stream=True, ) async for chunk in stream: # pragma: no cover if chunk.choices and chunk.choices[0].delta.content is not None: yield chunk.choices[0].delta.content def _build_client_args( self, model_input: Union[Chat, str, list], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> dict: """Build the arguments to pass to the OpenAI client.""" messages = self.type_adapter.format_input(model_input) output_type_args = self.type_adapter.format_output_type(output_type) extra_body = inference_kwargs.pop("extra_body", {}) extra_body.update(output_type_args) if "model" not in inference_kwargs and self.model_name is not None: inference_kwargs["model"] = self.model_name client_args = { "messages": messages, **inference_kwargs, } if extra_body: client_args["extra_body"] = extra_body return client_args def from_vllm( client: Union["OpenAI", "AsyncOpenAI"], model_name: Optional[str] = None, ) -> Union[VLLM, AsyncVLLM]: """Create an Outlines `VLLM` or `AsyncVLLM` model instance from an `openai.OpenAI` or `openai.AsyncOpenAI` instance. Parameters ---------- client An `openai.OpenAI` or `openai.AsyncOpenAI` instance. model_name The name of the model to use. Returns ------- Union[VLLM, AsyncVLLM] An Outlines `VLLM` or `AsyncVLLM` model instance. """ from openai import AsyncOpenAI, OpenAI if isinstance(client, OpenAI): return VLLM(client, model_name) elif isinstance(client, AsyncOpenAI): return AsyncVLLM(client, model_name) else: raise ValueError( f"Unsupported client type: {type(client)}.\n" "Please provide an OpenAI or AsyncOpenAI instance." ) ================================================ FILE: outlines/models/vllm_offline.py ================================================ """Integration with the `vllm` library (offline mode).""" import json from functools import singledispatchmethod from typing import TYPE_CHECKING, Any, List, Optional, Union from outlines.inputs import Chat from outlines.models.base import Model, ModelTypeAdapter from outlines.models.openai import OpenAITypeAdapter from outlines.types.dsl import CFG, JsonSchema, python_types_to_terms, to_regex if TYPE_CHECKING: from vllm import LLM from vllm.sampling_params import SamplingParams __all__ = ["VLLMOffline", "from_vllm_offline"] class VLLMOfflineTypeAdapter(ModelTypeAdapter): """Type adapter for the `VLLMOffline` model.""" def __init__(self, has_chat_template: bool = False): self.has_chat_template = has_chat_template @singledispatchmethod def format_input(self, model_input): """Generate the prompt argument to pass to the model. Argument -------- model_input The input passed by the user. """ raise TypeError( f"The input type {type(model_input)} is not available with " "VLLM offline. The only available types are `str` and " "`Chat` (containing a prompt and images)." ) @format_input.register(str) def format_input_str(self, model_input: str) -> str | list: """Format a `str` input. """ if self.has_chat_template: return self.format_input_chat(Chat([{"role": "user", "content": model_input}])) return model_input @format_input.register(Chat) def format_input_chat(self, model_input: Chat) -> list: """Format a `Chat` input. """ for message in model_input.messages: content = message["content"] if isinstance(content, list): raise ValueError( "Assets are not supported for vLLM offline." "Please only use text content in the `Chat` input." ) return OpenAITypeAdapter().format_input(model_input) def format_output_type(self, output_type: Optional[Any] = None) -> dict: """Generate the structured output argument to pass to the model. For vLLM, the structured output definition is set in the `GuidedDecodingParams` constructor that is provided as a value to the `guided_decoding` parameter of the `SamplingParams` constructor, itself provided as a value to the `sampling_params` parameter of the `generate` method. Parameters ---------- output_type The structured output type provided. Returns ------- dict The arguments to provide to the `GuidedDecodingParams` constructor. """ if output_type is None: return {} term = python_types_to_terms(output_type) if isinstance(term, CFG): return {"grammar": term.definition} elif isinstance(term, JsonSchema): guided_decoding_params = {"json": json.loads(term.schema)} if term.whitespace_pattern: guided_decoding_params["whitespace_pattern"] = term.whitespace_pattern return guided_decoding_params else: return {"regex": to_regex(term)} class VLLMOffline(Model): """Thin wrapper around a `vllm.LLM` model. This wrapper is used to convert the input and output types specified by the users at a higher level to arguments to the `vllm.LLM` model. """ def __init__(self, model: "LLM"): """Create a VLLM model instance. Parameters ---------- model A `vllm.LLM` model instance. """ self.model = model self.tokenizer = self.model.get_tokenizer() self.type_adapter = VLLMOfflineTypeAdapter(has_chat_template=self._check_chat_template()) def _build_generation_args( self, inference_kwargs: dict, output_type: Optional[Any] = None, ) -> "SamplingParams": """Create the `SamplingParams` object to pass to the `generate` method of the `vllm.LLM` model.""" from vllm.sampling_params import StructuredOutputsParams, SamplingParams sampling_params = inference_kwargs.pop("sampling_params", None) if sampling_params is None: sampling_params = SamplingParams() output_type_args = self.type_adapter.format_output_type(output_type) if output_type_args: original_sampling_params_dict = {f: getattr(sampling_params, f) for f in sampling_params.__struct_fields__} sampling_params_dict = {**original_sampling_params_dict, "structured_outputs": StructuredOutputsParams(**output_type_args)} sampling_params = SamplingParams(**sampling_params_dict) return sampling_params def generate( self, model_input: Chat | str, output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> Union[str, List[str]]: """Generate text using vLLM offline. Parameters ---------- prompt The prompt based on which the model will generate a response. output_type The logits processor the model will use to constrain the format of the generated text. inference_kwargs Additional keyword arguments to pass to the `generate` method in the `vllm.LLM` model. Returns ------- Union[str, List[str]] The text generated by the model. """ sampling_params = self._build_generation_args( inference_kwargs, output_type, ) model_input = self.type_adapter.format_input(model_input) if isinstance(model_input, list): results = self.model.chat( messages=model_input, sampling_params=sampling_params, **inference_kwargs, ) else: results = self.model.generate( prompts=model_input, sampling_params=sampling_params, **inference_kwargs, ) results = [completion.text for completion in results[0].outputs] if len(results) == 1: return results[0] else: return results def generate_batch( self, model_input: List[Chat | str], output_type: Optional[Any] = None, **inference_kwargs: Any, ) -> Union[List[str], List[List[str]]]: """Generate a batch of completions using vLLM offline. Parameters ---------- prompt The list of prompts based on which the model will generate a response. output_type The logits processor the model will use to constrain the format of the generated text. inference_kwargs Additional keyword arguments to pass to the `generate` method in the `vllm.LLM` model. Returns ------- Union[List[str], List[List[str]]] The text generated by the model. """ sampling_params = self._build_generation_args( inference_kwargs, output_type, ) model_inputs = [self.type_adapter.format_input(item) for item in model_input] if model_inputs and isinstance(model_inputs[0], list): results = self.model.chat( messages=model_inputs, sampling_params=sampling_params, **inference_kwargs, ) else: results = self.model.generate( prompts=model_inputs, sampling_params=sampling_params, **inference_kwargs, ) return [[sample.text for sample in batch.outputs] for batch in results] def generate_stream(self, model_input, output_type, **inference_kwargs): """Not available for `vllm.LLM`. TODO: Implement the streaming functionality ourselves. """ raise NotImplementedError( "Streaming is not available for the vLLM offline integration." ) def _check_chat_template(self) -> bool: """Check if the tokenizer has a chat template.""" from vllm.transformers_utils.tokenizer import ( PreTrainedTokenizer, PreTrainedTokenizerFast, TokenizerBase ) from outlines.models.tokenizer import _check_hf_chat_template if isinstance(self.tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)): return _check_hf_chat_template(self.tokenizer) elif isinstance(self.tokenizer, TokenizerBase): # vLLM defines its own TokenizerBase class, and only provides # limited compatibility with HuggingFace tokenizers. So we # need to check for chat template support differently. try: self.tokenizer.apply_chat_template([{"role": "user", "content": "test"}]) return True except Exception: return False else: # Never reached # pragma: no cover return False def from_vllm_offline(model: "LLM") -> VLLMOffline: """Create an Outlines `VLLMOffline` model instance from a `vllm.LLM` instance. Parameters ---------- model A `vllm.LLM` instance. Returns ------- VLLMOffline An Outlines `VLLMOffline` model instance. """ return VLLMOffline(model) ================================================ FILE: outlines/processors/__init__.py ================================================ """Processors to control generation in steerable models.""" from .base_logits_processor import OutlinesLogitsProcessor __all__ = [ "OutlinesLogitsProcessor", ] ================================================ FILE: outlines/processors/base_logits_processor.py ================================================ """Base class for logits processors.""" from abc import abstractmethod from typing import TypeVar from outlines.processors.tensor_adapters import ( TensorAdapterImplementation, tensor_adapters, ) TensorType = TypeVar('TensorType') class OutlinesLogitsProcessor: """Base class for logits processors. This class implements a shared `__call__` method is called by the models and returns the processed logits. It relies on the `process_logits` method that must be implemented by the subclasses to do the actual processing. The `tensor_adapter` attribute, created at initialization based on the tensor library name specified in the constructor, is used to manipulate the tensors using the appropriate library for the model (numpy, torch...). """ tensor_adapter: TensorAdapterImplementation def __init__(self, tensor_library_name: str): """ Parameters ---------- tensor_library_name The name of the library to use to manipulate tensors. Possible values are "mlx", "numpy" and "torch". You must choose the library that your model is using. """ # Temporary fix as torch raises a warning that can cause can an error # with python 3.12. if tensor_library_name == "torch": import torch._dynamo torch._dynamo.config.suppress_errors = True tensor_adapter_class = tensor_adapters.get(tensor_library_name) if tensor_adapter_class is None: raise NotImplementedError( f"Library {tensor_library_name} is not available" ) self.tensor_adapter = tensor_adapter_class() # type: ignore def reset(self): """Reset the logits processor for a new generation Only implement this method in subclasses if the logits processor needs to be reset for a new generation. """ pass # pragma: no cover @abstractmethod def process_logits( self, input_ids: TensorType, logits: TensorType ) -> TensorType: """Main method to implement for logits processors subclasses. This method applies a mask on the logits to bias the generation. It is called by the `__call__` method that standardizes the shape of `input_ids` and `logits` to ensure they are 2D tensors. Elements to keep in mind when designing universal logits processors: - logits processors are only used once and never re-applied for a new sequence generator - Some models only pass output_ids, some models such as llamacpp and transformers prefix with input_ids - Some sampling methods, such as beam search, result in unstable sequence ordering in models like vLLM Parameters ---------- input_ids The ids of the tokens of the existing sequences in a 2D tensor. logits The logits for the current generation step in a 2D tensor. Returns ------- TensorType The processed logits as a 2D tensor. """ ... def __call__( self, input_ids: TensorType, logits: TensorType ) -> TensorType: """Entrypoint for logits processors, this is the method that is called by the model. Because different models use different structures to store the input_ids and logits, we standardize their format to 2D tensors before calling the `process_logits` method. After processing, the logits are cast back to the original array library type before being returned. Parameters ---------- input_ids The ids of the tokens of the existing sequences in a tensor. logits The logits for the current generation step in a tensor. Returns ------- TensorType The processed logits as a tensor. """ # if input_ids is 1D and logits is 2D with a single sequence, # reshape input_ids to 2D (needed for mlx-lm) if ( len(self.tensor_adapter.shape(input_ids)) == 1 and len(self.tensor_adapter.shape(logits)) == 2 and self.tensor_adapter.shape(logits)[0] == 1 ): input_ids = self.tensor_adapter.unsqueeze(input_ids) assert ( self.tensor_adapter.shape(logits)[:-1] == self.tensor_adapter.shape(input_ids)[:-1] ) # Guarantee passed as 2D Tensors, then covert back to original # (1D or 2D) shape if len(self.tensor_adapter.shape(logits)) == 2: processed_logits = self.process_logits(input_ids, logits) elif len(self.tensor_adapter.shape(logits)) == 1: processed_logits = self.tensor_adapter.squeeze( self.process_logits( self.tensor_adapter.unsqueeze(input_ids), self.tensor_adapter.unsqueeze(logits), ), ) else: raise ValueError( f"Logits shape {self.tensor_adapter.shape(logits)} is not " + "supported" ) return processed_logits ================================================ FILE: outlines/processors/tensor_adapters/__init__.py ================================================ """Library specific objects to manipulate tensors.""" from typing import Union from .mlx import MLXTensorAdapter from .numpy import NumpyTensorAdapter from .torch import TorchTensorAdapter tensor_adapters = { "mlx": MLXTensorAdapter, "numpy": NumpyTensorAdapter, "torch": TorchTensorAdapter, } TensorAdapterImplementation = Union[ MLXTensorAdapter, NumpyTensorAdapter, TorchTensorAdapter, ] __all__ = [ "MLXTensorAdapter", "NumpyTensorAdapter", "TorchTensorAdapter", "tensor_adapters", "TensorAdapterImplementation", ] ================================================ FILE: outlines/processors/tensor_adapters/base.py ================================================ """Base class for tensor adapters.""" from abc import ABC, abstractmethod from typing import TYPE_CHECKING, TypeVar, Any, Union if TYPE_CHECKING: import torch TensorType = TypeVar('TensorType') class TensorAdapter(ABC): """Abstract base class for tensor adapters. This class defines the interface for tensor adapters that are used to manipulate tensors in different libraries. Concrete implementations of this class should provide specific implementations for each method as well as providing a `library_name` attribute. TODO: Update the version of outlines-core used to receive plain arrays instead of torch tensors. In the meantime, implementations of this class must make sure that their `full_like` and `concatenate` methods can handle torch tensors. """ library_name: str @abstractmethod def shape(self, tensor: TensorType) -> list[int]: """Get the shape of the tensor. Parameters ---------- tensor The tensor to get the shape of. Returns ------- list[int] The shape of the tensor. The list contains as many elements as there are dimensions in the tensor. """ ... @abstractmethod def unsqueeze(self, tensor: TensorType) -> TensorType: """Add a dimension to the tensor at axis 0. Parameters ---------- tensor The tensor to add a dimension to. Returns ------- TensorType The tensor with an additional dimension. """ ... @abstractmethod def squeeze(self, tensor: TensorType) -> TensorType: """Remove a dimension from the tensor at axis 0. Parameters ---------- tensor The tensor to remove a dimension from. Returns ------- TensorType The tensor with one less dimension. """ ... @abstractmethod def to_list(self, tensor: TensorType) -> list: """Convert the tensor to a list. Parameters ---------- tensor The tensor to convert to a list. Returns ------- list The tensor as a list. """ ... @abstractmethod def to_scalar(self, tensor: TensorType) -> Any: """Return the only element of the tensor. Parameters ---------- tensor The tensor to return the only element of. Returns ------- Any The only element of the tensor. """ ... @abstractmethod def full_like(self, tensor: "torch.Tensor", fill_value: Any) -> TensorType: # type: ignore """Create a tensor with the same shape as the input tensor filled with a scalar value. ATTENTION: This method receives a torch tensor regardless of the library used. Parameters ---------- tensor The tensor to create a new tensor with the same shape. fill_value The value to fill the new tensor with. Returns ------- TensorType A tensor with the same shape as the input tensor filled with the specified value. """ ... @abstractmethod def concatenate( self, tensors: list[Union["torch.Tensor", TensorType]] ) -> TensorType: """Concatenate a list of tensors along axis 0. ATTENTION: This method can either receive a list of torch tensors or a list of tensors from the library used. Parameters ---------- tensors The list of tensors to concatenate. Returns ------- TensorType The concatenated tensor. """ ... @abstractmethod def get_device(self, tensor: TensorType) -> str: """Get the name of the tensor's device. Parameters ---------- tensor The tensor to get the device of. Returns ------- str The name of the tensor's device. """ ... @abstractmethod def to_device(self, tensor: TensorType, device: str) -> TensorType: """Move the tensor to a specified device. Parameters ---------- tensor The tensor to move to a specified device. device The name of the device to move the tensor to. Returns ------- TensorType The tensor moved to the specified device. """ ... @abstractmethod def boolean_ones_like(self, tensor: TensorType) -> TensorType: """Create a boolean ones tensor with the same shape as the input tensor. Parameters ---------- tensor The tensor to create a boolean ones tensor with the same shape. Returns ------- TensorType A boolean ones tensor with the same shape as the input tensor. """ ... @abstractmethod def apply_mask( self, tensor: TensorType, mask: TensorType, value: Any ) -> TensorType: """Fill the elements of the tensor where the mask is True with the specified value. Parameters ---------- tensor The tensor to fill. mask The mask to apply to the tensor. value The value to fill the tensor with. Returns ------- TensorType The tensor with the mask applied. """ ... @abstractmethod def argsort_descending( self, tensor: TensorType ) -> TensorType: """Return the indices that would sort the tensor in descending order along axis -1. Parameters ---------- tensor The tensor to sort. Returns ------- TensorType The indices that would sort the tensor in descending order along axis -1. """ ... ================================================ FILE: outlines/processors/tensor_adapters/mlx.py ================================================ """Tensor adapter for the `mlx` library.""" from outlines.processors.tensor_adapters.base import TensorAdapter class MLXTensorAdapter(TensorAdapter): library_name = "mlx" def __init__(self): import mlx.core self.mlx = mlx.core def shape(self, tensor): return tensor.shape def unsqueeze(self, tensor): return self.mlx.expand_dims(tensor, 0) def squeeze(self, tensor): if tensor.shape[0] == 1: return tensor[0] return tensor def to_list(self, tensor): return tensor.tolist() def to_scalar(self, tensor): return tensor.item() def full_like(self, tensor, fill_value): # Compatible with receiving a torch tensor return self.mlx.full(tensor.shape, fill_value) def concatenate(self, tensors): # Can handle both torch and mlx tensors return self.mlx.concatenate( [ self.mlx.array(t) if not isinstance(t, self.mlx.array) else t for t in tensors ], axis=0 ) def get_device(self, tensor): return None def to_device(self, tensor, device): return tensor def boolean_ones_like(self, tensor): return self.mlx.ones(tensor.shape, dtype=self.mlx.bool_) def apply_mask(self, tensor, mask, value): result = tensor.astype(tensor.dtype) result = self.mlx.where(mask, self.mlx.array(value), result) return result def argsort_descending(self, tensor): return self.mlx.argsort(-tensor) ================================================ FILE: outlines/processors/tensor_adapters/numpy.py ================================================ """Tensor adapter for the `numpy` library.""" from outlines.processors.tensor_adapters.base import TensorAdapter class NumpyTensorAdapter(TensorAdapter): library_name = "numpy" def __init__(self): import numpy self.numpy = numpy def shape(self, tensor): return tensor.shape def unsqueeze(self, tensor): return self.numpy.expand_dims(tensor, axis=0) def squeeze(self, tensor): return self.numpy.squeeze(tensor, axis=0) def to_list(self, tensor): return tensor.tolist() def to_scalar(self, tensor): return tensor.item() def full_like(self, tensor, fill_value): return self.numpy.full_like(tensor, fill_value) def concatenate(self, tensors): return self.numpy.concatenate(tensors, axis=0) def get_device(self, tensor): return None def to_device(self, tensor, device): return tensor def boolean_ones_like(self, tensor): return self.numpy.ones_like(tensor, dtype=bool) def apply_mask(self, tensor, mask, value): result = tensor.copy() result[mask] = value return result def argsort_descending(self, tensor): return self.numpy.argsort(-tensor) ================================================ FILE: outlines/processors/tensor_adapters/torch.py ================================================ """Tensor adapter for the `torch` library.""" from outlines.processors.tensor_adapters.base import TensorAdapter class TorchTensorAdapter(TensorAdapter): library_name = "torch" def __init__(self): import torch self.torch = torch def shape(self, tensor): return tensor.shape def unsqueeze(self, tensor): return tensor.unsqueeze(0) def squeeze(self, tensor): return tensor.squeeze(0) def to_list(self, tensor): return tensor.tolist() def to_scalar(self, tensor): return tensor.item() def full_like(self, tensor, fill_value): return self.torch.full_like(tensor, fill_value) def concatenate(self, tensors): return self.torch.cat(tensors, dim=0) def get_device(self, tensor): return tensor.device def to_device(self, tensor, device): return tensor.to(device) def boolean_ones_like(self, tensor): return self.torch.ones_like(tensor, dtype=self.torch.bool) def apply_mask(self, tensor, mask, value): return self.torch.masked_fill(tensor, mask, value) def argsort_descending(self, tensor): return self.torch.argsort(tensor, descending=True) ================================================ FILE: outlines/py.typed ================================================ ================================================ FILE: outlines/release_note.md ================================================ # Release Note ### Why a new major version? The v1 intends on making Outlines more closely focused on constrained generation. To do so, we delegate a wider range of tasks to the users and inference libraries. On top of making Outlines leaner, this design provides more flexibility to the users and let them use interfaces they are already familiar with. Our approach is inspired by the unix best practices — each element does one thing well, and we compose those functional elements. As this new version deprecates some previously available features of Outlines, we have written a migration guide that gives detailed information on how to upgrade your v0 code to v1. ### Deprecated All deprecated features listed below will be removed in version 1.1.0. Until then, a warning will be displayed with information on how to migrate your code to v1. - The model loader functions from the `models` module (`transformers`, `openai`, etc.) have been deprecated. They are replaced by equivalent functions prefixed with `from_` such as `from_transformers`, `from_openai`, etc. The new loader functions accept different arguments compared to the old ones. They now typically require an instance of an engine/client from the associated inference library. This change was made to avoid duplicating inference library logic and to give users more control over inference engine/client initialization. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/models) ```python # v0 from outlines import models from transformers import BertForSequenceClassification, BertTokenizer model = models.transformers( model_name="prajjwal1/bert-tiny", model_class=BertForSequenceClassification, tokenizer_class=BertTokenizer, model_kwargs={"use_cache": False}, tokenizer_kwargs={"model_max_length": 512}, ) # v1 import outlines from transformers import BertForSequenceClassification, BertTokenizer hf_model = BertForSequenceClassification.from_pretrained("prajjwal1/bert-tiny", use_cache=False) hf_tokenizer = BertTokenizer.from_pretrained("prajjwal1/bert-tiny", model_max_length=512) model = outlines.from_transformers(hf_model, hf_tokenizer) ``` - The `generate` module and the associated functions (`json`, `choice`…) have been deprecated. They are replaced by the `Generator` constructor. While you had to select the right generate function for your output type, you can now provide any output type supported by Outlines to the unique `Generator` object. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/core/generator) ```python # v0 from pydantic import BaseModel from outlines import generate, models class Character(BaseModel): name: str model = models.openai("gpt-4o") generator = generate.json(model, Character) # v1 from openai import OpenAI from pydantic import BaseModel from outlines import Generator, from_openai class Character(BaseModel): name: str model = from_openai(OpenAI()) generator = Generator(model, Character) ``` - The `TransformersVision` model has been deprecated. It's replaced by `TransformersMultiModal`, which is more general as it supports additional input types beyond images, such as audio. When calling it, instead of providing the prompt and image assets separately, both should now be included in a single dictionary. The model is loaded with `from_transformers` just like the `Transformers` model, but the second argument must be a processor instead of a tokenizer. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/models/transformers_multimodal) ```python # v0 from io import BytesIO from urllib.request import urlopen from PIL import Image from transformers import LlavaForConditionalGeneration from outlines import models, generate def img_from_url(url): img_byte_stream = BytesIO(urlopen(url).read()) return Image.open(img_byte_stream).convert("RGB") model = models.transformers_vision( model_name="trl-internal-testing/tiny-LlavaForConditionalGeneration", model_class=LlavaForConditionalGeneration, ) generator = generate.text(model) result = generator( "Describe the image ", img_from_url("https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg") ) # v1 from io import BytesIO from urllib.request import urlopen from PIL import Image from transformers import LlavaForConditionalGeneration, AutoProcessor import outlines def img_from_url(url): img_byte_stream = BytesIO(urlopen(url).read()) return Image.open(img_byte_stream).convert("RGB") model = outlines.from_transformers( LlavaForConditionalGeneration.from_pretrained("trl-internal-testing/tiny-LlavaForConditionalGeneration"), AutoProcessor.from_pretrained("trl-internal-testing/tiny-LlavaForConditionalGeneration") ) image = img_from_url("https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg") result = model({"text": "Describe the image ", "images": image}) ``` - The `Exllamav2` model has been deprecated without replacement because its interface is not fully compatible with Outlines. We had to implement cumbersome patching to make it work, so we decided to remove it entirely. - The `function` module and the associated `Function` class have been deprecated. They are replaced by the `Application` class, which serves a similar purpose to `Function`. There are two notable differences: an `Application` is not initialized with a model (a model must be provided when calling the object), and template variables must be provided in a dictionary instead of as keyword arguments when calling the `Application`. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/utility/application) ```python # v0 from pydantic import BaseModel from outlines import Function, Template class Character(BaseModel): name: str template = Template.from_string("Create a {{ gender }} character.") fn = Function(template, Character, "hf-internal-testing/tiny-random-GPTJForCausalLM") response = fn(gender="female") # v1 from pydantic import BaseModel from outlines import Application, Template, from_transformers from transformers import AutoModelForCausalLM, AutoTokenizer class Character(BaseModel): name: str model = from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) template = Template.from_string("Create a {{ gender }} character.") app = Application(template, Character) response = app(model, {"gender": "female"}) ``` - The `samplers` module and the associated objects (`multinomial`, `greedy`…) have been deprecated. You should now use the inference arguments specific to the inference library your model is based on to control the sampling. ```python # v0 from outlines import generate, models, samplers model = models.transformers("microsoft/Phi-3-mini-4k-instruct") generator = generate.text(model, samplers.beam_search(2)) response = generator("Write a short story about a cat", max_tokens=10) # v1 from outlines import Generator, from_transformers from transformers import AutoModelForCausalLM, AutoTokenizer model = from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) response = model("Write a short story about a cat", num_beams=2) ``` - The `load_lora` methods on the `VLLM` and `LlamaCpp` models have been deprecated. You should now load through the `Llama` instance provided when initializing the model in the case of the `LlamaCpp` model, and provide it as a keyword argument when calling the model in the case of the `VLLM` model. ```python # v0 from outlines import models from vllm import LLM model = models.vllm("erwanf/gpt2-mini") model.load_lora("path/to/lora/file") response = model("Write a short story about a cat.") #v1 from outlines import from_vllm from vllm import LLM from vllm.lora.request import LoRARequest model = from_vllm( LLM("microsoft/Phi-3-mini-4k-instruct") ) lora_request = LoRARequest("path/to/lora/file", 1, "path/to/lora/file") response = model("Write a short story about a cat.", lora_request=lora_request) ``` ### Modified Some objects are maintained, but their interface or behavior has been modified. - The interface of `Model` classes (`Transformers`, `OpenAI`, etc.) has been significantly modified. Models can now be called directly with a prompt and an output type without having to create a generator first. Additionally, all models have a `stream` method that can be invoked directly by the user. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/models) ```python # v0 from pydantic import BaseModel from outlines import generate, models class Character(BaseModel): name: str model = models.openai("gpt-4o") generator = generate.json(model, Character) result = generator("Create a character") # v1 from openai import OpenAI from pydantic import BaseModel from outlines import from_openai class Character(BaseModel): name: str model = from_openai(OpenAI(), "gpt-4o") result = model("Create a character", Character) ``` - The interface of the `__init__` method of the `OpenAI` model class has been modified. While it previously accepted a client and an `OpenAIConfig` object instance, it now accepts a client and a model name. The inference arguments from the config object should now be specified when calling the model to more closely align with the OpenAI Python library's functionality. If you provide an `OpenAIConfig` instance when initializing the model, a deprecation warning will appear and your model will behave like a v0 model. We recommend using the `from_openai` function instead of initializing models directly. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/models/openai) ```python # v0 from outlines.models.openai import OpenAI, OpenAIConfig from openai import OpenAI as OpenAIClient model = OpenAI( OpenAIClient(), OpenAIConfig(model="gpt-4o", stop=["."]) ) # v1 import outlines from openai import OpenAI model = outlines.from_openai(OpenAIClient(), "gpt-4o") ``` - The return type of text generation is now consistently a string (or list/lists of strings for multiple samples or batching). In v0, Outlines automatically cast the inference result into the type provided by the user for constrained generation, but we have removed this behavior. This change was made to create more consistent behavior and to give users more freedom in deciding how to handle the generation result. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/models) ```python # v0 from pydantic import BaseModel from outlines import generate, models class Character(BaseModel): name: str model = models.openai("gpt-4o") generator = generate.json(model, Character) result = generator("Create a character") print(result) # name='James' # v1 import openai from pydantic import BaseModel from outlines import from_openai class Character(BaseModel): name: str model = from_openai(OpenAI()) result = model("Create a character", Character) print(result) # {"name": "James"} print(Character.model_validate_json(result)) # name='James' ``` - While Outlines was trying to standardize inference argument names across models in v0, we decided to stop doing so and to directly pass on the inference arguments provided by the user to the inference engine/client. Our objective is to let the user use all arguments they are accustomed to with their inference library instead of having to learn Outlines-defined arguments. The deprecation of the `samplers` mentioned above is a part of this change of approach. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/models) ```python # v0 from outlines import generate, models model = models.transformers("microsoft/Phi-3-mini-4k-instruct") generator = generate.text(model) result = generator("Create a character", max_tokens=256, stop_at=".") # v1 from outlines import from_transformers from transformers import AutoModelForCausalLM, AutoTokenizer model = from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) result = model("Create a character", max_new_tokens=256, stop_strings=".") ``` ### Added features - There are 8 additional models available. All of them are loaded with an associated `from_` function that accepts an inference engine/client instance. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/models) - `Dottxt` - `Anthopic` - `Gemini` - `Ollama` - `SGLang` - `TGI` - `TransformersMultiModel` - `VLLM` - Some server-based models now have an async version. To create an async model, just provide an async client instance when using the loader function. The async models are the following. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/models) - `AsyncSGLang` - `AsyncTGI` - `AsyncVLLM` ```python import outlines from huggingface_hub import AsyncInferenceClient async_model = outlines.from_tgi(AsyncInferenceClient("http://localhost:11434")) ``` - As explained previously, the `Generator` constructor has been added. It accepts a model and an output type as arguments and returns a generator object that can be used to generate text by providing a prompt and inference arguments. The interest of a generator is that it's reusable such that the user does not have to specify the output type they want each time and the output type compilation (when applicable) happens only once. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/core/generator) ```python # direct model calling from typing import Literal from outlines import from_transformers from transformers import AutoModelForCausalLM, AutoTokenizer model = from_transformers( AutoModelForCausalLM.from_pretrained("fmicrosoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("fmicrosoft/Phi-3-mini-4k-instruct") ) result = model("Pizza or burger", Literal["pizza", "burger"]) # using a generator from outlines import Generator, from_transformers from transformers import AutoModelForCausalLM, AutoTokenizer model = from_transformers( AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") ) generator = Generator(model, Literal["pizza", "burger"]) result = generator("Pizza or burger") ``` - As explained previously, the `Application` class has been added. An `Application` is initialized with a prompt template and an output type. The application object returned can then be called with a model, a dictionary containing values for the template variables and inference arguments. The objective of this object is to let users easily switch from a model to another for a given set of prompt and output type. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/utility/application) ```python from pydantic import BaseModel from outlines import Application, Template class Character(BaseModel): name: str template = Template.from_string("Create a {{ gender }} character.") app = Application(template, Character) response = app(model, {"gender": "female"}) ``` - The regex DSL and the associated `Term` classes and functions have been added. Terms (`Regex`, `String`…) can be used as output types to generate text with models or generators (they are turned into a regex). The term functions (`either`, `optional`, `at_least`…) are useful to build more complex regex patterns by combining terms. On top of the objects related to regex patterns, there are also 2 terms that are intended to be used by themselves as output types: `JsonSchema` and `CFG`. [Documentation](https://dottxt-ai.github.io/outlines/latest/features/core/ouput_types) ```python # term used directly as an output type from outlines import from_transformers from outlines.types import JsonSchema from transformers import AutoModelForCausalLM, AutoTokenizer model = from_transformers( AutoModelForCausalLM.from_pretrained("fmicrosoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("fmicrosoft/Phi-3-mini-4k-instruct") ) json_schema = '{"type": "object", "properties": {"answer": {"type": "number"}}}' result = model("What's 2 + 2? Respond in a json", JsonSchema(json_schema)) # creating a complex regex pattern from outlines import from_transformers from outlines.types import at_least, either, integer, optional from transformers import AutoModelForCausalLM, AutoTokenizer model = from_transformers( AutoModelForCausalLM.from_pretrained("fmicrosoft/Phi-3-mini-4k-instruct"), AutoTokenizer.from_pretrained("fmicrosoft/Phi-3-mini-4k-instruct") ) regex_term = "I have " + integer + either("dog", "cat") + optional("s") result = model("How many pets do you have", regex_term) ``` ================================================ FILE: outlines/templates.py ================================================ """Create templates to easily build prompts.""" import functools import inspect import json import os import re import textwrap from dataclasses import dataclass from pathlib import Path from typing import Any, Callable, Dict, Optional, Type, cast import warnings import jinja2 from pydantic import BaseModel from PIL import Image as PILImage from outlines.inputs import Image def Vision(prompt: str, image: PILImage.Image) -> list: """This factory function replaces the deprecated `Vision` class until it is fully removed in outlines v1.2.0. Parameters ---------- prompt The prompt to use to generate the response. image The image to use to generate the response. Returns ------- list A list containing the prompt and Image instance. """ warnings.warn(""" The Vision function is deprecated and will be removed in outlines 1.2.0. Instead of using Vision, please use a prompt along with an outlines.inputs.Image instance. For instance: ```python import openai from outlines import Image, from_openai model = from_openai("gpt-4o") response = model( ["A beautiful image of a cat", Image(my_image)], max_tokens=100 ) ``` """, DeprecationWarning, stacklevel=2, ) return [prompt, Image(image)] @dataclass class Template: """Represents a prompt template. We return a `Template` class instead of a simple function so the template can be accessed by callers. """ template: jinja2.Template def __call__(self, *args, **kwargs) -> str: """Render and return the template. Returns ------- str The rendered template as a Python string. """ return self.template.render(**kwargs) @classmethod def from_string(cls, content: str, filters: Dict[str, Callable] = {}): """Create a `Template` instance from a string containing a Jinja template. Parameters ---------- content : str The string content to be converted into a template. Returns ------- Template An instance of the class with the provided content as a template. """ return cls(build_template_from_string(content, filters)) @classmethod def from_file(cls, path: Path, filters: Dict[str, Callable] = {}): """Create a `Template` instance from a file containing a Jinja template. Note: This method does not allow to include and inheritance to reference files that are outside the folder or subfolders of the file given to `from_file`. Parameters ---------- path : Path The path to the file containing the Jinja template. Returns ------- Template An instance of the Template class with the template loaded from the file. """ # We don't use a `Signature` here because it seems not feasible to # infer one from a Jinja2 environment that is # split across multiple files (since e.g. we support features like # Jinja2 includes and template inheritance) return cls(build_template_from_file(path, filters)) def build_template_from_string( content: str, filters: Dict[str, Callable] = {} ) -> jinja2.Template: # Dedent, and remove extra linebreak cleaned_template = inspect.cleandoc(content) # Add linebreak if there were any extra linebreaks that # `cleandoc` would have removed ends_with_linebreak = content.replace(" ", "").endswith("\n\n") if ends_with_linebreak: cleaned_template += "\n" # Remove extra whitespaces, except those that immediately follow a newline symbol. # This is necessary to avoid introducing whitespaces after backslash `\` characters # used to continue to the next line without linebreak. cleaned_template = re.sub(r"(?![\r\n])(\b\s+)", " ", cleaned_template) env = create_jinja_env(None, filters) return env.from_string(cleaned_template) def build_template_from_file( path: Path, filters: Dict[str, Callable] = {} ) -> jinja2.Template: file_directory = os.path.dirname(os.path.abspath(path)) env = create_jinja_env(jinja2.FileSystemLoader(file_directory), filters) return env.get_template(os.path.basename(path)) def create_jinja_env( loader: Optional[jinja2.BaseLoader], filters: Dict[str, Callable] ) -> jinja2.Environment: """Create a new Jinja environment. The Jinja environment is loaded with a set of pre-defined filters: - `name`: get the name of a function - `description`: get a function's docstring - `source`: get a function's source code - `signature`: get a function's signature - `args`: get a function's arguments - `schema`: display a JSON Schema Users may pass additional filters, and/or override existing ones. Parameters ---------- loader An optional `BaseLoader` instance filters A dictionary of filters, map between the filter's name and the corresponding function. """ env = jinja2.Environment( loader=loader, trim_blocks=True, lstrip_blocks=True, keep_trailing_newline=True, undefined=jinja2.StrictUndefined, ) env.filters["name"] = get_fn_name env.filters["description"] = get_fn_description env.filters["source"] = get_fn_source env.filters["signature"] = get_fn_signature env.filters["schema"] = get_schema env.filters["args"] = get_fn_args # The filters passed by the user may override the # pre-defined filters. for name, filter_fn in filters.items(): env.filters[name] = filter_fn return env def get_fn_name(fn: Callable): """Returns the name of a callable.""" if not callable(fn): raise TypeError("The `name` filter only applies to callables.") if not hasattr(fn, "__name__"): name = type(fn).__name__ else: name = fn.__name__ return name def get_fn_args(fn: Callable): """Returns the arguments of a function with annotations and default values if provided.""" if not callable(fn): raise TypeError("The `args` filter only applies to callables.") arg_str_list = [] signature = inspect.signature(fn) arg_str_list = [str(param) for param in signature.parameters.values()] arg_str = ", ".join(arg_str_list) return arg_str def get_fn_description(fn: Callable): """Returns the first line of a callable's docstring.""" if not callable(fn): raise TypeError("The `description` filter only applies to callables.") docstring = inspect.getdoc(fn) if docstring is None: description = "" else: description = docstring.split("\n")[0].strip() return description def get_fn_source(fn: Callable): """Return the source code of a callable.""" if not callable(fn): raise TypeError("The `source` filter only applies to callables.") source = textwrap.dedent(inspect.getsource(fn)) re_search = re.search(re.compile(r"(\bdef\b.*)", re.DOTALL), source) if re_search is not None: source = re_search.group(0) else: # pragma: no cover raise TypeError("Could not read the function's source code") return source def get_fn_signature(fn: Callable): """Return the signature of a callable.""" if not callable(fn): raise TypeError("The `source` filter only applies to callables.") source = textwrap.dedent(inspect.getsource(fn)) re_search = re.search(re.compile(r"\(([^)]+)\)"), source) if re_search is None: # pragma: no cover signature = "" else: signature = re_search.group(1) return signature @functools.singledispatch def get_schema(model: Any): raise NotImplementedError( f"No schema rendering function defined for type {type(model)}." ) @get_schema.register(dict) def get_schema_dict(model: Dict): """Return a pretty-printed dictionary""" return json.dumps(model, indent=2) @get_schema.register(type(BaseModel)) def get_schema_pydantic(model: Type[BaseModel]): """Return the schema of a Pydantic model.""" if hasattr(model, "model_json_schema"): def_key = "$defs" raw_schema = model.model_json_schema() else: # pragma: no cover def_key = "definitions" raw_schema = model.schema() definitions = raw_schema.get(def_key, None) schema = parse_pydantic_schema(raw_schema, definitions) return json.dumps(schema, indent=2) def parse_pydantic_schema(raw_schema, definitions): """Parse the output of `Basemodel.[schema|model_json_schema]()`. This recursively follows the references to other schemas in case of nested models. Other schemas are stored under the "definitions" key in the schema of the top-level model. """ simple_schema = {} for name, value in raw_schema["properties"].items(): if "description" in value: simple_schema[name] = value["description"] elif "$ref" in value: # pragma: no cover refs = value["$ref"].split("/") simple_schema[name] = parse_pydantic_schema( definitions[refs[2]], definitions ) else: simple_schema[name] = f"<{name}>" return simple_schema ================================================ FILE: outlines/types/__init__.py ================================================ """Output types for structured generation and regex DSL.""" from outlines.types.dsl import ( CFG, Choice, JsonSchema, Regex, at_least, at_most, between, cfg, either, exactly, json_schema, one_or_more, optional, regex, zero_or_more, ) from . import locale try: from . import airports except ImportError: # pragma: no cover class AirportImportError: """Dummy module that raises an error when accessed.""" def __getattr__(self, name): raise ImportError( "The 'airportsdata' package is required to use airport types. " "Install it with: pip install 'outlines[airports]'" ) airports = AirportImportError() # type: ignore try: from . import countries except ImportError: # pragma: no cover class CountryImportError: """Dummy module that raises an error when accessed.""" def __getattr__(self, name): raise ImportError( "The 'iso3166' package is required to use country types. " "Install it with: pip install 'outlines[countries]'" ) countries = CountryImportError() # type: ignore __all__ = [ # Submodules "airports", "countries", "locale", # DSL functions and classes "Regex", "CFG", "Choice", "JsonSchema", "regex", "cfg", "json_schema", "optional", "either", "exactly", "at_least", "at_most", "between", "zero_or_more", "one_or_more", # Python types "string", "integer", "boolean", "number", "date", "time", "datetime", # Basic regex types "digit", "char", "newline", "whitespace", "hex_str", "uuid4", "ipv4", # Document-specific types "sentence", "paragraph", "email", "isbn", ] # Python types string = Regex(r'"[^"]*"') integer = Regex(r"[+-]?(0|[1-9][0-9]*)") boolean = Regex("(True|False)") number = Regex(rf"{integer.pattern}(\.[0-9]+)?([eE][+-][0-9]+)?") date = Regex(r"(\d{4})-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])") time = Regex(r"([0-1][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])") datetime = Regex(rf"({date.pattern})(\s)({time.pattern})") # Basic regex types digit = Regex(r"\d") char = Regex(r"\w") newline = Regex(r"(\r\n|\r|\n)") # Matched new lines on Linux, Windows & MacOS whitespace = Regex(r"\s") hex_str = Regex(r"(0x)?[a-fA-F0-9]+") uuid4 = Regex( r"[a-fA-F0-9]{8}-" r"[a-fA-F0-9]{4}-" r"4[a-fA-F0-9]{3}-" r"[89abAB][a-fA-F0-9]{3}-" r"[a-fA-F0-9]{12}" ) ipv4 = Regex( r"((25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})\.){3}" r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})" ) # Document-specific types sentence = Regex(r"[A-Z].*\s*[.!?]") paragraph = Regex(rf"{sentence.pattern}(?:\s+{sentence.pattern})*\n+") # The following regex is FRC 5322 compliant and was found at: # https://emailregex.com/ email = Regex( r"""(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])""" ) # Matches any ISBN number. Note that this is not completely correct as not all # 10 or 13 digits numbers are valid ISBNs. See https://en.wikipedia.org/wiki/ISBN # Taken from O'Reilly's Regular Expression Cookbook: # https://www.oreilly.com/library/view/regular-expressions-cookbook/9781449327453/ch04s13.html # # TODO: The check digit can only be computed by calling a function to compute it dynamically isbn = Regex( r"(?:ISBN(?:-1[03])?:? )?(?=[0-9X]{10}$|(?=(?:[0-9]+[- ]){3})[- 0-9X]{13}$|97[89][0-9]{10}$|(?=(?:[0-9]+[- ]){4})[- 0-9]{17}$)(?:97[89][- ]?)?[0-9]{1,5}[- ]?[0-9]+[- ]?[0-9]+[- ]?[0-9X]" ) ================================================ FILE: outlines/types/airports.py ================================================ """Generate valid airport codes.""" from enum import Enum import airportsdata AIRPORT_IATA_LIST = [ (v["iata"], v["iata"]) for v in airportsdata.load().values() if v["iata"] ] IATA = Enum("Airport", AIRPORT_IATA_LIST) # type:ignore ================================================ FILE: outlines/types/countries.py ================================================ """Generate valid country codes and names.""" from enum import Enum from iso3166 import countries def get_country_flags(): """Generate Unicode flags for all ISO 3166-1 alpha-2 country codes in Alpha2 Enum.""" base = ord("🇦") return { code.name: chr(base + ord(code.name[0]) - ord("A")) + chr(base + ord(code.name[1]) - ord("A")) for code in Alpha2 } ALPHA_2_CODE = [(country.alpha2, country.alpha2) for country in countries] Alpha2 = Enum("Alpha_2", ALPHA_2_CODE) # type:ignore ALPHA_3_CODE = [(country.alpha3, country.alpha3) for country in countries] Alpha3 = Enum("Alpha_3", ALPHA_3_CODE) # type:ignore NUMERIC_CODE = [(str(country.numeric), str(country.numeric)) for country in countries] Numeric = Enum("Numeric_code", NUMERIC_CODE) # type:ignore NAME = [(country.name, country.name) for country in countries] Name = Enum("Name", NAME) # type:ignore flag_mapping = get_country_flags() FLAG = [(flag, flag) for code, flag in flag_mapping.items()] Flag = Enum("Flag", FLAG) # type:ignore ================================================ FILE: outlines/types/dsl.py ================================================ """Regular expression DSL and output types for structured generation. This module contains elements related to three logical steps in the use of output types for structured generation: 1. Definition of `Term` classes that contain output type definitions. That includes both terms intended to be used by themselves such as `JsonSchema` or `CFG` and terms that are part of the regular expression DSL such as `Alternatives` or `KleeneStar` (and the related functions). 2. Conversion of Python types into `Term` instances (`python_types_to_terms`). 3. Conversion of a `Term` instance into a regular expression (`to_regex`). """ import json import re import sys import warnings from dataclasses import dataclass from enum import EnumMeta from types import FunctionType from typing import ( Any, List, Literal, Optional as OptionalType, Union, get_args, ) import jsonschema from genson import SchemaBuilder from pydantic import ( BaseModel, GetCoreSchemaHandler, GetJsonSchemaHandler, TypeAdapter, ) from pydantic.json_schema import JsonSchemaValue from pydantic_core import core_schema as cs from outlines_core.json_schema import build_regex_from_schema import outlines.types as types from outlines import grammars from outlines.types.json_schema_utils import ( json_schema_dict_to_pydantic, json_schema_dict_to_typeddict, json_schema_dict_to_dataclass, ) from outlines.types.utils import ( get_schema_from_signature, is_int, is_int_instance, is_float, is_float_instance, is_str, is_str_instance, is_bool, is_datetime, is_date, is_time, is_native_dict, is_dict_instance, is_dataclass, is_typed_dict, is_pydantic_model, is_genson_schema_builder, is_literal, is_union, is_enum, is_callable, is_typing_list, is_typing_tuple, is_typing_dict, ) if sys.version_info >= (3, 12): # pragma: no cover from typing import _TypedDictMeta # type: ignore else: # pragma: no cover from typing_extensions import _TypedDictMeta # type: ignore class Term: """Represents types defined with a regular expression. `Regex` instances can be used as a type in a Pydantic model definittion. They will be translated to JSON Schema as a "string" field with the "pattern" keyword set to the regular expression this class represents. The class also handles validation. Examples -------- >>> from outlines.types import Regex >>> from pydantic import BaseModel >>> >>> age_type = Regex("[0-9]+") >>> >>> class User(BaseModel): >>> name: str >>> age: age_type """ def __add__(self: "Term", other: "Term") -> "Sequence": if is_str_instance(other): other = String(str(other)) return Sequence([self, other]) def __radd__(self: "Term", other: "Term") -> "Sequence": if is_str_instance(other): other = String(str(other)) return Sequence([other, self]) def __or__(self: "Term", other: "Term") -> "Alternatives": if is_str_instance(other): other = String(str(other)) return Alternatives([self, other]) def __ror__(self: "Term", other: "Term") -> "Alternatives": if is_str_instance(other): other = String(str(other)) return Alternatives([other, self]) def __get_validator__(self, _core_schema): def validate(input_value): return self.validate(input_value) return validate def __get_pydantic_core_schema__( self, source_type: Any, handler: GetCoreSchemaHandler ) -> cs.CoreSchema: return cs.no_info_plain_validator_function(lambda value: self.validate(value)) def __get_pydantic_json_schema__( self, core_schema: cs.CoreSchema, handler: GetJsonSchemaHandler ) -> JsonSchemaValue: return {"type": "string", "pattern": to_regex(self)} def validate(self, value: str) -> str: pattern = to_regex(self) compiled = re.compile(pattern) if not compiled.fullmatch(str(value)): raise ValueError( f"Input should be in the language of the regular expression {pattern}" ) return value def matches(self, value: str) -> bool: """Check that a given value is in the language defined by the Term. We make the assumption that the language defined by the term can be defined with a regular expression. """ pattern = to_regex(self) compiled = re.compile(pattern) if compiled.fullmatch(str(value)): return True return False def display_ascii_tree(self, indent="", is_last=True) -> str: """Display the regex tree in ASCII format.""" branch = "└── " if is_last else "├── " result = indent + branch + self._display_node() + "\n" # Calculate the new indent for children new_indent = indent + (" " if is_last else "│ ") # Let each subclass handle its children result += self._display_children(new_indent) return result def _display_node(self): raise NotImplementedError def _display_children(self, indent: str) -> str: """Display the children of this node. Override in subclasses with children.""" return "" def __str__(self): return self.display_ascii_tree() def optional(self) -> "Optional": return optional(self) def exactly(self, count: int) -> "QuantifyExact": return exactly(count, self) def at_least(self, count: int) -> "QuantifyMinimum": return at_least(count, self) def at_most(self, count: int) -> "QuantifyMaximum": return at_most(count, self) def between(self, min_count: int, max_count: int) -> "QuantifyBetween": return between(min_count, max_count, self) def one_or_more(self) -> "KleenePlus": return one_or_more(self) def zero_or_more(self) -> "KleeneStar": return zero_or_more(self) @dataclass class String(Term): value: str def _display_node(self) -> str: return f"String('{self.value}')" def __repr__(self): return f"String(value='{self.value}')" @dataclass class Regex(Term): """Class representing a regular expression. Parameters ---------- pattern The regular expression as a string. """ pattern: str def _display_node(self) -> str: return f"Regex('{self.pattern}')" def __repr__(self): return f"Regex(pattern='{self.pattern}')" @dataclass class CFG(Term): """Class representing a context-free grammar. Parameters ---------- definition The definition of the context-free grammar as a string. """ definition: str def _display_node(self) -> str: return f"CFG('{self.definition}')" def __repr__(self): return f"CFG(definition='{self.definition}')" def __eq__(self, other): if not isinstance(other, CFG): return False return self.definition == other.definition @classmethod def from_file(cls, path: str) -> "CFG": """Create a CFG instance from a file containing a CFG definition. Parameters ---------- path : str The path to the file containing the CFG definition. Returns ------- CFG A CFG instance. """ with open(path, "r") as f: definition = f.read() return cls(definition) class JsonSchema(Term): """Class representing a JSON schema. The JSON schema object from which to instantiate the class can be a dictionary, a string, a Pydantic model, a typed dict, a dataclass, or a genSON schema builder. """ schema: str whitespace_pattern: OptionalType[str] def __init__( self, schema: Union[ dict, str, type[BaseModel], _TypedDictMeta, type, SchemaBuilder ], whitespace_pattern: OptionalType[str] = None, ensure_ascii: bool = True, ): """ Parameters ---------- schema The object containing the JSON schema. whitespace_pattern The pattern to use to match whitespace characters. ensure_ascii Whether to ensure the schema is ASCII-only. """ schema_str: str if is_dict_instance(schema): schema_str = json.dumps(schema, ensure_ascii=ensure_ascii) elif is_str_instance(schema): schema_str = str(schema) elif is_pydantic_model(schema): schema_str = json.dumps(schema.model_json_schema(), ensure_ascii=ensure_ascii) # type: ignore elif is_typed_dict(schema): schema_str = json.dumps(TypeAdapter(schema).json_schema(), ensure_ascii=ensure_ascii) elif is_dataclass(schema): schema_str = json.dumps(TypeAdapter(schema).json_schema(), ensure_ascii=ensure_ascii) elif is_genson_schema_builder(schema): schema_str = schema.to_json(ensure_ascii=ensure_ascii) # type: ignore else: raise ValueError( f"Cannot parse schema {schema}. The schema must be either " + "a Pydantic class, typed dict, a dataclass, a genSON schema " + "builder or a string or dict that contains the JSON schema " + "specification" ) jsonschema.Draft7Validator.check_schema(json.loads(schema_str)) self.schema = schema_str self.whitespace_pattern = whitespace_pattern @classmethod def is_json_schema(cls, obj: Any) -> bool: """Check if the object provided is a JSON schema type. Parameters ---------- obj: Any The object to check Returns ------- bool True if the object is a JSON schema type, False otherwise """ return ( isinstance(obj, cls) or is_pydantic_model(obj) or is_typed_dict(obj) or is_dataclass(obj) or is_genson_schema_builder(obj) ) @classmethod def convert_to( cls, schema: Union[ "JsonSchema", type[BaseModel], _TypedDictMeta, type, SchemaBuilder, ], target_types: List[Literal[ "str", "dict", "pydantic", "typeddict", "dataclass", "genson", ]], ) -> Union[str, dict, type[BaseModel], _TypedDictMeta, type, SchemaBuilder]: """Convert a JSON schema type to a different JSON schema type. If the schema provided is already of a type in the target_types, return it unchanged. Parameters ---------- schema: Union[JsonSchema, type[BaseModel], _TypedDictMeta, type, SchemaBuilder] The schema to convert target_types: List[Literal["str", "dict", "pydantic", "typeddict", "dataclass", "genson"]] The target types to convert to """ # If the schema provided is already of a type in the target_types, # just return it if isinstance(schema, cls): if "str" in target_types: return schema.schema elif "dict" in target_types: return json.loads(schema.schema) elif is_pydantic_model(schema) and "pydantic" in target_types: return schema elif is_typed_dict(schema) and "typeddict" in target_types: return schema elif is_dataclass(schema) and "dataclass" in target_types: return schema elif is_genson_schema_builder(schema) and "genson" in target_types: return schema # Convert the schema to a JSON schema string/dict if isinstance(schema, cls): schema_str = schema.schema else: schema_str = cls(schema).schema schema_dict = json.loads(schema_str) for target_type in target_types: try: # Convert the JSON schema string to the target type if target_type == "str": return schema_str elif target_type == "dict": return schema_dict elif target_type == "pydantic": return json_schema_dict_to_pydantic(schema_dict) elif target_type == "typeddict": return json_schema_dict_to_typeddict(schema_dict) elif target_type == "dataclass": return json_schema_dict_to_dataclass(schema_dict) # No conversion available for genson except Exception as e: # pragma: no cover warnings.warn( f"Cannot convert schema type {type(schema)} to {target_type}: {e}" ) continue raise ValueError( f"Cannot convert schema type {type(schema)} to any of the target " f"types {target_types}" ) def _display_node(self) -> str: return f"JsonSchema('{self.schema}')" def __repr__(self): return f"JsonSchema(schema='{self.schema}')" def __eq__(self, other): if not isinstance(other, JsonSchema): return False try: self_dict = json.loads(self.schema) other_dict = json.loads(other.schema) return self_dict == other_dict except json.JSONDecodeError: # pragma: no cover return self.schema == other.schema @classmethod def from_file(cls, path: str) -> "JsonSchema": """Create a JsonSchema instance from a .json file containing a JSON schema. Parameters ---------- path: The path to the file containing the JSON schema. Returns ------- JsonSchema A JsonSchema instance. """ with open(path, "r") as f: schema = json.load(f) return cls(schema) @dataclass class Choice(Term): """Class representing a choice between different items. Parameters ---------- items The items to choose from. """ items: List[Any] def _display_node(self) -> str: return f"Choice({repr(self.items)})" def __repr__(self): return f"Choice(items={repr(self.items)})" @dataclass class KleeneStar(Term): term: Term def _display_node(self) -> str: return "KleeneStar(*)" def _display_children(self, indent: str) -> str: return self.term.display_ascii_tree(indent, True) def __repr__(self): return f"KleeneStar(term={repr(self.term)})" @dataclass class KleenePlus(Term): term: Term def _display_node(self) -> str: return "KleenePlus(+)" def _display_children(self, indent: str) -> str: return self.term.display_ascii_tree(indent, True) def __repr__(self): return f"KleenePlus(term={repr(self.term)})" @dataclass class Optional(Term): term: Term def _display_node(self) -> str: return "Optional(?)" def _display_children(self, indent: str) -> str: return self.term.display_ascii_tree(indent, True) def __repr__(self): return f"Optional(term={repr(self.term)})" @dataclass class Alternatives(Term): terms: List[Term] def _display_node(self) -> str: return "Alternatives(|)" def _display_children(self, indent: str) -> str: return "".join( term.display_ascii_tree(indent, i == len(self.terms) - 1) for i, term in enumerate(self.terms) ) def __repr__(self): return f"Alternatives(terms={repr(self.terms)})" @dataclass class Sequence(Term): terms: List[Term] def _display_node(self) -> str: return "Sequence" def _display_children(self, indent: str) -> str: return "".join( term.display_ascii_tree(indent, i == len(self.terms) - 1) for i, term in enumerate(self.terms) ) def __repr__(self): return f"Sequence(terms={repr(self.terms)})" @dataclass class QuantifyExact(Term): term: Term count: int def _display_node(self) -> str: return f"Quantify({{{self.count}}})" def _display_children(self, indent: str) -> str: return self.term.display_ascii_tree(indent, True) def __repr__(self): return f"QuantifyExact(term={repr(self.term)}, count={repr(self.count)})" @dataclass class QuantifyMinimum(Term): term: Term min_count: int def _display_node(self) -> str: return f"Quantify({{{self.min_count},}})" def _display_children(self, indent: str) -> str: return self.term.display_ascii_tree(indent, True) def __repr__(self): return ( f"QuantifyMinimum(term={repr(self.term)}, min_count={repr(self.min_count)})" ) @dataclass class QuantifyMaximum(Term): term: Term max_count: int def _display_node(self) -> str: return f"Quantify({{,{self.max_count}}})" def _display_children(self, indent: str) -> str: return self.term.display_ascii_tree(indent, True) def __repr__(self): return ( f"QuantifyMaximum(term={repr(self.term)}, max_count={repr(self.max_count)})" ) @dataclass class QuantifyBetween(Term): term: Term min_count: int max_count: int def __post_init__(self): if self.min_count > self.max_count: raise ValueError( "QuantifyBetween: `max_count` must be greater than `min_count`." ) def _display_node(self) -> str: return f"Quantify({{{self.min_count},{self.max_count}}})" def _display_children(self, indent: str) -> str: return self.term.display_ascii_tree(indent, True) def __repr__(self): return f"QuantifyBetween(term={repr(self.term)}, min_count={repr(self.min_count)}, max_count={repr(self.max_count)})" def regex(pattern: str): return Regex(pattern) def cfg(definition: str): return CFG(definition) def json_schema(schema: Union[str, dict, type[BaseModel]]): return JsonSchema(schema) def either(*terms: Union[str, Term]): """Represents an alternative between different terms or strings. This factory function automatically translates string arguments into `String` objects. """ terms = [String(arg) if isinstance(arg, str) else arg for arg in terms] return Alternatives(terms) def optional(term: Union[Term, str]) -> Optional: term = String(term) if isinstance(term, str) else term return Optional(term) def exactly(count: int, term: Union[Term, str]) -> QuantifyExact: """Repeat the term exactly `count` times.""" term = String(term) if isinstance(term, str) else term return QuantifyExact(term, count) def at_least(count: int, term: Union[Term, str]) -> QuantifyMinimum: """Repeat the term at least `count` times.""" term = String(term) if isinstance(term, str) else term return QuantifyMinimum(term, count) def at_most(count: int, term: Union[Term, str]) -> QuantifyMaximum: """Repeat the term exactly `count` times.""" term = String(term) if isinstance(term, str) else term return QuantifyMaximum(term, count) def between(min_count: int, max_count: int, term: Union[Term, str]) -> QuantifyBetween: term = String(term) if isinstance(term, str) else term return QuantifyBetween(term, min_count, max_count) def zero_or_more(term: Union[Term, str]) -> KleeneStar: term = String(term) if isinstance(term, str) else term return KleeneStar(term) def one_or_more(term: Union[Term, str]) -> KleenePlus: term = String(term) if isinstance(term, str) else term return KleenePlus(term) def python_types_to_terms(ptype: Any, recursion_depth: int = 0) -> Term: """Convert Python types to Outlines DSL terms that constrain LLM output. Parameters ---------- ptype The Python type to convert recursion_depth Current recursion depth to prevent infinite recursion Returns ------- Term The corresponding DSL `Term` instance. """ if recursion_depth > 10: raise RecursionError( f"Maximum recursion depth exceeded when converting {ptype}. " "This might be due to a recursive type definition." ) # First handle Term instances if isinstance(ptype, Term): return ptype # Basic types if is_int(ptype): return types.integer elif is_float(ptype): return types.number elif is_bool(ptype): return types.boolean elif is_str(ptype): return types.string elif is_native_dict(ptype): return CFG(grammars.json) elif is_time(ptype): return types.time elif is_date(ptype): return types.date elif is_datetime(ptype): return types.datetime # Basic type instances if is_str_instance(ptype): return String(ptype) elif is_int_instance(ptype) or is_float_instance(ptype): return Regex(str(ptype)) # Structured types structured_type_checks = [ lambda x: is_dataclass(x), lambda x: is_typed_dict(x), lambda x: is_pydantic_model(x), ] if any(check(ptype) for check in structured_type_checks): schema = TypeAdapter(ptype).json_schema() return JsonSchema(schema) elif is_genson_schema_builder(ptype): schema = ptype.to_json() return JsonSchema(schema) if is_enum(ptype): return Alternatives( [ python_types_to_terms(member, recursion_depth + 1) for member in _get_enum_members(ptype) ] ) args = get_args(ptype) if is_literal(ptype): return _handle_literal(args) elif is_union(ptype): return _handle_union(args, recursion_depth) elif is_typing_list(ptype): return _handle_list(args, recursion_depth) elif is_typing_tuple(ptype): return _handle_tuple(args, recursion_depth) elif is_typing_dict(ptype): return _handle_dict(args, recursion_depth) if is_callable(ptype): return JsonSchema(get_schema_from_signature(ptype)) type_name = getattr(ptype, "__name__", ptype) raise TypeError( f"Type {type_name} is currently not supported. Please open an issue: " "https://github.com/dottxt-ai/outlines/issues" ) def _get_enum_members(ptype: EnumMeta) -> List[Any]: regular_members = [member.value for member in ptype] # type: ignore function_members = [] for key, value in ptype.__dict__.items(): if ( isinstance(value, FunctionType) and not (key.startswith('__') and key.endswith('__')) and key != '_generate_next_value_' # Skip this specific method that causes issues ): function_members.append(value) return regular_members + function_members def _handle_literal(args: tuple) -> Alternatives: return Alternatives([python_types_to_terms(arg) for arg in args]) def _ensure_json_quoted(term: Term) -> Term: """Wrap bare ``String`` terms in double quotes for JSON container contexts. When string literal values (from ``Literal`` or ``Enum``) appear inside container types (``List``, ``Tuple``, ``Dict``), they must be JSON-quoted so the generated regex matches valid JSON. ``Regex``-based terms (e.g. ``types.string``) already include their own quotes and are left untouched. """ if isinstance(term, String): return String(f'"{term.value}"') if isinstance(term, Alternatives): quoted = [_ensure_json_quoted(t) for t in term.terms] return Alternatives(quoted) return term def _handle_union(args: tuple, recursion_depth: int) -> Alternatives: # Handle the Optional[T] type if len(args) == 2 and (type(None) in args or None in args): other_ptype = next(arg for arg in args if arg not in (type(None), None)) return Alternatives( [ python_types_to_terms(other_ptype, recursion_depth + 1), String("None"), ] ) return Alternatives( [python_types_to_terms(arg, recursion_depth + 1) for arg in args] ) def _handle_list(args: tuple, recursion_depth: int) -> Sequence: if args is None or len(args) != 1: raise TypeError( "Only homogeneous lists are supported. You should provide exactly " + "one argument to `List`, got {args}." ) item_type = _ensure_json_quoted(python_types_to_terms(args[0], recursion_depth + 1)) return Sequence( [ String("["), item_type, KleeneStar(Sequence([String(", "), item_type])), String("]"), ] ) def _handle_tuple(args: tuple, recursion_depth: int) -> Union[Sequence, String]: if len(args) == 0 or args == ((),): return String("()") elif len(args) == 2 and args[1] is Ellipsis: item_term = _ensure_json_quoted(python_types_to_terms(args[0], recursion_depth + 1)) return Sequence( [ String("("), item_term, KleeneStar(Sequence([String(", "), item_term])), String(")"), ] ) else: items = [_ensure_json_quoted(python_types_to_terms(arg, recursion_depth + 1)) for arg in args] separator = String(", ") elements = [] for i, item in enumerate(items): elements.append(item) if i < len(items) - 1: elements.append(separator) return Sequence([String("("), *elements, String(")")]) def _handle_dict(args: tuple, recursion_depth: int) -> Sequence: if args is None or len(args) != 2: raise TypeError(f"Dict must have exactly two type arguments. Got {args}.") # Add dict support with key:value pairs key_type = _ensure_json_quoted(python_types_to_terms(args[0], recursion_depth + 1)) value_type = _ensure_json_quoted(python_types_to_terms(args[1], recursion_depth + 1)) return Sequence( [ String("{"), Optional( Sequence( [ key_type, String(":"), value_type, KleeneStar( Sequence([String(", "), key_type, String(":"), value_type]) ), ] ) ), String("}"), ] ) def to_regex(term: Term) -> str: """Convert a term to a regular expression. We only consider self-contained terms that do not refer to another rule. Parameters ---------- term The term to convert to a regular expression. Returns ------- str The regular expression as a string. """ if isinstance(term, String): return re.escape(term.value) elif isinstance(term, Regex): return f"({term.pattern})" elif isinstance(term, JsonSchema): regex_str = build_regex_from_schema(term.schema, term.whitespace_pattern) return f"({regex_str})" elif isinstance(term, Choice): regexes = [to_regex(python_types_to_terms(item)) for item in term.items] return f"({'|'.join(regexes)})" elif isinstance(term, KleeneStar): return f"({to_regex(term.term)})*" elif isinstance(term, KleenePlus): return f"({to_regex(term.term)})+" elif isinstance(term, Optional): return f"({to_regex(term.term)})?" elif isinstance(term, Alternatives): regexes = [to_regex(subterm) for subterm in term.terms] return f"({'|'.join(regexes)})" elif isinstance(term, Sequence): regexes = [to_regex(subterm) for subterm in term.terms] return f"{''.join(regexes)}" elif isinstance(term, QuantifyExact): return f"({to_regex(term.term)}){{{term.count}}}" elif isinstance(term, QuantifyMinimum): return f"({to_regex(term.term)}){{{term.min_count},}}" elif isinstance(term, QuantifyMaximum): return f"({to_regex(term.term)}){{,{term.max_count}}}" elif isinstance(term, QuantifyBetween): return f"({to_regex(term.term)}){{{term.min_count},{term.max_count}}}" else: raise TypeError( f"Cannot convert object {repr(term)} to a regular expression." ) ================================================ FILE: outlines/types/json_schema_utils.py ================================================ """Convert JSON Schema dicts to Python types.""" import sys from dataclasses import dataclass, field from typing import Any, Dict, List, Literal, Optional from pydantic import BaseModel, create_model if sys.version_info >= (3, 12): # pragma: no cover from typing import _TypedDictMeta, TypedDict # type: ignore else: # pragma: no cover from typing_extensions import _TypedDictMeta, TypedDict # type: ignore def schema_type_to_python( schema: dict, caller_target_type: Literal["pydantic", "typeddict", "dataclass"] ) -> Any: """Get a Python type from a JSON Schema dict. Parameters ---------- schema: dict The JSON Schema dict to convert to a Python type caller_target_type: Literal["pydantic", "typeddict", "dataclass"] The type of the caller Returns ------- Any The Python type """ if "enum" in schema: values = schema["enum"] return Literal[tuple(values)] t = schema.get("type") if t == "string": return str elif t == "integer": return int elif t == "number": return float elif t == "boolean": return bool elif t == "array": items = schema.get("items", {}) if items: item_type = schema_type_to_python(items, caller_target_type) else: item_type = Any return List[item_type] # type: ignore elif t == "object": name = schema.get("title") if caller_target_type == "pydantic": return json_schema_dict_to_pydantic(schema, name) elif caller_target_type == "typeddict": return json_schema_dict_to_typeddict(schema, name) elif caller_target_type == "dataclass": return json_schema_dict_to_dataclass(schema, name) return Any def json_schema_dict_to_typeddict( schema: dict, name: Optional[str] = None ) -> _TypedDictMeta: """Convert a JSON Schema dict into a TypedDict class. Parameters ---------- schema: dict The JSON Schema dict to convert to a TypedDict name: Optional[str] The name of the TypedDict Returns ------- _TypedDictMeta The TypedDict class """ required = set(schema.get("required", [])) properties = schema.get("properties", {}) annotations: Dict[str, Any] = {} for property, details in properties.items(): typ = schema_type_to_python(details, "typeddict") if property not in required: typ = Optional[typ] annotations[property] = typ return TypedDict(name or "AnonymousTypedDict", annotations) # type: ignore def json_schema_dict_to_pydantic( schema: dict, name: Optional[str] = None ) -> type[BaseModel]: """Convert a JSON Schema dict into a Pydantic BaseModel class. Parameters ---------- schema: dict The JSON Schema dict to convert to a Pydantic BaseModel name: Optional[str] The name of the Pydantic BaseModel Returns ------- type[BaseModel] The Pydantic BaseModel class """ required = set(schema.get("required", [])) properties = schema.get("properties", {}) field_definitions: Dict[str, Any] = {} for property, details in properties.items(): typ = schema_type_to_python(details, "pydantic") if property not in required: field_definitions[property] = (Optional[typ], None) else: field_definitions[property] = (typ, ...) return create_model(name or "AnonymousPydanticModel", **field_definitions) def json_schema_dict_to_dataclass( schema: dict, name: Optional[str] = None ) -> type: """Convert a JSON Schema dict into a dataclass. Parameters ---------- schema: dict The JSON Schema dict to convert to a dataclass name: Optional[str] The name of the dataclass Returns ------- type The dataclass """ required = set(schema.get("required", [])) properties = schema.get("properties", {}) annotations: Dict[str, Any] = {} defaults: Dict[str, Any] = {} for property, details in properties.items(): typ = schema_type_to_python(details, "dataclass") annotations[property] = typ if property not in required: defaults[property] = None class_dict = { '__annotations__': annotations, '__module__': __name__, } for property, default_val in defaults.items(): class_dict[property] = field(default=default_val) cls = type(name or "AnonymousDataclass", (), class_dict) return dataclass(cls) ================================================ FILE: outlines/types/locale/__init__.py ================================================ """Locale-specific regex patterns.""" from . import us __all__ = [ "us", ] ================================================ FILE: outlines/types/locale/us.py ================================================ """Locale-specific regex patterns for the United States.""" from outlines.types.dsl import Regex zip_code = Regex(r"\d{5}(?:-\d{4})?") phone_number = Regex(r"(\([0-9]{3}\) |[0-9]{3}-)[0-9]{3}-[0-9]{4}") ================================================ FILE: outlines/types/utils.py ================================================ """Utility functions for the types module.""" import dataclasses import datetime import inspect import sys import warnings from enum import Enum, EnumMeta from typing import ( Annotated, Any, Callable, Dict, Literal, List, NewType, Tuple, Union, get_args, get_origin, ) from genson import SchemaBuilder from pydantic import BaseModel, create_model if sys.version_info >= (3, 12): # pragma: no cover from typing import _TypedDictMeta # type: ignore else: # pragma: no cover from typing_extensions import _TypedDictMeta # type: ignore # Type identification def is_int(value: Any) -> bool: return ( value is int or get_origin(value) is int or (get_origin(value) is Annotated and get_args(value)[0] is int) or (hasattr(value, "__supertype__") and value.__supertype__ is int) ) def is_int_instance(value: Any) -> bool: return isinstance(value, int) and not isinstance(value, bool) def is_float(value: Any) -> bool: return ( value is float or get_origin(value) is float or (get_origin(value) is Annotated and get_args(value)[0] is float) or (hasattr(value, "__supertype__") and value.__supertype__ is float) ) def is_float_instance(value: Any) -> bool: return isinstance(value, float) def is_str(value: Any) -> bool: return ( value is str or get_origin(value) is str or (get_origin(value) is Annotated and get_args(value)[0] is str) or (hasattr(value, "__supertype__") and value.__supertype__ is str) ) def is_str_instance(value: Any) -> bool: return isinstance(value, str) def is_bool(value: Any) -> bool: return ( value is bool or get_origin(value) is bool or (get_origin(value) is Annotated and get_args(value)[0] is bool) or (hasattr(value, "__supertype__") and value.__supertype__ is bool) ) def is_dict_instance(value: Any) -> bool: return isinstance(value, dict) def is_datetime(value: Any) -> bool: return value is datetime.datetime or get_origin(value) is datetime.datetime def is_date(value: Any) -> bool: return value is datetime.date or get_origin(value) is datetime.date def is_time(value: Any) -> bool: return value is datetime.time or get_origin(value) is datetime.time def is_native_dict(value: Any) -> bool: return value is dict def is_typing_dict(value: Any) -> bool: return get_origin(value) is dict def is_typing_list(value: Any) -> bool: return get_origin(value) is list def is_typing_tuple(value: Any) -> bool: return get_origin(value) is tuple def is_union(value: Any) -> bool: return get_origin(value) is Union def is_literal(value: Any) -> bool: return get_origin(value) is Literal def is_dataclass(value: Any) -> bool: return isinstance(value, type) and dataclasses.is_dataclass(value) def is_typed_dict(value: Any) -> bool: return isinstance(value, _TypedDictMeta) def is_pydantic_model(value): # needed because generic type cannot be used with `issubclass` # for Python versions < 3.11 if get_origin(value) is not None: return False return isinstance(value, type) and issubclass(value, BaseModel) def is_genson_schema_builder(value: Any) -> bool: return isinstance(value, SchemaBuilder) def is_enum(value: Any) -> bool: return isinstance(value, EnumMeta) def is_callable(value: Any) -> bool: return callable(value) and not isinstance(value, type) # Type conversion def get_enum_from_literal(value) -> Enum: return Enum( value.__name__, {str(arg): arg for arg in get_args(value)} ) def get_enum_from_choice(value) -> Enum: return Enum( 'Choice', {str(item): item for item in value.items} ) def get_schema_from_signature(fn: Callable) -> dict: """Turn a function signature into a JSON schema. Every JSON object valid to the output JSON Schema can be passed to `fn` using the ** unpacking syntax. """ signature = inspect.signature(fn) arguments = {} for name, arg in signature.parameters.items(): if arg.annotation == inspect._empty: raise ValueError("Each argument must have a type annotation") else: arguments[name] = (arg.annotation, ...) try: fn_name = fn.__name__ except Exception as e: fn_name = "Arguments" warnings.warn( f"The function name could not be determined. Using default name 'Arguments' instead. For debugging, here is exact error:\n{e}", category=UserWarning, ) model = create_model(fn_name, **arguments) return model.model_json_schema() def get_schema_from_enum(myenum: type[Enum]) -> dict: if len(myenum) == 0: raise ValueError( f"Your enum class {myenum.__name__} has 0 members. If you are working with an enum of functions, do not forget to register them as callable (using `partial` for instance)" ) choices = [ get_schema_from_signature(elt.value.func) if callable(elt.value) else {"const": elt.value} for elt in myenum ] schema = {"title": myenum.__name__, "oneOf": choices} return schema ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"] build-backend = "setuptools.build_meta" [project] name = "outlines" authors= [{name = "Outlines Developers"}] description = "Probabilistic Generative Model Programming" requires-python = ">=3.10,<3.14" license = {text = "Apache-2.0"} keywords=[ "machine learning", "deep learning", "language models", "structured generation", ] classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: Science/Research", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Topic :: Scientific/Engineering :: Artificial Intelligence", ] dependencies = [ "jinja2", "cloudpickle", "diskcache", "pydantic>=2.0", "jsonschema", "pillow", "typing_extensions", "outlines_core==0.2.14", "genson", "jsonpath_ng", ] dynamic = ["version"] [project.optional-dependencies] anthropic = ["anthropic"] dottxt = ["dottxt"] gemini = ["google-genai"] llamacpp = ["huggingface-hub", "llama-cpp-python", "numba"] mlxlm = ["datasets", "mlx", "mlx-lm"] lmstudio = ["lmstudio"] ollama = ["ollama"] openai = ["openai"] mistral = ["mistralai"] sglang = ["openai"] tgi = ["huggingface_hub"] transformers = ["accelerate", "datasets", "transformers", "setuptools", "sentencepiece"] vllm = ["openai"] xgrammar = ["xgrammar"] llguidance = ["llguidance"] airports = ["airportsdata"] countries = ["iso3166"] test = [ "pre-commit", "pytest", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-asyncio", "coverage[toml]>=5.1", "diff-cover", "accelerate", "beartype<0.16.0", "responses", "llama-cpp-python", "mlx-lm>=0.19.2; platform_machine == 'arm64' and sys_platform == 'darwin'", "huggingface_hub", "openai>=1.0.0", "datasets", "anthropic", "google-genai", "mistralai", "transformers", "pillow", "jax", "flax", "numpy>=2.0.0,<2.2.0", "numba", "torch>2.3.0", "tensorflow", "tf-keras", "ollama", "lmstudio", "dottxt", "sentencepiece", "mkdocs_gen_files", "llguidance", "xgrammar", "airportsdata", "iso3166", "requests", ] [dependency-groups] # Note: vllm is excluded from the lock file due to circular dependency with outlines-core. # For GPU testing, install vllm manually: pip install vllm test-gpu = ["outlines[test]"] [project.urls] homepage = "https://github.com/dottxt-ai/outlines" documentation = "https://dottxt-ai.github.io/outlines/" repository = "https://github.com/dottxt-ai/outlines" [project.readme] file="README.md" content-type = "text/markdown" [tool.setuptools.packages.find] include = ["outlines*"] [tool.setuptools.package-data] "outlines" = ["py.typed", "grammars/*.lark"] [tool.setuptools_scm] write_to = "outlines/_version.py" [tool.pytest.ini_options] testpaths = ["tests"] filterwarnings = [ "error", "ignore::pydantic.warnings.PydanticDeprecatedSince20", "ignore::FutureWarning:transformers.*", "ignore::FutureWarning:huggingface_hub.*", "ignore::UserWarning", "ignore::DeprecationWarning:pyairports.*", "ignore::DeprecationWarning:jax.*", "ignore::DeprecationWarning:flax.*", "ignore::DeprecationWarning:torch.*", ] [tool.mypy] exclude=["examples"] enable_incomplete_feature = ["Unpack"] [[tool.mypy.overrides]] module = [ "jax", "jaxlib", "jax.numpy", "jinja2", "jsonschema.*", "anthropic.*", "google.*", "mistralai.*", "mamba_ssm.*", "mlx_lm.*", "mlx.*", "numpy.*", "cloudpickle.*", "diskcache.*", "pydantic.*", "pydantic_core.*", "pytest", "referencing.*", "torch.*", "transformers.*", "llama_cpp", "huggingface_hub", "datasets.*", "openai.*", "requests.*", "responses.*", "vllm.*", "iso3166.*", "airportsdata.*", "outlines_core.*", "genson", "lmstudio.*", "ollama.*", "dottxt.*", "tensorflow", "tensorflow.*", "tf-keras", "tf-keras.*", "mkdocs_gen_files.*", "jsonpath_ng.*", "llguidance.*", "xgrammar.*", ] ignore_missing_imports = true [tool.coverage.run] # we omit the files that require a GPU or Apple Silicon # as well as the models that make API calls omit = [ "outlines/_version.py", "outlines/models/anthropic.py", "outlines/models/dottxt.py", "outlines/models/gemini.py", "outlines/models/lmstudio.py", "outlines/models/mlxlm.py", "outlines/models/openai.py", "outlines/models/mistral.py", "outlines/models/vllm_offline.py", "outlines/processors/tensor_adapters/mlx.py", "tests/*", ] branch = true relative_files = true [tool.coverage.report] show_missing = true exclude_lines = [ "pragma: no cover", "if TYPE_CHECKING:", "\\.\\.\\.", ] [tool.diff_cover] compare_branch = "origin/main" diff_range_notation = ".." [tool.docformatter] style = "numpy" in-place = true [tool.ruff.lint] ignore = [ "E731", "F401" ] ================================================ FILE: requirements-doc.txt ================================================ mkdocs mkdocs-material mkdocs-material[imaging] mkdocs-mermaid2-plugin mkdocs-section-index mkdocstrings[python] mkdocs-git-committers-plugin-2 mkdocs-git-revision-date-localized-plugin mkdocs-redirects mkdocs-gen-files mkdocs-literate-nav mike ================================================ FILE: scripts/gen_ref_pages.py ================================================ """Generate the API reference pages and navigation automatically. This script is based on the `gen_ref_pages.py` script in the [mkdocstrings](https://mkdocstrings.github.io/recipes/#automatic-code-reference-pages) project. To exclude a file or module from being included in the generated API reference, add a part of its path to the `EXCLUDED_FILES` list. """ from pathlib import Path import mkdocs_gen_files CODEBASE_DIR_NAME = "outlines" OUTPUT_DIR_NAME = "api_reference" EXCLUDED_FILES = ["_version"] nav = mkdocs_gen_files.Nav() root = Path(__file__).parent.parent src = root / CODEBASE_DIR_NAME for path in sorted(src.rglob("*.py")): module_path = path.relative_to(src).with_suffix("") doc_path = path.relative_to(src).with_suffix(".md") full_doc_path = Path(OUTPUT_DIR_NAME, doc_path) parts = tuple(module_path.parts) if any(part in EXCLUDED_FILES for part in parts): continue if parts[-1] == "__init__": if len(parts) == 1: doc_path = Path("index.md") full_doc_path = Path(OUTPUT_DIR_NAME, doc_path) parts = (CODEBASE_DIR_NAME,) else: parts = parts[:-1] doc_path = doc_path.with_name("index.md") full_doc_path = full_doc_path.with_name("index.md") nav[parts] = doc_path.as_posix() with mkdocs_gen_files.open(full_doc_path, "w") as fd: ident = ".".join(parts) if len(parts) == 1 and parts[0] == CODEBASE_DIR_NAME: # For root module, just use the package name fd.write(f"::: {CODEBASE_DIR_NAME}") else: fd.write(f"::: {CODEBASE_DIR_NAME}.{ident}") mkdocs_gen_files.set_edit_path(full_doc_path, path.relative_to(root)) with mkdocs_gen_files.open(f"{OUTPUT_DIR_NAME}/SUMMARY.md", "w") as nav_file: nav_file.writelines(nav.build_literate_nav()) ================================================ FILE: setup.cfg ================================================ [flake8] max-line-length = 88 select = C,E,F,W ignore = E203,E231,E501,E741,W503,W504,C901,E731 per-file-ignores = **/__init__.py:F401,F403 exclude = normalai/_version.py ================================================ FILE: shell.nix ================================================ { pkgs ? import { config = { allowUnfree = true; }; } }: (pkgs.buildFHSEnv { name = "dottxt-ai"; targetPkgs = pkgs: with pkgs; [ autoconf binutils cmake cudatoolkit curl freeglut gcc13 git gitRepo gnumake gnupg gperf libGL libGLU linuxPackages.nvidia_x11 m4 ncurses5 procps python311 stdenv.cc unzip util-linux uv xorg.libX11 xorg.libXext xorg.libXi xorg.libXmu xorg.libXrandr xorg.libXv zlib ]; multiPkgs = pkgs: with pkgs; [ zlib ]; runScript = "bash"; profile = '' # CUDA paths export CUDA_HOME=${pkgs.cudatoolkit} export CUDA_PATH=${pkgs.cudatoolkit} # Ensure proper binary paths are included export PATH=${pkgs.gcc13}/bin:${pkgs.cudatoolkit}/bin:$PATH # Set library paths, including additional directories for CUPTI export LD_LIBRARY_PATH=${pkgs.cudatoolkit}/lib64:${pkgs.cudatoolkit}/extras/CUPTI/lib64:${pkgs.linuxPackages.nvidia_x11}/lib:$LD_LIBRARY_PATH # Add static library paths to EXTRA_LDFLAGS for the linker export EXTRA_LDFLAGS="-L${pkgs.cudatoolkit}/lib64 -L${pkgs.cudatoolkit}/extras/CUPTI/lib64 -L${pkgs.linuxPackages.nvidia_x11}/lib -L${pkgs.cudatoolkit}/libdevice $EXTRA_LDFLAGS" export EXTRA_CCFLAGS="-I${pkgs.cudatoolkit}/include $EXTRA_CCFLAGS" # Set CMake paths export CMAKE_PREFIX_PATH=${pkgs.cudatoolkit}:${pkgs.linuxPackages.nvidia_x11}:$CMAKE_PREFIX_PATH # C++ and CC flags export CXXFLAGS="--std=c++17 $EXTRA_CCFLAGS" export CC=${pkgs.gcc13}/bin/gcc export CXX=${pkgs.gcc13}/bin/g++ # NVCC flags to use the right compiler export NVCC_FLAGS="-ccbin ${pkgs.gcc13}/bin/gcc" ''; structuredAttrs__ = { stdenv = pkgs.stdenv.overrideCC pkgs.stdenv.cc pkgs.gcc13; }; }).env ================================================ FILE: tests/__init__.py ================================================ ================================================ FILE: tests/backends/test_backends.py ================================================ import outlines import pytest import transformers from outlines.backends import ( _get_backend, get_json_schema_logits_processor, get_regex_logits_processor, get_cfg_logits_processor, ) from outlines.backends.outlines_core import ( OutlinesCoreBackend, OutlinesCoreLogitsProcessor, ) from outlines.backends.llguidance import ( LLGuidanceBackend, LLGuidanceLogitsProcessor ) from outlines.backends.xgrammar import XGrammarBackend, XGrammarLogitsProcessor @pytest.fixture def model(): return outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained("erwanf/gpt2-mini"), transformers.AutoTokenizer.from_pretrained("erwanf/gpt2-mini"), ) @pytest.fixture def json_schema(): return ( '{"type": "object", "properties": {"name": {"type": "string"}, ' + '"age": {"type": "integer"}}, "required": ["name", "age"], ' + '"additionalProperties": false}' ) @pytest.fixture def regex(): return r"[0-9]{3}" @pytest.fixture def cfg_lark(): return """ ?start: sum ?sum: product | sum "+" product -> add | sum "-" product -> sub ?product: atom | product "*" atom -> mul | product "/" atom -> div ?atom: NUMBER -> number | "-" atom -> neg | "(" sum ")" %import common.NUMBER %import common.WS_INLINE %ignore WS_INLINE """ @pytest.fixture def cfg_ebnf(): return """ root ::= answer answer ::= "yes" | "no" """ def test_get_backend(model): backend = _get_backend("outlines_core", model) assert isinstance(backend, OutlinesCoreBackend) backend = _get_backend("xgrammar", model) assert isinstance(backend, XGrammarBackend) backend = _get_backend("llguidance", model) assert isinstance(backend, LLGuidanceBackend) with pytest.raises(ValueError, match="not supported"): _get_backend("not_supported", model) def test_get_json_schema_logits_processor(model, json_schema): processor = get_json_schema_logits_processor("outlines_core", model, json_schema) assert isinstance(processor, OutlinesCoreLogitsProcessor) processor = get_json_schema_logits_processor("llguidance", model, json_schema) assert isinstance(processor, LLGuidanceLogitsProcessor) processor = get_json_schema_logits_processor("xgrammar", model, json_schema) assert isinstance(processor, XGrammarLogitsProcessor) def test_get_regex_logits_processor(model, regex): processor = get_regex_logits_processor("outlines_core", model, regex) assert isinstance(processor, OutlinesCoreLogitsProcessor) processor = get_regex_logits_processor("llguidance", model, regex) assert isinstance(processor, LLGuidanceLogitsProcessor) processor = get_regex_logits_processor("xgrammar", model, regex) assert isinstance(processor, XGrammarLogitsProcessor) def test_get_cfg_logits_processor(model, cfg_lark, cfg_ebnf): with pytest.raises( NotImplementedError, match="Outlines Core does not support context-free grammar." ): get_cfg_logits_processor("outlines_core", model, cfg_lark) processor = get_cfg_logits_processor("llguidance", model, cfg_lark) assert isinstance(processor, LLGuidanceLogitsProcessor) processor = get_cfg_logits_processor("xgrammar", model, cfg_ebnf) assert isinstance(processor, XGrammarLogitsProcessor) ================================================ FILE: tests/backends/test_backends_utils.py ================================================ import torch import numpy as np def simulate_model_calling_processor(processor, tensor_library_name, vocabulary_size, eos_token_id, batch_size): if tensor_library_name == "torch": tensor_adapter = TorchTensorAdapter() elif tensor_library_name == "numpy": tensor_adapter = NumpyTensorAdapter() elif tensor_library_name == "mlx": tensor_adapter = MLXTensorAdapter() processor.reset() i = 0 input_ids = tensor_adapter.randint(0, vocabulary_size, (batch_size, 10)) while True: i += 1 logits = tensor_adapter.randn((batch_size, vocabulary_size)) output = processor(input_ids, logits) assert output.shape == (batch_size, vocabulary_size) if all(input_ids[:, -1] == eos_token_id): break input_ids = tensor_adapter.add_token_inputs_ids(input_ids, output) print(input_ids) if i > 20: break return input_ids[:, 10:] class TorchTensorAdapter(): def randn(self, shape): return torch.randn(*shape) def randint(self, low, high, size): return torch.randint(low, high, size) def add_token_inputs_ids(self, input_ids, logits): next_token_ids = torch.argmax(logits, dim=-1) input_ids = torch.cat([input_ids, next_token_ids.unsqueeze(-1)], dim=-1) return input_ids class NumpyTensorAdapter(): def randn(self, shape): return np.random.randn(*shape) def randint(self, low, high, size): return np.random.randint(low, high, size) def add_token_inputs_ids(self, input_ids, logits): next_token_ids = np.argmax(logits, axis=-1) print("next_token_ids",next_token_ids) input_ids = np.concatenate([input_ids, next_token_ids[..., None]], axis=-1) return input_ids class MLXTensorAdapter(): def __init__(self): import mlx self.mlx = mlx def randn(self, shape): return self.mlx.random.randn(*shape) def randint(self, low, high, size): return self.mlx.random.randint(low, high, size) def add_token_inputs_ids(self, input_ids, logits): next_token_ids = self.mlx.argmax(logits, axis=-1) input_ids = self.mlx.concatenate([input_ids, next_token_ids[..., None]], axis=-1) return input_ids ================================================ FILE: tests/backends/test_llguidance.py ================================================ import re import llama_cpp import llguidance import pytest import transformers from llguidance import LLTokenizer import outlines from outlines.backends.llguidance import ( LLGuidanceBackend, LLGuidanceLogitsProcessor ) from tests.backends.test_backends_utils import simulate_model_calling_processor try: import mlx_lm HAS_MLX = True except ImportError: HAS_MLX = False def model_transformers(): return outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained("erwanf/gpt2-mini"), transformers.AutoTokenizer.from_pretrained("erwanf/gpt2-mini"), ) def model_llamacpp(): return outlines.from_llamacpp( llama_cpp.Llama.from_pretrained( repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF", filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf", chat_format="qwen", ) ) def model_mlxlm(): return outlines.from_mlxlm( *mlx_lm.load("mlx-community/SmolLM-135M-Instruct-4bit") ) @pytest.fixture def json_schema(): return ( '{"type": "object", "properties": {"name": {"type": "string"}, ' + '"age": {"type": "integer"}}, "required": ["name", "age"], ' + '"additionalProperties": false}' ) @pytest.fixture def regex(): return r"[0-9]{3}" @pytest.fixture def cfg_lark(): return """ ?start: sum ?sum: product | sum "+" product -> add | sum "-" product -> sub ?product: atom | product "*" atom -> mul | product "/" atom -> div ?atom: NUMBER -> number | "-" atom -> neg | "(" sum ")" %import common.NUMBER %import common.WS_INLINE %ignore WS_INLINE """ @pytest.fixture def cfg_ebnf(): return """ root ::= answer answer ::= "yes" | "no" """ def test_llguidance_processor_torch(regex): model = model_transformers() tokenizer = model.tokenizer hf_tokenizer = model.hf_tokenizer llg_tokenizer = LLGuidanceBackend(model).llg_tokenizer grammar_spec = llguidance.grammar_from("regex", regex) processor = LLGuidanceLogitsProcessor(grammar_spec, llg_tokenizer, "torch") for _ in range(2): input_ids = simulate_model_calling_processor( processor, "torch", len(tokenizer.get_vocab()), tokenizer.eos_token_id, 2 ) assert re.match(regex, hf_tokenizer.decode(input_ids[0])) assert re.match(regex, hf_tokenizer.decode(input_ids[1])) def test_llguidance_processor_numpy(regex): model = model_llamacpp() tokenizer = model.tokenizer llg_tokenizer = LLGuidanceBackend(model).llg_tokenizer grammar_spec = llguidance.grammar_from("regex", regex) processor = LLGuidanceLogitsProcessor(grammar_spec, llg_tokenizer, "numpy") for _ in range(2): input_ids = simulate_model_calling_processor( processor, "numpy", len(tokenizer.vocabulary), tokenizer.eos_token_id, 2 ) assert re.match(regex, tokenizer.decode(input_ids[0])[0]) assert re.match(regex, tokenizer.decode(input_ids[1])[0]) @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_llguidance_processor_mlx(regex): model = model_mlxlm() tokenizer = model.mlx_tokenizer llg_tokenizer = LLGuidanceBackend(model).llg_tokenizer grammar_spec = llguidance.grammar_from("regex", regex) processor = LLGuidanceLogitsProcessor(grammar_spec, llg_tokenizer, "mlx") for _ in range(2): input_ids = simulate_model_calling_processor( processor, "mlx", len(tokenizer.vocabulary), tokenizer.eos_token_id, 2 ) assert re.match(regex, tokenizer.decode(input_ids[0])) assert re.match(regex, tokenizer.decode(input_ids[1])) models = [ (model_transformers(), "torch"), (model_llamacpp(), "numpy"), ] if HAS_MLX: models.append((model_mlxlm(), "mlx")) @pytest.mark.parametrize("model, tensor_library_name", models) def test_llguidance_backend(model, tensor_library_name, json_schema, regex, cfg_lark, cfg_ebnf): # initialization backend = LLGuidanceBackend(model) assert isinstance(backend.llg_tokenizer, LLTokenizer) assert backend.tensor_library_name == tensor_library_name # json schema processor = backend.get_json_schema_logits_processor(json_schema) assert isinstance(processor, LLGuidanceLogitsProcessor) generator = outlines.Generator(model, backend="llguidance", processor=processor) response = generator("Hello, how are you?") assert response[0] == "{" # regex processor = backend.get_regex_logits_processor(regex) assert isinstance(processor, LLGuidanceLogitsProcessor) generator = outlines.Generator(model, backend="llguidance", processor=processor) response = generator("Hello, how are you?") assert len(response) == 3 assert int(response) # cfg lark processor = backend.get_cfg_logits_processor(cfg_lark) assert isinstance(processor, LLGuidanceLogitsProcessor) generator = outlines.Generator(model, backend="llguidance", processor=processor) response = generator("Hello, how are you?") assert ( "+" in response or "-" in response or "*" in response or "/" in response or float(response.strip()) ) # cfg ebnf processor = backend.get_cfg_logits_processor(cfg_ebnf) assert isinstance(processor, LLGuidanceLogitsProcessor) generator = outlines.Generator(model, backend="llguidance", processor=processor) response = generator("Hello, how are you?") assert response == "yes" or response == "no" # batch + multiple generations processor = backend.get_json_schema_logits_processor(json_schema) generator = outlines.Generator(model, backend="llguidance", processor=processor) for _ in range(2): if tensor_library_name == "torch": response = generator.batch(["Create a character", "Hello, how are you?"], max_new_tokens=200) assert len(response) == 2 for r in response: assert r[0] == "{" else: response = generator("Create a character", max_tokens=20) assert response[0] == "{" ================================================ FILE: tests/backends/test_outlines_core.py ================================================ import re import llama_cpp import pytest import transformers from outlines_core import Index, Vocabulary import outlines from outlines.backends.outlines_core import ( OutlinesCoreBackend, OutlinesCoreLogitsProcessor, ) from tests.backends.test_backends_utils import simulate_model_calling_processor try: import mlx_lm HAS_MLX = True except ImportError: HAS_MLX = False def model_transformers(): return outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained("erwanf/gpt2-mini"), transformers.AutoTokenizer.from_pretrained("erwanf/gpt2-mini"), ) def model_llamacpp(): return outlines.from_llamacpp( llama_cpp.Llama.from_pretrained( repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF", filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf", chat_format="qwen", ) ) def model_mlxlm(): return outlines.from_mlxlm(*mlx_lm.load("mlx-community/SmolLM-135M-Instruct-4bit")) @pytest.fixture def json_schema(): return ( '{"type": "object", "properties": {"name": {"type": "string"}, ' + '"age": {"type": "integer"}}, "required": ["name", "age"], ' + '"additionalProperties": false}' ) @pytest.fixture def regex(): return r"[0-9]{3}" @pytest.fixture def cfg(): return """ ?start: sum ?sum: product | sum "+" product -> add | sum "-" product -> sub ?product: atom | product "*" atom -> mul | product "/" atom -> div ?atom: NUMBER -> number | "-" atom -> neg | "(" sum ")" %import common.NUMBER %import common.WS_INLINE %ignore WS_INLINE """ def test_outlines_core_processor_torch(regex): model = model_transformers() tokenizer = model.tokenizer hf_tokenizer = model.hf_tokenizer backend = OutlinesCoreBackend(model) index = Index(regex, backend.vocabulary) processor = OutlinesCoreLogitsProcessor(index, "torch") for _ in range(2): input_ids = simulate_model_calling_processor( processor, "torch", len(tokenizer.get_vocab()), tokenizer.eos_token_id, 2 ) assert re.match(regex, hf_tokenizer.decode(input_ids[0])) assert re.match(regex, hf_tokenizer.decode(input_ids[1])) def test_outlines_core_processor_numpy(regex): model = model_llamacpp() tokenizer = model.tokenizer backend = OutlinesCoreBackend(model) index = Index(regex, backend.vocabulary) processor = OutlinesCoreLogitsProcessor(index, "numpy") for _ in range(2): input_ids = simulate_model_calling_processor( processor, "numpy", len(tokenizer.vocabulary), tokenizer.eos_token_id, 2 ) assert re.match(regex, tokenizer.decode(input_ids[0])[0]) assert re.match(regex, tokenizer.decode(input_ids[1])[0]) @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_outlines_core_processor_mlx(): model = model_mlxlm() tokenizer = model.mlx_tokenizer backend = OutlinesCoreBackend(model) index = Index(r"[0-9]{3}", backend.vocabulary) processor = OutlinesCoreLogitsProcessor(index, "mlx") for _ in range(2): input_ids = simulate_model_calling_processor( processor, "mlx", len(tokenizer.vocabulary), tokenizer.eos_token_id, 2 ) assert re.match(regex, tokenizer.decode(input_ids[0])) assert re.match(regex, tokenizer.decode(input_ids[1])) def test_create_vocabulary_preserves_duplicate_token_ids(): vocab = { "hello": 1, "world": 2, "<0x20>": 3, "▁": 4, } def token_to_str(token): if token in ("<0x20>", "▁"): return " " return token vocabulary = OutlinesCoreBackend.create_outlines_core_vocabulary( vocab=vocab, eos_token_id=0, eos_token="hello", token_to_str=token_to_str, ) # 4 original IDs - 1 popped (hello) + 1 EOS added by Vocabulary = 4 assert len(vocabulary) == 4 models = [ (model_transformers(), "torch"), (model_llamacpp(), "numpy"), ] if HAS_MLX: models.append((model_mlxlm(), "mlx")) @pytest.mark.parametrize("model, tensor_library_name", models) def test_outlines_core_backend(model, tensor_library_name, json_schema, regex, cfg): # initialization backend = OutlinesCoreBackend(model) assert isinstance(backend.vocabulary, Vocabulary) assert backend.tensor_library_name == tensor_library_name # json schema processor = backend.get_json_schema_logits_processor(json_schema) assert isinstance(processor, OutlinesCoreLogitsProcessor) generator = outlines.Generator(model, backend="outlines_core", processor=processor) response = generator("Hello, how are you?") assert "name" in response # regex processor = backend.get_regex_logits_processor(regex) assert isinstance(processor, OutlinesCoreLogitsProcessor) generator = outlines.Generator(model, backend="outlines_core", processor=processor) response = generator("Hello, how are you?") assert len(response) == 3 assert int(response) # cfg with pytest.raises( NotImplementedError, match="Outlines Core does not support context-free grammar.", ): backend.get_cfg_logits_processor(cfg) # batch + multiple generations processor = backend.get_json_schema_logits_processor(json_schema) generator = outlines.Generator(model, backend="outlines_core", processor=processor) for _ in range(2): if tensor_library_name == "torch": response = generator.batch( ["Create a character", "Hello, how are you?"], max_new_tokens=200 ) assert len(response) == 2 for r in response: assert r[0] == "{" assert "name" in r else: response = generator("Create a character", max_tokens=20) assert response[0] == "{" assert "name" in response ================================================ FILE: tests/backends/test_xgrammar.py ================================================ import re import llama_cpp import outlines import pytest import transformers from xgrammar import GrammarCompiler, TokenizerInfo from outlines.backends.xgrammar import XGrammarBackend, XGrammarLogitsProcessor from tests.backends.test_backends_utils import simulate_model_calling_processor try: import mlx_lm HAS_MLX = True except ImportError: HAS_MLX = False def model_transformers(): return outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained("erwanf/gpt2-mini"), transformers.AutoTokenizer.from_pretrained("erwanf/gpt2-mini"), ) def model_llamacpp(): return outlines.from_llamacpp( llama_cpp.Llama.from_pretrained( repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF", filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf", chat_format="qwen", ) ) def model_mlxlm(): return outlines.from_mlxlm( *mlx_lm.load("mlx-community/SmolLM-135M-Instruct-4bit") ) @pytest.fixture def tokenizer_info(): tokenizer = model_transformers().hf_tokenizer tokenizer_info = TokenizerInfo.from_huggingface( tokenizer, vocab_size=len(tokenizer.get_vocab()) ) return tokenizer_info @pytest.fixture def json_schema(): return ( '{"type": "object", "properties": {"name": {"type": "string"}, ' + '"age": {"type": "integer"}}, "required": ["name", "age"], ' + '"additionalProperties": false}' ) @pytest.fixture def regex(): return r"[0-9]{3}" @pytest.fixture def cfg(): return """ root ::= answer answer ::= "yes" | "no" """ def test_xgr_processor_torch(regex): model = model_transformers() tokenizer = model.tokenizer hf_tokenizer = model.hf_tokenizer tokenizer_info = TokenizerInfo.from_huggingface( hf_tokenizer, vocab_size=len(hf_tokenizer.get_vocab()) ) grammar_compiler = GrammarCompiler(tokenizer_info) compiled_grammar = grammar_compiler.compile_regex(regex) processor = XGrammarLogitsProcessor(compiled_grammar, "torch") for _ in range(2): input_ids = simulate_model_calling_processor( processor, "torch", len(tokenizer.get_vocab()), tokenizer.eos_token_id, 2 ) assert re.match(regex, hf_tokenizer.decode(input_ids[0])) assert re.match(regex, hf_tokenizer.decode(input_ids[1])) @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_xgr_processor_mlx(tokenizer_info): model = model_mlxlm() tokenizer = model.mlx_tokenizer tokenizer_info = TokenizerInfo.from_huggingface( tokenizer, vocab_size=len(tokenizer.get_vocab()) ) grammar_compiler = GrammarCompiler(tokenizer_info) compiled_grammar = grammar_compiler.compile_regex(regex) processor = XGrammarLogitsProcessor(compiled_grammar, "mlx") for _ in range(2): input_ids = simulate_model_calling_processor( processor, "mlx", len(tokenizer.get_vocab()), tokenizer.eos_token_id, 2 ) assert re.match(regex, tokenizer.decode(input_ids[0])) assert re.match(regex, tokenizer.decode(input_ids[1])) models = [(model_transformers(), "torch")] if HAS_MLX: models.append((model_mlxlm(), "mlx")) @pytest.mark.parametrize("model, tensor_library_name", models) def test_xgrammar_backend(model, tensor_library_name, json_schema, regex, cfg): # initialization backend = XGrammarBackend(model) assert isinstance(backend.grammar_compiler, GrammarCompiler) # json schema processor = backend.get_json_schema_logits_processor(json_schema) assert isinstance(processor, XGrammarLogitsProcessor) generator = outlines.Generator(model, backend="xgrammar", processor=processor) response = generator("Hello, how are you?") assert response[0] == "{" assert "name" in response # regex processor = backend.get_regex_logits_processor(regex) assert isinstance(processor, XGrammarLogitsProcessor) generator = outlines.Generator(model, backend="xgrammar", processor=processor) response = generator("Hello, how are you?") assert len(response) == 3 assert int(response) # cfg processor = backend.get_cfg_logits_processor(cfg) assert isinstance(processor, XGrammarLogitsProcessor) generator = outlines.Generator(model, backend="xgrammar", processor=processor) response = generator("Hello, how are you?") assert response == "yes" or response == "no" # batch + multiple generations processor = backend.get_json_schema_logits_processor(json_schema) generator = outlines.Generator(model, backend="xgrammar", processor=processor) for _ in range(2): if tensor_library_name == "torch": response = generator.batch(["Create a character", "Hello, how are you?"], max_new_tokens=200) assert len(response) == 2 for r in response: assert r[0] == "{" assert "name" in r else: response = generator("Create a character", max_tokens=20) assert response[0] == "{" assert "name" in response def test_xgrammar_backend_invalid_model(): with pytest.raises( ValueError, match="The xgrammar backend only supports Transformers and MLXLM models", ): XGrammarBackend(model_llamacpp()) ================================================ FILE: tests/cfg_samples/arithmetic/lots_of_ops.arithmetic.test ================================================ 5+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 ================================================ FILE: tests/cfg_samples/arithmetic/simple_math.arithmetic.test ================================================ (1 * 2) - (0.1 * 2 * 9.42) ================================================ FILE: tests/cfg_samples/json/outlines.generate.samplers.mypy.json.test ================================================ { ".class": "MypyFile", "_fullname": "outlines.generate.samplers", "future_import_flags": [], "is_partial_stub_package": false, "is_stub": false, "names": { ".class": "SymbolTable", "Protocol": { ".class": "SymbolTableNode", "cross_ref": "typing.Protocol", "kind": "Gdef" }, "Sampler": { ".class": "SymbolTableNode", "kind": "Gdef", "node": { ".class": "TypeInfo", "_promote": [], "abstract_attributes": [ [ "__call__", 2 ] ], "alt_promote": null, "bases": [ "builtins.object" ], "dataclass_transform_spec": null, "declared_metaclass": null, "defn": { ".class": "ClassDef", "fullname": "outlines.generate.samplers.Sampler", "name": "Sampler", "type_vars": [] }, "deletable_attributes": [], "flags": [ "is_abstract", "is_protocol" ], "fullname": "outlines.generate.samplers.Sampler", "has_param_spec_type": false, "metaclass_type": "abc.ABCMeta", "metadata": {}, "module_name": "outlines.generate.samplers", "mro": [ "outlines.generate.samplers.Sampler", "builtins.object" ], "names": { ".class": "SymbolTable", "__call__": { ".class": "SymbolTableNode", "kind": "Mdef", "node": { ".class": "FuncDef", "abstract_status": 2, "arg_kinds": [ 0, 0, 0, 0 ], "arg_names": [ "self", "logits", "samples", "rng" ], "dataclass_transform_spec": null, "flags": [ "is_trivial_body" ], "fullname": "outlines.generate.samplers.Sampler.__call__", "name": "__call__", "type": { ".class": "CallableType", "arg_kinds": [ 0, 0, 0, 0 ], "arg_names": [ "self", "logits", "samples", "rng" ], "arg_types": [ "outlines.generate.samplers.Sampler", { ".class": "AnyType", "missing_import_name": "outlines.generate.samplers.torch", "source_any": null, "type_of_any": 3 }, "builtins.int", { ".class": "AnyType", "missing_import_name": "outlines.generate.samplers.torch", "source_any": null, "type_of_any": 3 } ], "bound_args": [], "def_extras": { "first_arg": "self" }, "fallback": "builtins.function", "from_concatenate": false, "implicit": false, "is_ellipsis_args": false, "name": "__call__ of Sampler", "ret_type": { ".class": "AnyType", "missing_import_name": "outlines.generate.samplers.torch", "source_any": null, "type_of_any": 3 }, "type_guard": null, "unpack_kwargs": false, "variables": [] } } } }, "self_type": null, "slots": null, "tuple_type": null, "type_vars": [], "typeddict_type": null } }, "__annotations__": { ".class": "SymbolTableNode", "kind": "Gdef", "node": { ".class": "Var", "flags": [ "is_ready" ], "fullname": "outlines.generate.samplers.__annotations__", "name": "__annotations__", "type": { ".class": "Instance", "args": [ "builtins.str", { ".class": "AnyType", "missing_import_name": null, "source_any": null, "type_of_any": 6 } ], "type_ref": "builtins.dict" } } }, "__doc__": { ".class": "SymbolTableNode", "kind": "Gdef", "node": { ".class": "Var", "flags": [ "is_ready" ], "fullname": "outlines.generate.samplers.__doc__", "name": "__doc__", "type": "builtins.str" } }, "__file__": { ".class": "SymbolTableNode", "kind": "Gdef", "node": { ".class": "Var", "flags": [ "is_ready" ], "fullname": "outlines.generate.samplers.__file__", "name": "__file__", "type": "builtins.str" } }, "__name__": { ".class": "SymbolTableNode", "kind": "Gdef", "node": { ".class": "Var", "flags": [ "is_ready" ], "fullname": "outlines.generate.samplers.__name__", "name": "__name__", "type": "builtins.str" } }, "__package__": { ".class": "SymbolTableNode", "kind": "Gdef", "node": { ".class": "Var", "flags": [ "is_ready" ], "fullname": "outlines.generate.samplers.__package__", "name": "__package__", "type": "builtins.str" } }, "greedy": { ".class": "SymbolTableNode", "kind": "Gdef", "node": { ".class": "FuncDef", "abstract_status": 0, "arg_kinds": [ 0, 0, 2 ], "arg_names": [ "logits", "samples", "_" ], "dataclass_transform_spec": null, "flags": [], "fullname": "outlines.generate.samplers.greedy", "name": "greedy", "type": { ".class": "CallableType", "arg_kinds": [ 0, 0, 2 ], "arg_names": [ "logits", "samples", "_" ], "arg_types": [ { ".class": "AnyType", "missing_import_name": "outlines.generate.samplers.torch", "source_any": null, "type_of_any": 3 }, "builtins.int", { ".class": "AnyType", "missing_import_name": null, "source_any": null, "type_of_any": 1 } ], "bound_args": [], "def_extras": { "first_arg": null }, "fallback": "builtins.function", "from_concatenate": false, "implicit": false, "is_ellipsis_args": false, "name": "greedy", "ret_type": { ".class": "AnyType", "missing_import_name": "outlines.generate.samplers.torch", "source_any": null, "type_of_any": 3 }, "type_guard": null, "unpack_kwargs": false, "variables": [] } } }, "multinomial": { ".class": "SymbolTableNode", "kind": "Gdef", "node": { ".class": "FuncDef", "abstract_status": 0, "arg_kinds": [ 0, 0, 0 ], "arg_names": [ "logits", "samples", "rng" ], "dataclass_transform_spec": null, "flags": [], "fullname": "outlines.generate.samplers.multinomial", "name": "multinomial", "type": { ".class": "CallableType", "arg_kinds": [ 0, 0, 0 ], "arg_names": [ "logits", "samples", "rng" ], "arg_types": [ { ".class": "AnyType", "missing_import_name": "outlines.generate.samplers.torch", "source_any": null, "type_of_any": 3 }, "builtins.int", { ".class": "AnyType", "missing_import_name": "outlines.generate.samplers.torch", "source_any": null, "type_of_any": 3 } ], "bound_args": [], "def_extras": { "first_arg": null }, "fallback": "builtins.function", "from_concatenate": false, "implicit": false, "is_ellipsis_args": false, "name": "multinomial", "ret_type": { ".class": "AnyType", "missing_import_name": "outlines.generate.samplers.torch", "source_any": null, "type_of_any": 3 }, "type_guard": null, "unpack_kwargs": false, "variables": [] } } }, "torch": { ".class": "SymbolTableNode", "kind": "Gdef", "node": { ".class": "Var", "flags": [ "is_suppressed_import", "is_ready", "is_inferred" ], "fullname": "outlines.generate.samplers.torch", "name": "torch", "type": { ".class": "AnyType", "missing_import_name": "outlines.generate.samplers.torch", "source_any": null, "type_of_any": 3 } } } }, "path": "/home/andrew/p/outlines/outlines/generate/samplers.py" } ================================================ FILE: tests/cfg_samples/json/simple_fruit.json.test ================================================ [ { "ID": "1", "Name": "Andrew \"The Escaper\" Lapp", "Age": "30", "FavFruit": "Banana" }, { "ID": "2", "Name": "Mohammad", "Age": "40", "FavFruit": "\"Any Fruit As Long as It's In Quotes!\"" }, { "ID": "3", "Name": "Alice", "Age": "61", "FavFruit": "Peaches, but only \n newline separated peaches" } ] ================================================ FILE: tests/cfg_samples/json/simple_fruit_no_indent.json.test ================================================ [{"ID": "1", "Name": "Andrew", "Age": "30", "FavFruit": "Banana"}, {"ID": "2", "Name": "Mohammad", "Age": "40", "FavFruit": "Apple"}, {"ID": "3", "Name": "Alice", "Age": "61", "FavFruit": "Peach"}] ================================================ FILE: tests/conftest.py ================================================ import sys import pytest def pytest_collection_modifyitems(config, items): if sys.platform != "linux": if not config.option.keyword or ( config.option.keyword and "test_integration_vllm" in config.option.keyword ): print( "WARNING: test_integration_vllm tests are skipped because vLLM only supports Linux platform (including WSL)." ) skip_vllm = pytest.mark.skip(reason="vLLM models can only be run on Linux.") for item in items: if "test_integration_vllm" in item.nodeid: item.add_marker(skip_vllm) ================================================ FILE: tests/models/test_anthopic_type_adapter.py ================================================ import io import pytest from dataclasses import dataclass from PIL import Image as PILImage from outlines.inputs import Chat, Image from outlines.models.anthropic import AnthropicTypeAdapter @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image @pytest.fixture def adapter(): return AnthropicTypeAdapter() def test_anthropic_type_adapter_input_text(adapter): message = "prompt" result = adapter.format_input(message) assert result == {"messages": [{"role": "user", "content": message}]} def test_anthropic_type_adapter_input_vision(adapter, image): image_input = Image(image) text_input = "hello" result = adapter.format_input([text_input, image_input]) assert result == { "messages": [ { "role": "user", "content": [ { "type": "image", "source": { "type": "base64", "media_type": "image/png", "data": image_input.image_str, }, }, {"type": "text", "text": text_input}, ], }, ] } def test_anthropic_type_adapter_input_chat(adapter, image): image_input = Image(image) model_input = Chat(messages=[ {"role": "system", "content": "prompt"}, {"role": "user", "content": [ "hello", image_input, ]}, {"role": "assistant", "content": "response"}, ]) result = adapter.format_input(model_input) assert result == { "messages": [ {"role": "system", "content": "prompt"}, {"role": "user", "content": [ { "type": "image", "source": { "type": "base64", "media_type": "image/png", "data": image_input.image_str, }, }, {"type": "text", "text": "hello"}, ]}, {"role": "assistant", "content": "response"}, ] } def test_anthropic_type_adapter_input_invalid(adapter): @dataclass class Audio: file: str with pytest.raises(TypeError, match="is not available with Anthropic"): _ = adapter.format_input(Audio("file")) with pytest.raises( ValueError, match="All assets provided must be of type Image", ): _ = adapter.format_input(["prompt", Audio("file")]) with pytest.raises( ValueError, match="The content must be a string or a list", ): _ = adapter.format_input( Chat(messages=[{"role": "user", "content": {"foo": "bar"}}]) ) def test_anthropic_type_adapter_output(adapter): with pytest.raises( NotImplementedError, match="is not available with Anthropic" ): adapter.format_output_type(str) ================================================ FILE: tests/models/test_anthropic.py ================================================ import io from typing import Generator from anthropic import Anthropic as AnthropicClient from PIL import Image as PILImage import pytest import outlines from outlines.inputs import Chat, Image, Video from outlines.models.anthropic import Anthropic MODEL_NAME = "claude-3-haiku-20240307" @pytest.fixture(scope="session") def model(): return Anthropic(AnthropicClient(), MODEL_NAME) @pytest.fixture(scope="session") def model_no_model_name(): return Anthropic(AnthropicClient()) @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image def test_init_from_client(): client = AnthropicClient() # With model name model = outlines.from_anthropic(client, MODEL_NAME) assert isinstance(model, Anthropic) assert model.client == client assert model.model_name == MODEL_NAME # Without model name model = outlines.from_anthropic(client) assert isinstance(model, Anthropic) assert model.client == client assert model.model_name is None def test_anthropic_wrong_inference_parameters(): with pytest.raises(TypeError, match="got an unexpected"): model = Anthropic(AnthropicClient(), MODEL_NAME) model.generate("prompt", foo=10, max_tokens=1024) def test_anthropic_wrong_input_type(image): class Foo: def __init__(self, foo): self.foo = foo with pytest.raises(TypeError, match="is not available"): model = Anthropic(AnthropicClient(), MODEL_NAME) model.generate(Foo("prompt")) with pytest.raises(ValueError, match="All assets provided must be of type Image"): model.generate(["foo?", Image(image), Video("")]) def test_anthropic_wrong_output_type(): class Foo: def __init__(self, foo): self.foo = foo with pytest.raises(NotImplementedError, match="is not available"): model = Anthropic(AnthropicClient(), MODEL_NAME) model.generate("prompt", Foo(1)) @pytest.mark.api_call def test_anthropic_simple_call(model): result = model.generate("Respond with one word. Not more.", max_tokens=1024) assert isinstance(result, str) @pytest.mark.xfail(reason="Anthropic requires the `max_tokens` parameter to be set") @pytest.mark.api_call def test_anthropic_direct_call(model_no_model_name): result = model_no_model_name( "Respond with one word. Not more.", model_name=MODEL_NAME, max_tokens=1024, ) assert isinstance(result, str) @pytest.mark.api_call def test_anthropic_simple_vision(model, image): result = model.generate( [ "What does this logo represent?", Image(image), ], max_tokens=1024, ) assert isinstance(result, str) @pytest.mark.api_call def test_anthropic_chat(model, image): result = model.generate(Chat(messages=[ {"role": "assistant", "content": "How can I help you today?"}, { "role": "user", "content": ["What does this logo represent?", Image(image)] }, ]), max_tokens=10) assert isinstance(result, str) @pytest.mark.api_call def test_anthopic_streaming(model): result = model.stream("Respond with one word. Not more.", max_tokens=1024) assert isinstance(result, Generator) assert isinstance(next(result), str) def test_anthropic_batch(model): with pytest.raises(NotImplementedError, match="does not support"): model.batch( ["Respond with one word.", "Respond with one word."], max_tokens=1024, ) ================================================ FILE: tests/models/test_dottxt.py ================================================ import json import os import pytest from dottxt.client import Dottxt as DottxtClient from pydantic import BaseModel import outlines from outlines import Generator from outlines.models.dottxt import Dottxt MODEL_NAME = "dottxt/dottxt-v1-alpha" MODEL_REVISION = "d06c86726aadd8dadb92c5b9b9e3ce8ef246c471" class User(BaseModel): first_name: str last_name: str user_id: int @pytest.fixture(scope="session") def api_key(): """Get the Dottxt API key from the environment, providing a default value if not found. This fixture should be used for tests that do not make actual api calls, but still require to initialize the Dottxt client. """ api_key = os.getenv("DOTTXT_API_KEY") if not api_key: return "MOCK_API_KEY" return api_key @pytest.fixture(scope="session") def model_name_and_revision(api_key): client = DottxtClient(api_key=api_key) model_list = client.list_models() return (model_list[0].name, model_list[0].revision) @pytest.fixture(scope="session") def model(api_key, model_name_and_revision): client = DottxtClient(api_key=api_key) return Dottxt( client, model_name_and_revision[0], model_name_and_revision[1], ) @pytest.fixture(scope="session") def model_no_model_name(api_key): client = DottxtClient(api_key=api_key) return Dottxt(client) @pytest.mark.api_call def test_dottxt_init_from_client(api_key, model_name_and_revision): client = DottxtClient(api_key=api_key) # Without model name model = outlines.from_dottxt(client) assert isinstance(model, Dottxt) assert model.client == client assert model.model_name is None # With model name model = outlines.from_dottxt( client, model_name_and_revision[0], model_name_and_revision[1], ) assert isinstance(model, Dottxt) assert model.client == client assert model.model_name == model_name_and_revision[0] assert model.model_revision == model_name_and_revision[1] def test_dottxt_wrong_output_type(model_no_model_name): with pytest.raises(TypeError, match="You must provide an output type"): model_no_model_name("prompt") def test_dottxt_wrong_input_type(model_no_model_name): with pytest.raises(TypeError, match="is not available"): model_no_model_name(["prompt"], User) @pytest.mark.api_call def test_dottxt_wrong_inference_parameters(model_no_model_name): with pytest.raises(TypeError, match="got an unexpected"): model_no_model_name("prompt", User, foo=10) @pytest.mark.api_call def test_dottxt_direct_pydantic_call(model_no_model_name): result = model_no_model_name("Create a user", User) assert "first_name" in json.loads(result) @pytest.mark.api_call def test_dottxt_direct_jsonschema_call( model_no_model_name, model_name_and_revision ): result = model_no_model_name( "Create a user", User, model_name=model_name_and_revision[0], model_revision=model_name_and_revision[1], ) assert "first_name" in json.loads(result) @pytest.mark.api_call def test_dottxt_generator_pydantic_call(model): generator = Generator(model, User) result = generator("Create a user") assert "first_name" in json.loads(result) @pytest.mark.api_call def test_dottxt_streaming(model): with pytest.raises( NotImplementedError, match="Dottxt does not support streaming" ): model.stream("Create a user", User) @pytest.mark.api_call def test_dottxt_batch(model): with pytest.raises(NotImplementedError, match="does not support"): model.batch( ["Respond with one word.", "Respond with one word."] ) ================================================ FILE: tests/models/test_dottxt_type_adapter.py ================================================ import io import json import pytest import sys from dataclasses import dataclass from PIL import Image as PILImage from genson import SchemaBuilder from pydantic import BaseModel from outlines.inputs import Image from outlines.models.dottxt import DottxtTypeAdapter from outlines.types import cfg, json_schema, regex if sys.version_info >= (3, 12): from typing import TypedDict else: from typing_extensions import TypedDict @pytest.fixture def schema(): return { "properties": { "user_id": {"title": "User Id", "type": "integer"}, "name": {"title": "Name", "type": "string"}, }, "required": ["user_id", "name"], "title": "User", "type": "object", } @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image @pytest.fixture def adapter(): return DottxtTypeAdapter() def test_dottxt_type_adapter_input_text(adapter): message = "prompt" result = adapter.format_input(message) assert result == message def test_dottxt_type_adapter_input_invalid(adapter, image): prompt = ["prompt", image] with pytest.raises(TypeError, match="The input type"): _ = adapter.format_input(prompt) def test_dottxt_type_adapter_output_invalid(adapter): with pytest.raises(TypeError, match="You must provide an output type"): adapter.format_output_type(None) with pytest.raises(TypeError, match="The type `str` is not supported"): adapter.format_output_type(str) with pytest.raises(TypeError, match="The type `int` is not supported"): adapter.format_output_type(int) with pytest.raises(TypeError, match="Regex-based structured outputs will soon be"): adapter.format_output_type(regex("[0-9]")) with pytest.raises(TypeError, match="CFG-based structured outputs will soon be"): adapter.format_output_type(cfg("")) def test_dottxt_type_adapter_output_dataclass(adapter, schema): @dataclass class User: user_id: int name: str result = adapter.format_output_type(User) assert result == json.dumps(schema) def test_dottxt_type_adapter_output_typed_dict(adapter, schema): class User(TypedDict): user_id: int name: str result = adapter.format_output_type(User) assert result == json.dumps(schema) def test_dottxt_type_adapter_output_pydantic(adapter, schema): class User(BaseModel): user_id: int name: str result = adapter.format_output_type(User) assert result == json.dumps(schema) def test_dottxt_type_adapter_output_genson_schema_builder(adapter, schema): builder = SchemaBuilder() builder.add_schema({"type": "object", "properties": {}}) builder.add_object({"hi": "there"}) builder.add_object({"hi": 5}) result = adapter.format_output_type(builder) result_dict = json.loads(result) assert isinstance(result_dict, dict) expected_schema = { "$schema": "http://json-schema.org/schema#", "type": "object", "properties": {"hi": {"type": ["integer", "string"]}}, "required": ["hi"], } assert result_dict == expected_schema def test_dottxt_type_adapter_json_schema_str(adapter, schema): schema_str = json.dumps(schema) result = adapter.format_output_type(json_schema(schema_str)) assert result == json.dumps(schema) def test_dottxt_type_adapter_json_schema_dict(adapter, schema): result = adapter.format_output_type(json_schema(schema)) assert result == json.dumps(schema) ================================================ FILE: tests/models/test_gemini.py ================================================ import io import json import sys from dataclasses import dataclass from enum import Enum from typing import Generator, Literal import pytest from PIL import Image as PILImage from google.genai import Client from pydantic import BaseModel, ValidationError import outlines from outlines.inputs import Chat, Image, Video from outlines.models.gemini import Gemini from outlines.types import Choice if sys.version_info >= (3, 12): from typing import TypedDict else: from typing_extensions import TypedDict MODEL_NAME = "gemini-1.5-flash-latest" @pytest.fixture(scope="session") def model(): return Gemini(Client(), MODEL_NAME) @pytest.fixture(scope="session") def model_no_model_name(): return Gemini(Client()) @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image @pytest.mark.api_call def test_gemini_init_from_client(): client = Client() # Without model name model = outlines.from_gemini(client) assert isinstance(model, Gemini) assert model.client == client assert model.model_name is None # With model name model = outlines.from_gemini(client, MODEL_NAME) assert isinstance(model, Gemini) assert model.client == client assert model.model_name == MODEL_NAME @pytest.mark.api_call def test_gemini_wrong_inference_parameters(model): with pytest.raises(ValidationError): model.generate("prompt", foo=10) @pytest.mark.api_call def test_gemini_wrong_input_type(model, image): with pytest.raises(ValueError, match="All assets provided must be of type Image"): model.generate(["foo?", Image(image), Video("")]) @pytest.mark.api_call def test_gemini_simple_call(model): result = model.generate("Respond with one word. Not more.") assert isinstance(result, str) @pytest.mark.api_call def test_gemini_direct_call(model_no_model_name): result = model_no_model_name( "Respond with one word. Not more.", model=MODEL_NAME ) assert isinstance(result, str) @pytest.mark.api_call def test_gemini_simple_vision(model, image): result = model.generate(["What does this logo represent?", Image(image)]) assert isinstance(result, str) @pytest.mark.api_call def test_gemini_chat(model, image): result = model.generate(Chat(messages=[ {"role": "assistant", "content": "How can I help you today?"}, { "role": "user", "content": ["What does this logo represent?", Image(image)] }, ])) assert isinstance(result, str) @pytest.mark.api_call def test_gemini_simple_pydantic(model): class Foo(BaseModel): bar: int result = model.generate("foo?", Foo) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.api_call def test_gemini_simple_vision_pydantic(model, image): class Logo(BaseModel): name: int result = model.generate(["What does this logo represent?", Image(image)], Logo) assert isinstance(result, str) assert "name" in json.loads(result) @pytest.mark.api_call def test_gemini_nested_pydantic(model): class Bar(BaseModel): fu: str class Foo(BaseModel): sna: int bar: Bar result = model.generate("foo?", Foo) assert isinstance(result, str) assert "sna" in json.loads(result) assert "bar" in json.loads(result) assert "fu" in json.loads(result)["bar"] @pytest.mark.xfail( reason="The Gemini SDK's serialization method does not support Json Schema strings." ) @pytest.mark.api_call def test_gemini_simple_json_schema_string(model): schema = "{'properties': {'bar': {'title': 'Bar', 'type': 'integer'}}, 'required': ['bar'], 'title': 'Foo', 'type': 'object'}" result = model.generate("foo?", schema) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.xfail( reason="The Gemini SDK's serialization method does not support Json Schema dictionaries." ) @pytest.mark.api_call def test_gemini_simple_json_schema_dict(model): schema = { "properties": {"bar": {"type": "integer"}}, "required": ["bar"], "type": "object", } result = model.generate("foo?", schema) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.api_call def test_gemini_simple_typed_dict(model): class Foo(TypedDict): bar: int result = model.generate("foo?", Foo) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.api_call def test_gemini_simple_dataclass(model): @dataclass class Foo: bar: int result = model.generate("foo?", Foo) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.api_call def test_gemini_simple_choice_enum(model): class Foo(Enum): bar = "Bar" foor = "Foo" result = model.generate("foo?", Foo) assert isinstance(result, str) assert result == "Foo" or result == "Bar" @pytest.mark.api_call def test_gemini_simple_choice_choice(model): result = model.generate("foo?", Choice(["Foo", "Bar"])) assert isinstance(result, str) assert result == "Foo" or result == "Bar" @pytest.mark.api_call def test_gemini_sample_choice_literal(model): result = model.generate("foo?", Literal["Foo", "Bar"]) assert isinstance(result, str) assert result == "Foo" or result == "Bar" @pytest.mark.xfail( reason="Gemini supports lists for choices but we do not as it is semantically incorrect." ) @pytest.mark.api_call def test_gemini_simple_choice_list(model): choices = ["Foo", "Bar"] result = model.generate("foo?", choices) assert isinstance(result, str) assert result == "Foo" or result == "Bar" @pytest.mark.api_call def test_gemini_simple_list_pydantic(model): class Foo(BaseModel): bar: int result = model.generate("foo?", list[Foo]) assert isinstance(json.loads(result), list) assert isinstance(json.loads(result)[0], dict) assert "bar" in json.loads(result)[0] @pytest.mark.api_call def test_gemini_streaming(model): result = model.stream("Respond with one word. Not more.") assert isinstance(result, Generator) assert isinstance(next(result), str) @pytest.mark.api_call def test_gemini_batch(model): with pytest.raises(NotImplementedError, match="does not support"): model.batch( ["Respond with one word.", "Respond with one word."], ) ================================================ FILE: tests/models/test_gemini_type_adapter.py ================================================ import io import pytest import sys from dataclasses import dataclass from enum import Enum, EnumMeta from typing import Literal, get_args from PIL import Image as PILImage from genson import SchemaBuilder from pydantic import BaseModel from outlines import cfg, json_schema, regex from outlines.inputs import Chat, Image from outlines.models.gemini import GeminiTypeAdapter from outlines.types.utils import is_dataclass if sys.version_info >= (3, 12): from typing import TypedDict else: from typing_extensions import TypedDict @pytest.fixture def schema(): return { "properties": { "user_id": {"title": "User Id", "type": "integer"}, "name": {"title": "Name", "type": "string"}, }, "required": ["user_id", "name"], "title": "User", "type": "object", } @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image @pytest.fixture def adapter(): return GeminiTypeAdapter() def test_gemini_type_adapter_input_text(adapter): message = "prompt" result = adapter.format_input(message) assert result == {"contents": [{"text": message}]} def test_gemini_type_adapter_input_vision(adapter, image): image_input = Image(image) text_input = "hello" result = adapter.format_input([text_input, image_input]) assert result == { "contents": [ { "role": "user", "parts": [ {"text": text_input}, { "inline_data": { "mime_type": "image/png", "data": image_input.image_str, }, }, ], }, ] } def test_gemini_type_adapter_input_chat(adapter, image): image_input = Image(image) input_message = Chat(messages=[ {"role": "assistant", "content": "How can I help you today?"}, {"role": "user", "content": [ "What does this logo represent?", image_input, ]}, ]) result = adapter.format_input(input_message) assert result == { "contents": [ {"role": "model", "parts": [{"text": "How can I help you today?"}]}, { "role": "user", "parts": [ {"text": "What does this logo represent?"}, { "inline_data": { "mime_type": "image/png", "data": image_input.image_str, }, }, ], }, ] } def test_gemini_type_adapter_input_invalid(adapter): @dataclass class Audio: file: str prompt = Audio( "file", ) with pytest.raises(TypeError, match="The input type"): _ = adapter.format_input(prompt) def test_gemini_type_adapter_output_invalid(adapter): with pytest.raises(TypeError, match="The type `str` is not supported"): adapter.format_output_type(str) with pytest.raises(TypeError, match="The type `int` is not supported"): adapter.format_output_type(int) with pytest.raises(TypeError, match="Neither regex-based"): adapter.format_output_type(regex("[0-9]")) with pytest.raises(TypeError, match="CFG-based structured outputs"): adapter.format_output_type(cfg("")) def test_gemini_type_adapter_output_none(adapter): result = adapter.format_output_type(None) assert result == {} def test_gemini_type_adapter_output_json_schema(adapter, schema): result = adapter.format_output_type(json_schema(schema)) assert isinstance(result, dict) assert result["response_mime_type"] == "application/json" assert is_dataclass(result["response_schema"]) def test_gemini_type_adapter_output_list_json_schema(adapter, schema): result = adapter.format_output_type(list[json_schema(schema)]) assert isinstance(result, dict) assert result["response_mime_type"] == "application/json" args = get_args(result["response_schema"]) assert len(args) == 1 assert is_dataclass(args[0]) def test_gemini_type_adapter_output_dataclass(adapter): @dataclass class User: user_id: int name: str result = adapter.format_output_type(User) assert result == { "response_mime_type": "application/json", "response_schema": User, } def test_gemini_type_adapter_output_list_dataclass(adapter): class User(BaseModel): user_id: int name: str result = adapter.format_output_type(list[User]) assert result == { "response_mime_type": "application/json", "response_schema": list[User], } def test_gemini_type_adapter_output_typed_dict(adapter): class User(TypedDict): user_id: int name: str result = adapter.format_output_type(User) assert result == { "response_mime_type": "application/json", "response_schema": User, } def test_gemini_type_adapter_output_list_typed_dict(adapter): class User(BaseModel): user_id: int name: str result = adapter.format_output_type(list[User]) assert result == { "response_mime_type": "application/json", "response_schema": list[User], } def test_gemini_type_adapter_output_pydantic(adapter): class User(BaseModel): user_id: int name: str result = adapter.format_output_type(User) assert result == { "response_mime_type": "application/json", "response_schema": User, } def test_gemini_type_adapter_output_list_pydantic(adapter): class User(BaseModel): user_id: int name: str result = adapter.format_output_type(list[User]) assert result == { "response_mime_type": "application/json", "response_schema": list[User], } def test_gemini_type_adapter_output_genson_schema_builder(adapter): builder = SchemaBuilder() builder.add_schema({"type": "object", "properties": {"foo": {"type": "string"}, "bar": {"type": "integer"}}, "required": ["foo"]}) result = adapter.format_output_type(builder) assert isinstance(result, dict) assert result["response_mime_type"] == "application/json" assert is_dataclass(result["response_schema"]) def test_gemini_type_adapter_output_list_genson_schema_builder(adapter): builder = SchemaBuilder() builder.add_schema({"type": "object", "properties": {"foo": {"type": "string"}, "bar": {"type": "integer"}}, "required": ["foo"]}) result = adapter.format_output_type(list[builder]) assert isinstance(result, dict) assert result["response_mime_type"] == "application/json" args = get_args(result["response_schema"]) assert len(args) == 1 assert is_dataclass(args[0]) def test_gemini_type_adapter_output_enum(adapter): class Foo(Enum): Bar = "bar" Fuzz = "fuzz" result = adapter.format_output_type(Foo) assert result == { "response_mime_type": "text/x.enum", "response_schema": Foo, } def test_gemini_type_adapter_output_literal(adapter): Foo = Literal["bar", "fuzz"] result = adapter.format_output_type(Foo) assert isinstance(result, dict) assert len(result) == 2 assert result["response_mime_type"] == "text/x.enum" assert isinstance(result["response_schema"], EnumMeta) assert len(result["response_schema"].__members__) == 2 assert result["response_schema"].bar.value == "bar" assert result["response_schema"].fuzz.value == "fuzz" ================================================ FILE: tests/models/test_llamacpp.py ================================================ import json from enum import Enum import pytest from llama_cpp import Llama from pydantic import BaseModel from outlines.inputs import Chat from outlines.models.llamacpp import ( LlamaCpp, LlamaCppTokenizer, LlamaCppTypeAdapter, from_llamacpp ) from outlines.types.dsl import Regex, CFG def test_load_model(): model = from_llamacpp( Llama.from_pretrained( repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF", filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf", chat_format="qwen" ) ) assert isinstance(model, LlamaCpp) assert isinstance(model.model, Llama) assert isinstance(model.tokenizer, LlamaCppTokenizer) assert isinstance(model.type_adapter, LlamaCppTypeAdapter) assert model.tensor_library_name == "numpy" @pytest.fixture(scope="session") def model(tmp_path_factory): return LlamaCpp( Llama.from_pretrained( repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF", filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf", chat_format="qwen", ) ) @pytest.fixture(scope="session") def model_no_chat(tmp_path_factory): return LlamaCpp( Llama.from_pretrained( repo_id="tensorblock/Llama3-1B-Base-GGUF", filename="Llama3-1B-Base-Q2_K.gguf", ), chat_mode=False ) @pytest.fixture def lark_grammar(): return """ ?start: sum ?sum: product | sum "+" product -> add | sum "-" product -> sub ?product: atom | product "*" atom -> mul | product "/" atom -> div ?atom: NUMBER -> number | "-" atom -> neg | "(" sum ")" %import common.NUMBER %import common.WS_INLINE %ignore WS_INLINE """ @pytest.fixture def ebnf_grammar(): return """ root ::= answer answer ::= "yes" | "no" """ def test_llamacpp_simple(model): result = model.generate("Respond with one word. Not more.", None) assert isinstance(result, str) def test_llamacpp_chat(model): result = model.generate( Chat( messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Respond with one word. Not more."} ] ), max_tokens=10 ) assert isinstance(result, str) def test_llamacpp_regex(model): result = model("Respond with one word. Not more.", Regex(r"[0-9]")) assert isinstance(result, str) assert int(result) assert len(result) == 1 def test_llamacpp_json(model): class Foo(BaseModel): bar: str result = model("foo? Respond with one word.", Foo, max_tokens=100) assert isinstance(result, str) assert "bar" in json.loads(result) def test_llamacpp_choice(model): class Foo(Enum): bar = "Bar" foor = "Foo" result = model("foo?", Foo) assert result == "Foo" or result == "Bar" def test_llamacpp_cfg(model, ebnf_grammar): response = model("Respond with one word. Not more.", CFG(ebnf_grammar)) assert response in ["yes", "no"] def test_llamacpp_cfg_outlines_core(model, lark_grammar): with pytest.raises( NotImplementedError, match="Outlines Core does not support context-free grammar." ): model( "Respond with one word. Not more.", CFG(lark_grammar), backend="outlines_core" ) def test_llamacpp_text_stop(model): result = model.generate("Write the letter a.", None, stop="a", max_tokens=100) assert "a" not in result def test_llamacpp_stream_simple(model): generator = model.stream("Respond with one word. Not more.", None) for x in generator: assert isinstance(x, str) def test_llamacpp_stream_chat(model): generator = model.stream( Chat( messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Respond with one word. Not more."} ] ), max_tokens=10 ) for x in generator: assert isinstance(x, str) def test_llamacpp_stream_regex(model): generator = model.stream("Respond with one word. Not more.", Regex(r"[0-9]")) x = next(generator) assert isinstance(x, str) def test_llamacpp_stream_json(model): class Foo(BaseModel): bar: int generator = model.stream("foo?", Foo) # NOTE: The first few chunks may be empty (role info, control tokens, finish chunks) # Relevant issue: https://github.com/abetlen/llama-cpp-python/issues/372 first_non_empty_token = next(x for x in generator if x) assert first_non_empty_token == "{" def test_llamacpp_stream_cfg(model, ebnf_grammar): response = "" for chunk in model.stream( "Respond with one word. Not more.", CFG(ebnf_grammar) ): response += chunk assert response in ["yes", "no"] def test_llamacpp_stream_cfg_outlines_core(model, lark_grammar): with pytest.raises( NotImplementedError, match="Outlines Core does not support context-free grammar." ): for chunk in model.stream( "Respond with one word. Not more.", CFG(lark_grammar), backend="outlines_core" ): pass def test_llamacpp_stream_choice(model): class Foo(Enum): bar = "Bar" foor = "Foo" generator = model.stream("foo?", Foo) first_non_empty_token = next(x for x in generator if x) assert first_non_empty_token[0] in ("B", "F") def test_llamacpp_stream_text_stop(model): generator = model.stream("Write the letter a.", None, stop="a", max_tokens=100) result = next(generator) assert isinstance(result, str) assert result != "a" def test_llamacpp_batch(model): with pytest.raises(NotImplementedError, match="does not support"): model.batch( ["Respond with one word.", "Respond with one word."], ) def test_llamacpp_no_chat(model_no_chat): result = model_no_chat.generate("Respond with one word. Not more.", None) assert isinstance(result, str) generator = model_no_chat.stream("Respond with one word. Not more.", None) for x in generator: assert isinstance(x, str) ================================================ FILE: tests/models/test_llamacpp_tokenizer.py ================================================ import ctypes import pytest import sys from unittest.mock import MagicMock, patch import llama_cpp import transformers from outlines.models.llamacpp import LlamaCppTokenizer @pytest.fixture def model(): model = llama_cpp.Llama.from_pretrained( repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF", filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf", chat_format="qwen", ) setattr( model.tokenizer_, "hf_tokenizer", transformers.AutoTokenizer.from_pretrained("erwanf/gpt2-mini"), ) return model @pytest.fixture def model_no_hf_tokenizer(): model = llama_cpp.Llama.from_pretrained( repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF", filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf", chat_format="qwen", ) del model.tokenizer_ return model @pytest.fixture def different_model(): model = llama_cpp.Llama.from_pretrained( "TheBloke/phi-2-GGUF", "phi-2.Q4_K_M.gguf", ) return model @pytest.fixture def tokenizer(model): return LlamaCppTokenizer(model) @pytest.fixture def another_tokenizer(model): return LlamaCppTokenizer(model) @pytest.fixture def tokenizer_no_hf_tokenizer(model_no_hf_tokenizer): return LlamaCppTokenizer(model_no_hf_tokenizer) @pytest.fixture def different_tokenizer(different_model): return LlamaCppTokenizer(different_model) def test_llama_cpp_tokenizer_init(tokenizer, tokenizer_no_hf_tokenizer): # regular case assert tokenizer.eos_token_id is not None assert tokenizer.pad_token_id is not None assert isinstance(tokenizer.vocabulary, dict) # tokenizer with no hf_tokenizer assert tokenizer_no_hf_tokenizer.eos_token_id is not None assert tokenizer_no_hf_tokenizer.pad_token_id is not None assert isinstance(tokenizer_no_hf_tokenizer.vocabulary, dict) def test_llama_cpp_tokenizer_encode(tokenizer): # batch case with pytest.raises(NotImplementedError): token_ids, attention_mask = tokenizer.encode(["foo", "bar"]) # regular case token_ids, attention_mask = tokenizer.encode("Hello, world!") assert token_ids is not None assert attention_mask is not None assert len(token_ids) == len(attention_mask) def test_llama_cpp_tokenizer_decode(tokenizer): token_ids, _ = tokenizer.encode("Hello, world!") decoded_text = tokenizer.decode(token_ids) assert isinstance(decoded_text, list) assert "".join(decoded_text).strip() == "Hello, world!" def test_llama_cpp_tokenizer_convert_token_to_string( tokenizer, tokenizer_no_hf_tokenizer ): # with self._hf_tokenizer token_str = tokenizer.convert_token_to_string("<0x20>") assert isinstance(token_str, str) # without self._hf_tokenizer token_str = tokenizer_no_hf_tokenizer.convert_token_to_string("<0x20>") assert isinstance(token_str, str) def test_llama_cpp_tokenizer_eq(tokenizer, another_tokenizer, different_tokenizer): assert not tokenizer == 1 assert tokenizer == another_tokenizer assert tokenizer != different_tokenizer def test_llama_cpp_tokenizer_hash(tokenizer, another_tokenizer, different_tokenizer): assert isinstance(hash(tokenizer), int) assert hash(tokenizer) == hash(another_tokenizer) assert hash(tokenizer) != hash(different_tokenizer) def test_llama_cpp_tokenizer_getstate(tokenizer): state = tokenizer.__getstate__() assert isinstance(state, tuple) assert len(state) == 5 assert isinstance(state[0], dict) assert isinstance(state[1], int) assert isinstance(state[2], str) assert isinstance(state[3], int) assert isinstance(state[4], list) def test_llama_cpp_tokenizer_setstate(tokenizer): with pytest.raises(NotImplementedError): tokenizer.__setstate__(None) def _make_mock_model(n_vocab, eos_id, pieces): """Build a mock Llama model whose vocab is defined by *pieces*. Parameters ---------- n_vocab : int Number of tokens in the vocabulary. eos_id : int The EOS token id. pieces : dict[int, bytes] Mapping from token id to the raw bytes of the token piece. """ model = MagicMock() # Remove tokenizer_ so the code falls into the C-API branch del model.tokenizer_ model.token_eos.return_value = eos_id model.n_vocab.return_value = n_vocab model.model = MagicMock() return model def test_vocab_truncation_retry_path(): """Tokens whose piece length exceeds the 32-byte buffer must trigger the retry path with a larger buffer so their text is not collapsed.""" long_piece = b"x" * 40 # 40 > 32 → triggers the retry branch short_piece = b"hi" eos_piece = b"" pieces = {0: short_piece, 1: long_piece, 2: eos_piece} model = _make_mock_model(n_vocab=3, eos_id=2, pieces=pieces) def fake_llama_token_to_piece(vocab, token_id, buf, buf_size, *args): data = pieces[token_id] n = len(data) # Only write into the buffer when it is large enough if buf_size >= n: ctypes.memmove(buf, data, n) return n with patch( "outlines.models.llamacpp.llama_model_get_vocab", return_value=MagicMock(), create=True, ), patch( "outlines.models.llamacpp.llama_token_to_piece", side_effect=fake_llama_token_to_piece, create=True, ): # Patch the imports inside the __init__ else-branch with patch.dict( "sys.modules", { "llama_cpp": MagicMock( llama_model_get_vocab=MagicMock(return_value=MagicMock()), llama_token_to_piece=fake_llama_token_to_piece, ), }, ): tok = LlamaCppTokenizer.__new__(LlamaCppTokenizer) # Re-import inside the else-branch uses llama_cpp module tok.__init__(model) assert tok.vocabulary[long_piece.decode()] == 1 assert tok.vocabulary[short_piece.decode()] == 0 assert tok.eos_token == eos_piece.decode() def test_attention_mask_all_ones_even_with_eos(): """The attention mask must be all-ones for every token, including EOS.""" eos_piece = b"" pieces = {0: b"hello", 1: eos_piece} model = _make_mock_model(n_vocab=2, eos_id=1, pieces=pieces) def fake_llama_token_to_piece(vocab, token_id, buf, buf_size, *args): data = pieces[token_id] n = len(data) if buf_size >= n: ctypes.memmove(buf, data, n) return n with patch.dict( "sys.modules", { "llama_cpp": MagicMock( llama_model_get_vocab=MagicMock(return_value=MagicMock()), llama_token_to_piece=fake_llama_token_to_piece, ), }, ): tok = LlamaCppTokenizer.__new__(LlamaCppTokenizer) tok.__init__(model) # Simulate encoding that returns token ids including the EOS token fake_tokenizer = MagicMock() fake_tokenizer.tokenize.return_value = [0, 1] # token 1 == eos_id tok.tokenizer = fake_tokenizer token_ids, attention_mask = tok.encode("hello") assert token_ids == [0, 1] assert attention_mask == [1, 1] def test_negative_n_skips_invalid_token(): """Tokens that return n < 0 from llama_token_to_piece (error codes) must be silently skipped instead of producing garbage vocabulary entries.""" eos_piece = b"" pieces = {0: b"ok", 1: None, 2: eos_piece} # token 1 returns error model = _make_mock_model(n_vocab=3, eos_id=2, pieces=pieces) def fake_llama_token_to_piece(vocab, token_id, buf, buf_size, *args): data = pieces[token_id] if data is None: return -1 # error return n = len(data) if buf_size >= n: ctypes.memmove(buf, data, n) return n with patch.dict( "sys.modules", { "llama_cpp": MagicMock( llama_model_get_vocab=MagicMock(return_value=MagicMock()), llama_token_to_piece=fake_llama_token_to_piece, ), }, ): tok = LlamaCppTokenizer.__new__(LlamaCppTokenizer) tok.__init__(model) # Token 1 (error) must not appear in the vocabulary assert 1 not in tok.vocabulary.values() assert tok.vocabulary["ok"] == 0 assert tok.eos_token == eos_piece.decode() ================================================ FILE: tests/models/test_llamacpp_type_adapter.py ================================================ import pytest import io from llama_cpp import LogitsProcessorList from PIL import Image as PILImage from outlines_core import Index, Vocabulary from outlines.backends.outlines_core import OutlinesCoreLogitsProcessor from outlines.inputs import Chat, Image from outlines.models.llamacpp import LlamaCppTypeAdapter @pytest.fixture def adapter(): return LlamaCppTypeAdapter() @pytest.fixture def logits_processor(): vocabulary = Vocabulary.from_pretrained("openai-community/gpt2") index = Index(r"[0-9]{3}", vocabulary) return OutlinesCoreLogitsProcessor(index, "numpy") @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image def test_llamacpp_type_adapter_format_input(adapter, image): # Anything else than a string/Chat with pytest.raises(NotImplementedError): adapter.format_input(["Hello, world!"]) # string assert adapter.format_input("Hello, world!") == "Hello, world!" # Chat messages = [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] assert adapter.format_input(Chat(messages=messages)) == messages # Multi-modal (invalid) with pytest.raises( ValueError, match="LlamaCpp does not support multi-modal messages." ): adapter.format_input(Chat(messages=[ {"role": "user", "content": ["prompt", Image(image)]}, ])) def test_llamacpp_type_adapter_format_input_with_chat_template(): adapter = LlamaCppTypeAdapter(has_chat_template=True) message = "prompt" result = adapter.format_input(message) assert result == [{"role": "user", "content": "prompt"}] def test_llamacpp_type_adapter_format_input_without_chat_template(): adapter = LlamaCppTypeAdapter(has_chat_template=False) message = "prompt" result = adapter.format_input(message) assert result == "prompt" def test_llamacpp_type_adapter_format_output_type(adapter, logits_processor): formatted = adapter.format_output_type(logits_processor) assert isinstance(formatted, LogitsProcessorList) assert formatted[0].index == logits_processor.index assert formatted[0].tensor_library_name == logits_processor.tensor_library_name ================================================ FILE: tests/models/test_lmstudio.py ================================================ import io import json import os import warnings from enum import Enum from typing import Annotated, AsyncGenerator, Generator import lmstudio import pytest from PIL import Image as PILImage from pydantic import BaseModel, Field import outlines from outlines.inputs import Chat, Image, Video from outlines.models import AsyncLMStudio, LMStudio from outlines.models.lmstudio import LMStudioTypeAdapter from tests.test_utils.mock_lmstudio_client import ( MockLMStudioClient, MockAsyncLMStudioClient, ) # If the LMSTUDIO_SERVER_URL environment variable is set, use the real LMStudio server # Otherwise, use the mock server lmstudio_server_url = os.environ.get("LMSTUDIO_SERVER_URL") lmstudio_model_name = os.environ.get( "LMSTUDIO_MODEL_NAME", "openai/gpt-oss-20b" ) # Image for testing (only create when server is available, as lms.prepare_image requires it) image_input = None if lmstudio_server_url: width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) image_input = Image(image) if lmstudio_server_url: lmstudio_client = lmstudio.Client(lmstudio_server_url) async_lmstudio_client = lmstudio.AsyncClient(lmstudio_server_url) else: warnings.warn("No LMStudio server URL provided, using mock server") lmstudio_client = MockLMStudioClient() async_lmstudio_client = MockAsyncLMStudioClient() class Foo(BaseModel): foo: Annotated[str, Field(max_length=10)] type_adapter = LMStudioTypeAdapter() # Mock responses for non-image tests (image tests require a running server # because lms.prepare_image() needs to connect to LM Studio) mock_responses = [ ( { "messages": type_adapter.format_input("Respond with one word. Not more."), }, "foo" ), ( { "messages": type_adapter.format_input( "Create a character with a name in the foo field." ), "response_format": type_adapter.format_output_type(Foo), }, '{"foo": "bar"}' ), ( { "messages": type_adapter.format_input("Write a sentence about a cat."), }, ["The ", "cat ", "sat."] ), ( { "messages": type_adapter.format_input("Create a character."), "response_format": type_adapter.format_output_type(Foo), }, ['{"foo":', ' "bar"}'] ), ] # If the LMSTUDIO_SERVER_URL environment variable is not set, add the mock # responses to the mock clients if not lmstudio_server_url: lmstudio_client.add_mock_responses(mock_responses) async_lmstudio_client.add_mock_responses(mock_responses) # Skip condition for tests that require a running LM Studio server (image tests) requires_lmstudio_server = pytest.mark.skipif( not lmstudio_server_url, reason=( "Image tests require a running LM Studio server (lms.prepare_image " + "needs connection)" ) ) @pytest.fixture def model(): return LMStudio(lmstudio_client, lmstudio_model_name) @pytest.fixture def model_no_model_name(): return LMStudio(lmstudio_client) @pytest.fixture def async_model(): if lmstudio_server_url: # We need to create a new lmstudio client client = lmstudio.AsyncClient(lmstudio_server_url) return AsyncLMStudio(client, lmstudio_model_name) return AsyncLMStudio(async_lmstudio_client, lmstudio_model_name) @pytest.fixture def async_model_no_model_name(): if lmstudio_server_url: # We need to create a new lmstudio client client = lmstudio.AsyncClient(lmstudio_server_url) return AsyncLMStudio(client) return AsyncLMStudio(async_lmstudio_client) def test_lmstudio_init_from_client(): if lmstudio_server_url: client = lmstudio.Client(lmstudio_server_url) # With model name model = outlines.from_lmstudio(client, lmstudio_model_name) assert isinstance(model, LMStudio) assert model.client == client assert model.model_name == lmstudio_model_name # Without model name model = outlines.from_lmstudio(client) assert isinstance(model, LMStudio) assert model.client == client assert model.model_name is None else: # With mock client, test direct instantiation client = MockLMStudioClient() client.add_mock_responses(mock_responses) model = LMStudio(client, lmstudio_model_name) assert model.client == client assert model.model_name == lmstudio_model_name model = LMStudio(client) assert model.client == client assert model.model_name is None # With invalid client with pytest.raises(ValueError, match="Invalid client type"): outlines.from_lmstudio(object()) def test_lmstudio_simple(model): result = model.generate("Respond with one word. Not more.", None) assert isinstance(result, str) def test_lmstudio_direct(model_no_model_name): result = model_no_model_name( "Respond with one word. Not more.", None, model=lmstudio_model_name, ) assert isinstance(result, str) @requires_lmstudio_server def test_lmstudio_simple_vision(model): result = model.generate( ["What does this logo represent?", image_input], model=lmstudio_model_name, ) assert isinstance(result, str) @requires_lmstudio_server def test_lmstudio_chat(model): result = model.generate( Chat( [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": [ "What does this logo represent?", image_input ]}, ] ), model=lmstudio_model_name, ) assert isinstance(result, str) def test_lmstudio_json(model): result = model("Create a character with a name in the foo field.", Foo) assert isinstance(result, str) assert "foo" in json.loads(result) def test_lmstudio_wrong_output_type(model): class BadFoo(Enum): bar = "Bar" foo = "Foo" with pytest.raises(TypeError, match="is not supported"): model.generate("foo?", BadFoo) def test_lmstudio_wrong_input_type(model): with pytest.raises(TypeError, match="is not available"): model.generate({"foo?": "bar?"}, None) with pytest.raises(ValueError, match="All assets provided must be of type Image"): model.generate(["foo?", image_input, Video("")], None) def test_lmstudio_stream(model): result = model.stream("Write a sentence about a cat.") assert isinstance(result, Generator) assert isinstance(next(result), str) def test_lmstudio_stream_json(model_no_model_name): generator = model_no_model_name.stream("Create a character.", Foo, model=lmstudio_model_name) generated_text = [] for text in generator: generated_text.append(text) assert "foo" in json.loads("".join(generated_text)) def test_lmstudio_batch(model): with pytest.raises(NotImplementedError, match="does not support"): model.batch(["Respond with one word.", "Respond with one word."]) def test_lmstudio_async_init_from_client(): if lmstudio_server_url: client = lmstudio.AsyncClient(lmstudio_server_url) # With model name model = outlines.from_lmstudio(client, lmstudio_model_name) assert isinstance(model, AsyncLMStudio) assert model.client == client assert model.model_name == lmstudio_model_name # Without model name model = outlines.from_lmstudio(client) assert isinstance(model, AsyncLMStudio) assert model.client == client assert model.model_name is None else: # With mock client, test direct instantiation client = MockAsyncLMStudioClient() client.add_mock_responses(mock_responses) model = AsyncLMStudio(client, lmstudio_model_name) assert model.client == client assert model.model_name == lmstudio_model_name model = AsyncLMStudio(client) assert model.client == client assert model.model_name is None @pytest.mark.asyncio async def test_lmstudio_async_simple(async_model): result = await async_model.generate("Respond with one word. Not more.", None) assert isinstance(result, str) @pytest.mark.asyncio async def test_lmstudio_async_direct(async_model_no_model_name): result = await async_model_no_model_name( "Respond with one word. Not more.", None, model=lmstudio_model_name, ) assert isinstance(result, str) @requires_lmstudio_server @pytest.mark.asyncio async def test_lmstudio_async_simple_vision(async_model): result = await async_model.generate( ["What does this logo represent?", image_input], model=lmstudio_model_name, ) assert isinstance(result, str) @requires_lmstudio_server @pytest.mark.asyncio async def test_lmstudio_async_chat(async_model): result = await async_model.generate( Chat( [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": [ "What does this logo represent?", image_input ]}, ] ), model=lmstudio_model_name, ) assert isinstance(result, str) @pytest.mark.asyncio async def test_lmstudio_async_json(async_model): result = await async_model("Create a character with a name in the foo field.", Foo) assert isinstance(result, str) assert "foo" in json.loads(result) @pytest.mark.asyncio async def test_lmstudio_async_wrong_output_type(async_model): class BadFoo(Enum): bar = "Bar" foo = "Foo" with pytest.raises(TypeError, match="is not supported"): await async_model.generate("foo?", BadFoo) @pytest.mark.asyncio async def test_lmstudio_async_wrong_input_type(async_model): with pytest.raises(TypeError, match="is not available"): await async_model.generate({"foo?": "bar?"}, None) with pytest.raises(ValueError, match="All assets provided must be of type Image"): await async_model.generate(["foo?", image_input, Video("")], None) @pytest.mark.asyncio async def test_lmstudio_async_stream(async_model): result = async_model.stream("Write a sentence about a cat.") assert isinstance(result, AsyncGenerator) assert isinstance(await result.__anext__(), str) @pytest.mark.asyncio async def test_lmstudio_async_stream_json(async_model_no_model_name): async_generator = async_model_no_model_name.stream("Create a character.", Foo, model=lmstudio_model_name) generated_text = [] async for chunk in async_generator: generated_text.append(chunk) assert "foo" in json.loads("".join(generated_text)) @pytest.mark.asyncio async def test_lmstudio_async_batch(async_model): with pytest.raises(NotImplementedError, match="does not support"): await async_model.batch(["Respond with one word.", "Respond with one word."]) ================================================ FILE: tests/models/test_lmstudio_type_adapter.py ================================================ import io import json import os import sys from dataclasses import dataclass import pytest from genson import SchemaBuilder from PIL import Image as PILImage from pydantic import BaseModel from outlines.inputs import Chat, Image from outlines.models.lmstudio import LMStudioTypeAdapter from outlines.types import cfg, json_schema, regex if sys.version_info >= (3, 12): from typing import TypedDict else: from typing_extensions import TypedDict # Skip condition for tests that require a running LM Studio server (image tests) requires_lmstudio_server = pytest.mark.skipif( not os.environ.get("LMSTUDIO_SERVER_URL"), reason=( "Image tests require a running LM Studio server (lms.prepare_image " + "needs connection)" ) ) @pytest.fixture def schema(): return { "properties": { "user_id": {"title": "User Id", "type": "integer"}, "name": {"title": "Name", "type": "string"}, }, "required": ["user_id", "name"], "title": "User", "type": "object", } @pytest.fixture def adapter(): return LMStudioTypeAdapter() @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image def test_lmstudio_type_adapter_input_text(adapter): text_input = "prompt" result = adapter.format_input(text_input) assert isinstance(result, str) assert result == text_input @requires_lmstudio_server def test_lmstudio_type_adapter_input_vision(adapter, image): import lmstudio as lms image_input = Image(image) text_input = "prompt" result = adapter.format_input([text_input, image_input]) assert isinstance(result, lms.Chat) def test_lmstudio_type_adapter_input_chat(adapter): chat_input = Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi there!"}, {"role": "user", "content": "How are you?"}, ]) result = adapter.format_input(chat_input) # Should return an lmstudio.Chat object import lmstudio as lms assert isinstance(result, lms.Chat) def test_lmstudio_type_adapter_input_chat_no_system(adapter): chat_input = Chat(messages=[ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi!"}, ]) result = adapter.format_input(chat_input) import lmstudio as lms assert isinstance(result, lms.Chat) @requires_lmstudio_server def test_lmstudio_type_adapter_input_chat_with_image(adapter, image): import lmstudio as lms image_input = Image(image) chat_input = Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": [ "What is in this image?", image_input, ]}, {"role": "assistant", "content": "response"}, ]) result = adapter.format_input(chat_input) assert isinstance(result, lms.Chat) def test_lmstudio_type_adapter_input_invalid(adapter): prompt = {"foo": "bar"} with pytest.raises(TypeError, match="The input type"): _ = adapter.format_input(prompt) def test_lmstudio_type_adapter_input_chat_invalid_content(adapter): chat_input = Chat(messages=[ {"role": "user", "content": {"foo": "bar"}}, ]) with pytest.raises(ValueError, match="Invalid content type"): _ = adapter.format_input(chat_input) def test_lmstudio_type_adapter_input_chat_invalid_role(adapter): chat_input = Chat(messages=[ {"role": "unknown", "content": "hello"}, ]) with pytest.raises(ValueError, match="Unsupported role"): _ = adapter.format_input(chat_input) def test_lmstudio_type_adapter_output_none(adapter): result = adapter.format_output_type(None) assert result is None def test_lmstudio_type_adapter_output_invalid(adapter): with pytest.raises(TypeError, match="The type `str` is not supported"): adapter.format_output_type(str) with pytest.raises(TypeError, match="The type `int` is not supported"): adapter.format_output_type(int) with pytest.raises(TypeError, match="Regex-based structured outputs are not"): adapter.format_output_type(regex("[0-9]")) with pytest.raises(TypeError, match="CFG-based structured outputs are not"): adapter.format_output_type(cfg("")) def test_lmstudio_type_adapter_output_dataclass(adapter, schema): @dataclass class User: user_id: int name: str result = adapter.format_output_type(User) assert result == schema def test_lmstudio_type_adapter_output_typed_dict(adapter, schema): class User(TypedDict): user_id: int name: str result = adapter.format_output_type(User) assert result == schema def test_lmstudio_type_adapter_output_pydantic(adapter, schema): class User(BaseModel): user_id: int name: str result = adapter.format_output_type(User) assert result == schema def test_lmstudio_type_adapter_output_genson_schema_builder(adapter): builder = SchemaBuilder() builder.add_schema({"type": "object", "properties": {}}) builder.add_object({"hi": "there"}) builder.add_object({"hi": 5}) result = adapter.format_output_type(builder) assert result == { "$schema": "http://json-schema.org/schema#", "type": "object", "properties": {"hi": {"type": ["integer", "string"]}}, "required": ["hi"] } def test_lmstudio_type_adapter_json_schema_str(adapter, schema): schema_str = json.dumps(schema) result = adapter.format_output_type(json_schema(schema_str)) assert result == schema def test_lmstudio_type_adapter_json_schema_dict(adapter, schema): result = adapter.format_output_type(json_schema(schema)) assert result == schema ================================================ FILE: tests/models/test_mistral.py ================================================ import io import json import os from typing import Annotated, Generator, AsyncGenerator import pytest from PIL import Image as PILImage from mistralai import Mistral as MistralClient from pydantic import BaseModel, Field import outlines from outlines.inputs import Chat, Image, Video from outlines.models.mistral import AsyncMistral, Mistral from outlines.types import JsonSchema, Regex MODEL_NAME = "mistral-large-latest" VISION_MODEL = "pixtral-large-latest" @pytest.fixture(scope="session") def api_key(): """Get the Mistral API key from the environment, providing a default value if not found. This fixture should be used for tests that do not make actual api calls, but still require to initialize the Mistral client. """ api_key = os.getenv("MISTRAL_API_KEY") if not api_key: return "MOCK_VALUE" return api_key @pytest.fixture(scope="session") def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image @pytest.fixture(scope="session") def model(api_key): return Mistral(MistralClient(api_key=api_key), MODEL_NAME) @pytest.fixture(scope="session") def vision_model(api_key): return Mistral(MistralClient(api_key=api_key), VISION_MODEL) @pytest.fixture(scope="session") def async_model(api_key): return AsyncMistral(MistralClient(api_key=api_key), MODEL_NAME) @pytest.fixture(scope="session") def async_vision_model(api_key): return AsyncMistral(MistralClient(api_key=api_key), VISION_MODEL) @pytest.fixture(scope="session") def model_no_model_name(api_key): return Mistral(MistralClient(api_key=api_key)) @pytest.fixture(scope="session") def async_model_no_model_name(api_key): return AsyncMistral(MistralClient(api_key=api_key)) def test_mistral_init_from_client(api_key): client = MistralClient(api_key=api_key) # With model name model = outlines.from_mistral(client, MODEL_NAME) assert isinstance(model, Mistral) assert model.client == client assert model.model_name == MODEL_NAME # Without model name model = outlines.from_mistral(client) assert isinstance(model, Mistral) assert model.client == client assert model.model_name is None def test_mistral_wrong_inference_parameters(model): with pytest.raises(RuntimeError, match="got an unexpected"): model("prompt", foo=10) def test_mistral_wrong_input_type(model): with pytest.raises(TypeError, match="is not available"): model(123) def test_mistral_wrong_output_type(model): with pytest.raises( TypeError, match="Regex-based structured outputs are not available with Mistral.", ): model("prompt", Regex("^.*$")) @pytest.mark.api_call def test_mistral_call(model): result = model("Respond with one word. Not more.") assert isinstance(result, str) @pytest.mark.api_call def test_mistral_call_model_name(model_no_model_name): result = model_no_model_name( "Respond with one word. Not more.", model=MODEL_NAME ) assert isinstance(result, str) @pytest.mark.api_call def test_mistral_multiple_samples(model): result = model("Respond with one word. Not more.", n=2) assert isinstance(result, list) assert len(result) == 2 assert isinstance(result[0], str) assert isinstance(result[1], str) @pytest.mark.api_call def test_mistral_vision(image, vision_model): result = vision_model(["What does this logo represent?", Image(image)]) assert isinstance(result, str) @pytest.mark.api_call def test_mistral_chat(image, vision_model): result = vision_model(Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": ["What does this logo represent?", Image(image)] }, ]), max_tokens=10) assert isinstance(result, str) @pytest.mark.api_call def test_mistral_pydantic(model): class Foo(BaseModel): bar: int result = model("foo?", Foo) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.api_call def test_mistral_pydantic_refusal(model): class Foo(BaseModel): bar: Annotated[str, Field(int, pattern=r"^\d+$")] with pytest.raises(TypeError, match="Mistral does not support your schema"): _ = model("foo?", Foo) @pytest.mark.api_call def test_mistral_vision_pydantic(vision_model, image): class Logo(BaseModel): name: int result = vision_model(["What does this logo represent?", Image(image)], Logo) assert isinstance(result, str) assert "name" in json.loads(result) @pytest.mark.api_call def test_mistral_json_schema(model): class Foo(BaseModel): bar: int schema = json.dumps(Foo.model_json_schema()) result = model("foo?", JsonSchema(schema)) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.api_call def test_mistral_streaming(model): result = model.stream("Respond with one word. Not more.") assert isinstance(result, Generator) assert isinstance(next(result), str) def test_mistral_batch(model): with pytest.raises(NotImplementedError, match="does not support"): model.batch( ["Respond with one word.", "Respond with one word."], ) def test_mistral_async_init_from_client(api_key): client = MistralClient(api_key=api_key) # Async with model name model = outlines.from_mistral(client, MODEL_NAME, async_client=True) assert isinstance(model, AsyncMistral) assert model.client == client assert model.model_name == MODEL_NAME # Async without model name model = outlines.from_mistral(client, async_client=True) assert isinstance(model, AsyncMistral) assert model.client == client assert model.model_name is None @pytest.mark.asyncio async def test_mistral_async_wrong_inference_parameters(async_model): with pytest.raises(RuntimeError, match="got an unexpected"): await async_model("prompt", foo=10) @pytest.mark.asyncio async def test_mistral_async_wrong_input_type(async_model): with pytest.raises(TypeError, match="is not available"): await async_model(123) @pytest.mark.asyncio async def test_mistral_async_wrong_output_type(async_model): with pytest.raises( TypeError, match="Regex-based structured outputs are not available with Mistral.", ): await async_model("prompt", Regex("^.*$")) @pytest.mark.asyncio @pytest.mark.api_call async def test_mistral_async_call(async_model): result = await async_model("Respond with one word. Not more.") assert isinstance(result, str) @pytest.mark.asyncio @pytest.mark.api_call async def test_mistral_async_call_model_name(async_model_no_model_name): result = await async_model_no_model_name( "Respond with one word. Not more.", model=MODEL_NAME, ) assert isinstance(result, str) @pytest.mark.asyncio @pytest.mark.api_call async def test_mistral_async_multiple_samples(async_model): result = await async_model("Respond with one word. Not more.", n=2) assert isinstance(result, list) assert len(result) == 2 assert isinstance(result[0], str) assert isinstance(result[1], str) @pytest.mark.asyncio @pytest.mark.api_call async def test_mistral_async_vision(async_vision_model, image): result = await async_vision_model(["What does this logo represent?", Image(image)]) assert isinstance(result, str) @pytest.mark.asyncio @pytest.mark.api_call async def test_mistral_async_chat(async_vision_model, image): result = await async_vision_model(Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": ["What does this logo represent?", Image(image)] }, ]), max_tokens=10) assert isinstance(result, str) @pytest.mark.asyncio @pytest.mark.api_call async def test_mistral_async_pydantic(async_model): class Foo(BaseModel): bar: int result = await async_model("foo?", Foo) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.asyncio @pytest.mark.api_call async def test_mistral_async_pydantic_refusal(async_model): class Foo(BaseModel): bar: Annotated[str, Field(int, pattern=r"^\d+$")] with pytest.raises(TypeError, match="Mistral does not support your schema"): _ = await async_model("foo?", Foo) @pytest.mark.asyncio @pytest.mark.api_call async def test_mistral_async_vision_pydantic(async_vision_model, image): class Logo(BaseModel): name: int result = await async_vision_model(["What does this logo represent?", Image(image)], Logo) assert isinstance(result, str) assert "name" in json.loads(result) @pytest.mark.asyncio @pytest.mark.api_call async def test_mistral_async_json_schema(async_model): class Foo(BaseModel): bar: int schema = json.dumps(Foo.model_json_schema()) result = await async_model("foo?", JsonSchema(schema)) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.asyncio @pytest.mark.api_call async def test_mistral_async_streaming(async_model): result = async_model.stream("Respond with one word. Not more.") assert isinstance(result, AsyncGenerator) async for chunk in result: assert isinstance(chunk, str) break # Just check the first chunk @pytest.mark.asyncio async def test_mistral_async_batch(async_model): with pytest.raises(NotImplementedError, match="does not support"): _ = await async_model.batch( ["Respond with one word.", "Respond with one word."], ) ================================================ FILE: tests/models/test_mistral_type_adapter.py ================================================ import io import json import sys from dataclasses import dataclass from typing import Literal import pytest from PIL import Image as PILImage from genson import SchemaBuilder from mistralai import ( AssistantMessage, SystemMessage, UserMessage, ) from pydantic import BaseModel from outlines.inputs import Chat, Image from outlines.models.mistral import MistralTypeAdapter from outlines.types import CFG, JsonSchema, Regex if sys.version_info >= (3, 12): from typing import TypedDict else: from typing_extensions import TypedDict @pytest.fixture def schema(): return { "properties": { "user_id": {"title": "User Id", "type": "integer"}, "name": {"title": "Name", "type": "string"}, }, "required": ["user_id", "name"], "title": "User", "type": "object", "additionalProperties": False, } @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image @pytest.fixture def adapter(): return MistralTypeAdapter() def test_mistral_type_adapter_input_text(adapter): message = "Hello world" result = adapter.format_input(message) assert len(result) == 1 assert isinstance(result[0], UserMessage) assert result[0].content == message def test_mistral_type_adapter_input_list(adapter, image): image_input = Image(image) message_list = ["Hello world", image_input] result = adapter.format_input(message_list) assert len(result) == 1 assert isinstance(result[0], UserMessage) message_content = result[0].content assert dict(message_content[0]) == {"type": "text", "text": "Hello world"} assert message_content[1].type == "image_url" assert hasattr(message_content[1], "image_url") def test_mistral_type_adapter_input_chat(adapter, image): image_input = Image(image) chat = Chat([ {"role": "system", "content": "You are helpful"}, {"role": "user", "content": ["Hello world", image_input]}, {"role": "assistant", "content": "Hi there"}, ]) result = adapter.format_input(chat) assert len(result) == 3 assert isinstance(result[0], SystemMessage) assert result[0].content == "You are helpful" assert isinstance(result[1], UserMessage) assert dict(result[1].content[0]) == {"type": "text", "text": "Hello world"} assert result[1].content[1].type == "image_url" assert hasattr(result[1].content[1], "image_url") assert isinstance(result[2], AssistantMessage) assert result[2].content == "Hi there" def test_mistral_type_adapter_input_invalid(adapter, image): @dataclass class Audio: file: str with pytest.raises(TypeError, match="is not available"): adapter.format_input(123) with pytest.raises(ValueError, match="Content list cannot be empty."): adapter.format_input([]) with pytest.raises( ValueError, match="The first item in the list should be a string.", ): adapter.format_input([Image(image)]) with pytest.raises( ValueError, match="Expected Image objects after the first string" ): adapter.format_input(["hello", Audio("file")]) with pytest.raises( TypeError, match="Invalid content type", ): adapter.format_input(Chat([{"role": "user", "content": {}}])) with pytest.raises(ValueError, match="Unsupported role"): adapter.format_input(Chat([{"role": "invalid", "content": "Hello"}])) def test_mistral_type_adapter_output_none(adapter): result = adapter.format_output_type(None) assert result == {} def test_mistral_type_adapter_output_json_mode(adapter): result = adapter.format_output_type(dict) assert result == {"type": "json_object"} def test_mistral_type_adapter_dataclass(adapter, schema): @dataclass class User: user_id: int name: str result = adapter.format_output_type(User) assert isinstance(result, dict) assert result["json_schema"]["strict"] is True assert result["json_schema"]["schema"] == schema def test_mistral_type_adapter_typed_dict(adapter, schema): class User(TypedDict): user_id: int name: str result = adapter.format_output_type(User) assert isinstance(result, dict) assert result["json_schema"]["strict"] is True assert result["json_schema"]["schema"] == schema def test_mistral_type_adapter_pydantic(adapter, schema): class User(BaseModel): user_id: int name: str result = adapter.format_output_type(User) assert isinstance(result, dict) assert result["json_schema"]["strict"] is True assert result["json_schema"]["schema"] == schema def test_mistral_type_adapter_genson_schema_builder(adapter, schema): builder = SchemaBuilder() builder.add_schema({"type": "object", "properties": {}}) builder.add_object({"hi": "there"}) builder.add_object({"hi": 5}) result = adapter.format_output_type(builder) assert isinstance(result, dict) assert result["json_schema"]["strict"] is True expected_schema = { "$schema": "http://json-schema.org/schema#", "type": "object", "properties": {"hi": {"type": ["integer", "string"]}}, "required": ["hi"], "additionalProperties": False } assert result["json_schema"]["schema"] == expected_schema def test_mistral_type_adapter_json_schema_str(adapter, schema): schema_str = json.dumps(schema) result = adapter.format_output_type(JsonSchema(schema_str)) assert isinstance(result, dict) assert result["json_schema"]["strict"] is True assert result["json_schema"]["schema"] == schema def test_mistral_type_adapter_output_unsupported(adapter): with pytest.raises( TypeError, match="Regex-based structured outputs are not available with Mistral.", ): adapter.format_output_type(Regex("[0-9]")) with pytest.raises( TypeError, match="CFG-based structured outputs are not available with Mistral.", ): adapter.format_output_type(CFG("")) with pytest.raises(TypeError, match="is not available with Mistral."): adapter.format_output_type(Literal["foo", "bar"]) ================================================ FILE: tests/models/test_mlxlm.py ================================================ import pytest import re from enum import Enum from typing import Generator import outlines from outlines.types import Regex from outlines.models.mlxlm import ( MLXLM, MLXLMTypeAdapter, from_mlxlm ) from outlines.models.transformers import TransformerTokenizer from pydantic import BaseModel try: import mlx_lm import mlx.core as mx HAS_MLX = mx.metal.is_available() except ImportError: HAS_MLX = False TEST_MODEL = "mlx-community/SmolLM-135M-Instruct-4bit" @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_model_initialization(): model = from_mlxlm(*mlx_lm.load(TEST_MODEL)) assert isinstance(model, MLXLM) assert isinstance(model.model, mlx_lm.models.llama.Model) assert isinstance( model.mlx_tokenizer, mlx_lm.tokenizer_utils.TokenizerWrapper ) assert isinstance(model.tokenizer, TransformerTokenizer) assert isinstance(model.type_adapter, MLXLMTypeAdapter) assert model.tensor_library_name == "mlx" @pytest.fixture(scope="session") def model(tmp_path_factory): model, tokenizer = mlx_lm.load(TEST_MODEL) return outlines.from_mlxlm(model, tokenizer) @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_tokenizer(model): # Test single string encoding/decoding test_text = "Hello, world!" token_ids, _ = model.tokenizer.encode(test_text) token_ids = mx.array(token_ids) assert isinstance(token_ids, mx.array) @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_simple(model): result = model.generate("Respond with one word. Not more.", None) assert isinstance(result, str) @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_call(model): result = model("Respond with one word. Not more.") assert isinstance(result, str) @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_invalid_input_type(model): with pytest.raises(NotImplementedError, match="is not available"): model(["Respond with one word. Not more."]) @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_invalid_inference_kwargs(model): with pytest.raises(TypeError): model("Respond with one word. Not more.", foo="bar") @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_inference_kwargs(model): result = model("Write a short story about a cat.", max_tokens=2) assert isinstance(result, str) assert len(result) < 20 @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_regex(model): result = model("Give a number between 0 and 9.", Regex(r"[0-9]")) assert isinstance(result, str) assert re.match(r"[0-9]", result) @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_json_schema(model): class Character(BaseModel): name: str result = model("Create a character with a name.", Character) assert "name" in result @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_choice(model): class Foo(Enum): cat = "cat" dog = "dog" result = model("Cat or dog?", Foo) assert result in ["cat", "dog"] @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_stream_text_stop(model): generator = model.stream( "Respond with one word. Not more.", None, max_tokens=100 ) assert isinstance(generator, Generator) assert isinstance(next(generator), str) @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_batch(model): result = model.batch( ["Respond with one word.", "Respond with one word."], ) assert isinstance(result, list) assert len(result) == 2 assert isinstance(result[0], str) assert isinstance(result[1], str) @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_batch_output_type(model): with pytest.raises( NotImplementedError, match="mlx-lm does not support constrained generation with batching." ): model.batch( ["Respond with one word.", "Respond with one word."], Regex(r"[0-9]") ) ================================================ FILE: tests/models/test_mlxlm_type_adapter.py ================================================ import pytest import io from unittest.mock import MagicMock from outlines_core import Index, Vocabulary from PIL import Image as PILImage from outlines.backends.outlines_core import OutlinesCoreLogitsProcessor from outlines.inputs import Chat, Image from outlines.models.mlxlm import MLXLMTypeAdapter try: import mlx_lm import mlx.core as mx HAS_MLX = mx.metal.is_available() except ImportError: HAS_MLX = False MODEL_NAME = "mlx-community/SmolLM-135M-Instruct-4bit" @pytest.fixture def adapter(): _, tokenizer = mlx_lm.load(MODEL_NAME) return MLXLMTypeAdapter(tokenizer=tokenizer) @pytest.fixture def logits_processor(): vocabulary = Vocabulary.from_pretrained(MODEL_NAME) index = Index(r"[0-9]{3}", vocabulary) return OutlinesCoreLogitsProcessor(index, "mlx") @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image def test_mlxlm_type_adapter_format_input_with_template(): tokenizer = MagicMock() tokenizer.chat_template = "some_template" tokenizer.apply_chat_template.return_value = "formatted_prompt" adapter = MLXLMTypeAdapter(tokenizer=tokenizer, has_chat_template=True) message = "prompt" result = adapter.format_input(message) assert result == "formatted_prompt" tokenizer.apply_chat_template.assert_called_once_with( [{"role": "user", "content": "prompt"}], tokenize=False, add_generation_prompt=True, ) def test_mlxlm_type_adapter_format_input_without_template(): tokenizer = MagicMock() tokenizer.chat_template = None adapter = MLXLMTypeAdapter(tokenizer=tokenizer, has_chat_template=False) message = "prompt" result = adapter.format_input(message) assert result == "prompt" @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_type_adapter_format_input(adapter, image): # Anything else than a string/Chat (invalid) with pytest.raises(NotImplementedError): adapter.format_input(["Hello, world!"]) # String assert adapter.format_input("Hello, world!") == "Hello, world!" # Chat messages = [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] expected = ( "<|im_start|>user\nHello, world!<|im_end|>\n<|im_start|>assistant\n" + "Hello, world!<|im_end|>\n<|im_start|>assistant\n" ) assert adapter.format_input(Chat(messages=messages)) == expected # Multi-modal (invalid) with pytest.raises( ValueError, match="mlx-lm does not support multi-modal messages." ): adapter.format_input(Chat(messages=[ {"role": "user", "content": ["prompt", Image(image)]}, ])) @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") def test_mlxlm_type_adapter_format_output_type(adapter, logits_processor): formatted = adapter.format_output_type(logits_processor) assert isinstance(formatted, list) assert len(formatted) == 1 assert isinstance(formatted[0], OutlinesCoreLogitsProcessor) ================================================ FILE: tests/models/test_ollama.py ================================================ import io import json from enum import Enum from typing import Annotated import pytest from PIL import Image as PILImage from ollama import AsyncClient, Client from pydantic import BaseModel, Field import outlines from outlines.inputs import Chat, Image, Video from outlines.models import AsyncOllama, Ollama MODEL_NAME = "tinyllama" @pytest.fixture def model(): return Ollama(Client(), MODEL_NAME) @pytest.fixture def model_no_model_name(): return Ollama(Client()) @pytest.fixture def async_model(): return AsyncOllama(AsyncClient(), MODEL_NAME) @pytest.fixture def async_model_no_model_name(): return AsyncOllama(AsyncClient()) @pytest.fixture(scope="session") def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image def test_ollama_init_from_client(): client = Client() # With model name model = outlines.from_ollama(client, MODEL_NAME) assert isinstance(model, Ollama) assert model.client == client assert model.model_name == MODEL_NAME # Without model name model = outlines.from_ollama(client) assert isinstance(model, Ollama) assert model.client == client assert model.model_name is None # With invalid client with pytest.raises(ValueError, match="Invalid client type"): outlines.from_ollama(object()) def test_ollama_wrong_inference_parameters(model): with pytest.raises(TypeError, match="got an unexpected"): model.generate( "Respond with one word. Not more.", None, foo=10 ) def test_ollama_simple(model): result = model.generate( "Respond with one word. Not more.", None ) assert isinstance(result, str) def test_ollama_direct(model_no_model_name): result = model_no_model_name( "Respond with one word. Not more.", None, model=MODEL_NAME, ) assert isinstance(result, str) def test_ollama_simple_vision(image, model): # This is not using a vision model, so it's not able to describe # the image, but we're still checking the model input syntax result = model.generate( ["What does this logo represent?", Image(image)], model=MODEL_NAME, ) assert isinstance(result, str) def test_ollama_chat(image, model): result = model.generate( Chat( [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": [ "What does this logo represent?", Image(image) ]}, ] ), model=MODEL_NAME, ) assert isinstance(result, str) def test_ollama_json(model): class Foo(BaseModel): foo: Annotated[str, Field(max_length=1)] result = model("Respond with one word. Not more.", Foo) assert isinstance(result, str) assert "foo" in json.loads(result) def test_ollama_wrong_output_type(model): class Foo(Enum): bar = "Bar" foor = "Foo" with pytest.raises(TypeError, match="is not supported"): model.generate("foo?", Foo) def test_ollama_wrong_input_type(model, image): with pytest.raises(TypeError, match="is not available"): model.generate({"foo?": "bar?"}, None) with pytest.raises(ValueError, match="All assets provided must be of type Image"): model.generate(["foo?", Image(image), Video("")], None) def test_ollama_stream(model): generator = model.stream("Write a sentence about a cat.") assert isinstance(next(generator), str) def test_ollama_stream_json(model_no_model_name): class Foo(BaseModel): foo: Annotated[str, Field(max_length=2)] generator = model_no_model_name.stream("Create a character.", Foo, model=MODEL_NAME) generated_text = [] for text in generator: generated_text.append(text) assert "foo" in json.loads("".join(generated_text)) def test_ollama_batch(model): with pytest.raises(NotImplementedError, match="does not support"): model.batch( ["Respond with one word.", "Respond with one word."], ) def test_ollama_async_init_from_client(): client = AsyncClient() # With model name model = outlines.from_ollama(client, MODEL_NAME) assert isinstance(model, AsyncOllama) assert model.client == client assert model.model_name == MODEL_NAME # Without model name model = outlines.from_ollama(client) assert isinstance(model, AsyncOllama) assert model.client == client assert model.model_name is None @pytest.mark.asyncio async def test_ollama_async_wrong_inference_parameters(async_model): with pytest.raises(TypeError, match="got an unexpected"): await async_model.generate( "Respond with one word. Not more.", None, foo=10 ) @pytest.mark.asyncio async def test_ollama_async_simple(async_model): result = await async_model.generate( "Respond with one word. Not more.", None ) assert isinstance(result, str) @pytest.mark.asyncio async def test_ollama_async_direct(async_model_no_model_name): result = await async_model_no_model_name( "Respond with one word. Not more.", None, model=MODEL_NAME, ) assert isinstance(result, str) @pytest.mark.asyncio async def test_ollama_async_simple_vision(image, async_model): # This is not using a vision model, so it's not able to describe # the image, but we're still checking the model input syntax result = await async_model.generate( ["What does this logo represent?", Image(image)], model=MODEL_NAME, ) assert isinstance(result, str) @pytest.mark.asyncio async def test_ollama_async_chat(image, async_model): result = await async_model.generate( Chat( [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": [ "What does this logo represent?", Image(image) ]}, ] ), model=MODEL_NAME, ) assert isinstance(result, str) @pytest.mark.asyncio async def test_ollama_async_json(async_model): class Foo(BaseModel): foo: Annotated[str, Field(max_length=1)] result = await async_model("Respond with one word. Not more.", Foo) assert isinstance(result, str) assert "foo" in json.loads(result) @pytest.mark.asyncio async def test_ollama_async_wrong_output_type(async_model): class Foo(Enum): bar = "Bar" foor = "Foo" with pytest.raises(TypeError, match="is not supported"): await async_model.generate("foo?", Foo) @pytest.mark.asyncio async def test_ollama_async_wrong_input_type(async_model): with pytest.raises(TypeError, match="is not available"): await async_model.generate({"foo?": "bar?"}, None) @pytest.mark.asyncio async def test_ollama_async_stream(async_model): async_generator = async_model.stream("Write a sentence about a cat.") assert isinstance(await async_generator.__anext__(), str) @pytest.mark.asyncio async def test_ollama_async_stream_json(async_model_no_model_name): class Foo(BaseModel): foo: Annotated[str, Field(max_length=2)] async_generator = async_model_no_model_name.stream("Create a character.", Foo, model=MODEL_NAME) generated_text = [] async for chunk in async_generator: generated_text.append(chunk) assert "foo" in json.loads("".join(generated_text)) @pytest.mark.asyncio async def test_ollama_async_batch(async_model): with pytest.raises(NotImplementedError, match="does not support"): await async_model.batch( ["Respond with one word.", "Respond with one word."], ) ================================================ FILE: tests/models/test_ollama_type_adapter.py ================================================ import io import json import pytest import sys from dataclasses import dataclass from genson import SchemaBuilder from PIL import Image as PILImage from pydantic import BaseModel from outlines.inputs import Chat, Image from outlines.models.ollama import OllamaTypeAdapter from outlines.types import cfg, json_schema, regex if sys.version_info >= (3, 12): from typing import TypedDict else: from typing_extensions import TypedDict @pytest.fixture def schema(): return { "properties": { "user_id": {"title": "User Id", "type": "integer"}, "name": {"title": "Name", "type": "string"}, }, "required": ["user_id", "name"], "title": "User", "type": "object", } @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image @pytest.fixture def adapter(): return OllamaTypeAdapter() def test_ollama_type_adapter_input_text(adapter): text_input = "prompt" result = adapter.format_input(text_input) assert isinstance(result, list) assert len(result) == 1 assert result[0] == {"role": "user", "content": text_input} def test_ollama_type_adapter_input_vision(adapter, image): image_input = Image(image) text_input = "prompt" result = adapter.format_input([text_input, image_input]) assert isinstance(result, list) assert len(result) == 1 assert result[0] == { "role": "user", "content": text_input, "images": [image_input.image_str], } def test_ollama_type_adapter_input_chat(adapter, image): image_input = Image(image) chat_input = Chat(messages=[ {"role": "system", "content": "prompt"}, {"role": "user", "content": [ "hello", image_input, ]}, {"role": "assistant", "content": "response"}, ]) result = adapter.format_input(chat_input) assert isinstance(result, list) assert len(result) == 3 assert result[0] == {"role": "system", "content": "prompt"} assert result[1] == {"role": "user", "content": "hello", "images": [image_input.image_str]} assert result[2] == {"role": "assistant", "content": "response"} def test_ollama_type_adapter_input_invalid(adapter): prompt = {"foo": "bar"} with pytest.raises(TypeError, match="The input type"): _ = adapter.format_input(prompt) prompt = Chat(messages=[ {"role": "user", "content": {"foo": "bar"}}, ]) with pytest.raises(ValueError, match="Invalid content type"): _ = adapter.format_input(prompt) def test_ollama_type_adapter_output_invalid(adapter): with pytest.raises(TypeError, match="The type `str` is not supported"): adapter.format_output_type(str) with pytest.raises(TypeError, match="The type `int` is not supported"): adapter.format_output_type(int) with pytest.raises(TypeError, match="Regex-based structured outputs are not"): adapter.format_output_type(regex("[0-9]")) with pytest.raises(TypeError, match="CFG-based structured outputs are not"): adapter.format_output_type(cfg("")) def test_ollama_type_adapter_output_dataclass(adapter, schema): @dataclass class User: user_id: int name: str result = adapter.format_output_type(User) assert result == schema def test_ollama_type_adapter_output_typed_dict(adapter, schema): class User(TypedDict): user_id: int name: str result = adapter.format_output_type(User) assert result == schema def test_ollama_type_adapter_output_pydantic(adapter, schema): class User(BaseModel): user_id: int name: str result = adapter.format_output_type(User) assert result == schema def test_ollama_type_adapter_output_genson_schema_builder(adapter): builder = SchemaBuilder() builder.add_schema({"type": "object", "properties": {}}) builder.add_object({"hi": "there"}) builder.add_object({"hi": 5}) result = adapter.format_output_type(builder) assert result == { "$schema": "http://json-schema.org/schema#", "type": "object", "properties": {"hi": {"type": ["integer", "string"]}}, "required": ["hi"] } def test_ollama_type_adapter_json_schema_str(adapter, schema): schema_str = json.dumps(schema) result = adapter.format_output_type(json_schema(schema_str)) assert result == schema def test_ollama_type_adapter_json_schema_dict(adapter, schema): result = adapter.format_output_type(json_schema(schema)) assert result == schema ================================================ FILE: tests/models/test_openai.py ================================================ import io import json import os from typing import Annotated, Generator, AsyncGenerator import pytest from PIL import Image as PILImage from openai import AsyncOpenAI as AsyncOpenAIClient, OpenAI as OpenAIClient from pydantic import BaseModel, Field import outlines from outlines.inputs import Chat, Image, Video from outlines.models.openai import AsyncOpenAI, OpenAI from outlines.types import json_schema MODEL_NAME = "gpt-4o-mini-2024-07-18" @pytest.fixture(scope="session") def api_key(): """Get the OpenAI API key from the environment, providing a default value if not found. This fixture should be used for tests that do not make actual api calls, but still require to initialize the OpenAI client. """ api_key = os.getenv("OPENAI_API_KEY") if not api_key: return "MOCK_VALUE" return api_key @pytest.fixture(scope="session") def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image @pytest.fixture(scope="session") def model(api_key): return OpenAI(OpenAIClient(api_key=api_key), MODEL_NAME) @pytest.fixture(scope="session") def async_model(api_key): return AsyncOpenAI(AsyncOpenAIClient(api_key=api_key), MODEL_NAME) @pytest.fixture(scope="session") def model_no_model_name(api_key): return OpenAI(OpenAIClient(api_key=api_key)) @pytest.fixture(scope="session") def async_model_no_model_name(api_key): return AsyncOpenAI(AsyncOpenAIClient(api_key=api_key)) def test_openai_init_from_client(api_key): client = OpenAIClient(api_key=api_key) # With model name model = outlines.from_openai(client, "gpt-4o") assert isinstance(model, OpenAI) assert model.client == client assert model.model_name == "gpt-4o" # Without model name model = outlines.from_openai(client) assert isinstance(model, OpenAI) assert model.client == client assert model.model_name is None def test_openai_wrong_inference_parameters(model): with pytest.raises(TypeError, match="got an unexpected"): model.generate("prompt", foo=10) def test_openai_wrong_input_type(model, image): class Foo: def __init__(self, foo): self.foo = foo with pytest.raises(TypeError, match="is not available"): model.generate(Foo("prompt")) with pytest.raises(ValueError, match="All assets provided must be of type Image"): model.generate(["foo?", Image(image), Video("")]) def test_openai_wrong_output_type(model): class Foo: def __init__(self, foo): self.foo = foo with pytest.raises(TypeError, match="is not available"): model.generate("prompt", Foo(1)) @pytest.mark.api_call def test_openai_simple_call(model): result = model.generate("Respond with one word. Not more.") assert isinstance(result, str) @pytest.mark.api_call def test_openai_simple_call_multiple_samples(model): result = model.generate("Respond with one word. Not more.", n=2) assert isinstance(result, list) assert len(result) == 2 assert isinstance(result[0], str) assert isinstance(result[1], str) @pytest.mark.api_call def test_openai_direct_call(model_no_model_name): result = model_no_model_name( "Respond with one word. Not more.", model=MODEL_NAME, ) assert isinstance(result, str) @pytest.mark.api_call def test_openai_simple_vision(image, model): result = model.generate(["What does this logo represent?", Image(image)]) assert isinstance(result, str) @pytest.mark.api_call def test_openai_chat(image, model): result = model.generate(Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": ["What does this logo represent?", Image(image)] }, ]), max_tokens=10) assert isinstance(result, str) @pytest.mark.api_call def test_openai_simple_pydantic(model): class Foo(BaseModel): bar: int result = model.generate("foo?", Foo) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.api_call def test_openai_simple_pydantic_refusal(model): class Foo(BaseModel): bar: Annotated[str, Field(int, pattern=r"^\d+$")] with pytest.raises(TypeError, match="OpenAI does not support your schema"): _ = model.generate("foo?", Foo) @pytest.mark.api_call def test_openai_simple_vision_pydantic(image, model): class Logo(BaseModel): name: int result = model.generate(["What does this logo represent?", Image(image)], Logo) assert isinstance(result, str) assert "name" in json.loads(result) @pytest.mark.api_call def test_openai_simple_json_schema(model): class Foo(BaseModel): bar: int schema = json.dumps(Foo.model_json_schema()) result = model.generate("foo?", json_schema(schema)) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.api_call def test_openai_streaming(model): result = model.stream("Respond with one word. Not more.") assert isinstance(result, Generator) assert isinstance(next(result), str) def test_openai_batch(model): with pytest.raises(NotImplementedError, match="does not support"): model.batch( ["Respond with one word.", "Respond with one word."], ) def test_openai_async_init_from_client(api_key): client = AsyncOpenAIClient(api_key=api_key) # With model name model = outlines.from_openai(client, "gpt-4o") assert isinstance(model, AsyncOpenAI) assert model.client == client assert model.model_name == "gpt-4o" # Without model name model = outlines.from_openai(client) assert isinstance(model, AsyncOpenAI) assert model.client == client assert model.model_name is None @pytest.mark.asyncio async def test_openai_async_wrong_inference_parameters(async_model): with pytest.raises(TypeError, match="got an unexpected"): await async_model.generate("prompt", foo=10) @pytest.mark.asyncio async def test_openai_async_wrong_input_type(async_model, image): class Foo: def __init__(self, foo): self.foo = foo with pytest.raises(TypeError, match="is not available"): await async_model.generate(Foo("prompt")) with pytest.raises(ValueError, match="All assets provided must be of type Image"): await async_model.generate(["foo?", Image(image), Video("")]) @pytest.mark.asyncio async def test_openai_async_wrong_output_type(async_model): class Foo: def __init__(self, foo): self.foo = foo with pytest.raises(TypeError, match="is not available"): await async_model.generate("prompt", Foo(1)) @pytest.mark.asyncio @pytest.mark.api_call async def test_openai_async_simple_call(async_model): result = await async_model.generate("Respond with one word. Not more.") assert isinstance(result, str) @pytest.mark.asyncio @pytest.mark.api_call async def test_openai_async_simple_call_multiple_samples(async_model): result = await async_model.generate("Respond with one word. Not more.", n=2) assert isinstance(result, list) assert len(result) == 2 assert isinstance(result[0], str) assert isinstance(result[1], str) @pytest.mark.asyncio @pytest.mark.api_call async def test_openai_async_direct_call(async_model_no_model_name): result = await async_model_no_model_name( "Respond with one word. Not more.", model=MODEL_NAME, ) assert isinstance(result, str) @pytest.mark.asyncio @pytest.mark.api_call async def test_openai_async_simple_vision(image, async_model): result = await async_model.generate(["What does this logo represent?", Image(image)]) assert isinstance(result, str) @pytest.mark.asyncio @pytest.mark.api_call async def test_openai_async_chat(image, async_model): result = await async_model.generate(Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": ["What does this logo represent?", Image(image)] }, ]), max_tokens=10) assert isinstance(result, str) @pytest.mark.asyncio @pytest.mark.api_call async def test_openai_async_simple_pydantic(async_model): class Foo(BaseModel): bar: int result = await async_model.generate("foo?", Foo) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.asyncio @pytest.mark.api_call async def test_openai_async_simple_pydantic_refusal(async_model): class Foo(BaseModel): bar: Annotated[str, Field(int, pattern=r"^\d+$")] with pytest.raises(TypeError, match="OpenAI does not support your schema"): _ = await async_model.generate("foo?", Foo) @pytest.mark.asyncio @pytest.mark.api_call async def test_openai_async_simple_vision_pydantic(image, async_model): class Logo(BaseModel): name: int result = await async_model.generate(["What does this logo represent?", Image(image)], Logo) assert isinstance(result, str) assert "name" in json.loads(result) @pytest.mark.asyncio @pytest.mark.api_call async def test_openai_async_simple_json_schema(async_model): class Foo(BaseModel): bar: int schema = json.dumps(Foo.model_json_schema()) result = await async_model.generate("foo?", json_schema(schema)) assert isinstance(result, str) assert "bar" in json.loads(result) @pytest.mark.asyncio @pytest.mark.api_call async def test_openai_async_streaming(async_model): result = async_model.stream("Respond with a single word.") assert isinstance(result, AsyncGenerator) async for chunk in result: assert isinstance(chunk, str) break # Just check the first chunk @pytest.mark.asyncio async def test_openai_async_batch(async_model): with pytest.raises(NotImplementedError, match="does not support"): await async_model.batch( ["Respond with one word.", "Respond with one word."], ) ================================================ FILE: tests/models/test_openai_type_adapter.py ================================================ import io import json import pytest import sys from dataclasses import dataclass from typing import Literal from genson import SchemaBuilder from PIL import Image as PILImage from pydantic import BaseModel from outlines import cfg, json_schema, regex from outlines.inputs import Chat, Image from outlines.models.openai import OpenAITypeAdapter if sys.version_info >= (3, 12): from typing import TypedDict else: from typing_extensions import TypedDict @pytest.fixture def schema(): return { "properties": { "user_id": {"title": "User Id", "type": "integer"}, "name": {"title": "Name", "type": "string"}, }, "required": ["user_id", "name"], "title": "User", "type": "object", "additionalProperties": False, } @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image @pytest.fixture def adapter(): return OpenAITypeAdapter() def test_openai_type_adapter_input_text(adapter): message = "prompt" result = adapter.format_input(message) assert result == [{"role": "user", "content": message}] def test_openai_type_adapter_input_vision(adapter, image): image_input = Image(image) text_input = "hello" result = adapter.format_input([text_input, image_input]) assert result == [ { "role": "user", "content": [ {"type": "text", "text": text_input}, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_input.image_str}" }, }, ], }, ] def test_openai_type_adapter_input_chat(adapter, image): image_input = Image(image) model_input = Chat(messages=[ {"role": "system", "content": "prompt"}, {"role": "user", "content": [ "hello", image_input, ]}, {"role": "assistant", "content": "response"}, ]) result = adapter.format_input(model_input) assert result == [ {"role": "system", "content": "prompt"}, { "role": "user", "content": [ {"type": "text", "text": "hello"}, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_input.image_str}" }, }, ] }, {"role": "assistant", "content": "response"}, ] def test_openai_type_adapter_input_invalid(adapter): @dataclass class Audio: file: str with pytest.raises(TypeError, match="is not available"): _ = adapter.format_input(Audio("file")) with pytest.raises( ValueError, match="All assets provided must be of type Image", ): _ = adapter.format_input(["prompt", Audio("file")]) with pytest.raises( ValueError, match="The content must be a string or a list", ): _ = adapter.format_input( Chat(messages=[{"role": "user", "content": {"foo": "bar"}}]) ) def test_openai_type_adapter_output_invalid(adapter): with pytest.raises(TypeError, match="The type `str` is not available"): adapter.format_output_type(str) with pytest.raises(TypeError, match="The type `int` is not available"): adapter.format_output_type(int) with pytest.raises(TypeError, match="The type `Literal` is not available"): adapter.format_output_type(Literal[1, 2]) with pytest.raises(TypeError, match="Neither regex-based"): adapter.format_output_type(regex("[0-9]")) with pytest.raises(TypeError, match="CFG-based structured outputs"): adapter.format_output_type(cfg("")) class Foo(BaseModel): bar: str with pytest.raises(TypeError, match="The type `list` is not available"): adapter.format_output_type(list[Foo]) def test_openai_type_adapter_output_none(adapter): result = adapter.format_output_type(None) assert result == {} def test_openai_type_adapter_json_mode(adapter): result = adapter.format_output_type(dict) assert result == {"response_format": {"type": "json_object"}} def test_openai_type_adapter_dataclass(adapter, schema): @dataclass class User: user_id: int name: str result = adapter.format_output_type(User) assert isinstance(result, dict) assert result["response_format"]["json_schema"]["strict"] is True assert result["response_format"]["json_schema"]["schema"] == schema def test_openai_type_adapter_typed_dict(adapter, schema): class User(TypedDict): user_id: int name: str result = adapter.format_output_type(User) assert isinstance(result, dict) assert result["response_format"]["json_schema"]["strict"] is True assert result["response_format"]["json_schema"]["schema"] == schema def test_openai_type_adapter_pydantic(adapter, schema): class User(BaseModel): user_id: int name: str result = adapter.format_output_type(User) assert isinstance(result, dict) assert result["response_format"]["json_schema"]["strict"] is True assert result["response_format"]["json_schema"]["schema"] == schema def test_openai_type_adapter_genson_schema_builder(adapter, schema): builder = SchemaBuilder() builder.add_schema({"type": "object", "properties": {}}) builder.add_object({"hi": "there"}) builder.add_object({"hi": 5}) result = adapter.format_output_type(builder) assert isinstance(result, dict) assert result["response_format"]["json_schema"]["strict"] is True expected_schema = { "$schema": "http://json-schema.org/schema#", "type": "object", "properties": {"hi": {"type": ["integer", "string"]}}, "required": ["hi"], "additionalProperties": False # OpenAI adds this } assert result["response_format"]["json_schema"]["schema"] == expected_schema def test_openai_type_adapter_json_schema_str(adapter, schema): schema_str = json.dumps(schema) result = adapter.format_output_type(json_schema(schema_str)) assert isinstance(result, dict) assert result["response_format"]["json_schema"]["strict"] is True assert result["response_format"]["json_schema"]["schema"] == schema def test_openai_type_adapter_json_schema_dict(adapter, schema): result = adapter.format_output_type(json_schema(schema)) assert isinstance(result, dict) assert result["response_format"]["json_schema"]["strict"] is True assert result["response_format"]["json_schema"]["schema"] == schema ================================================ FILE: tests/models/test_sglang.py ================================================ # ATTENTION: When running this test with an actual SGLang server, use the # llguidance backend (--grammar-backend llguidance) # The outlines backend does not support the EBNF grammar. The xgrammar # backend is slow and buggy. import io import os import re import warnings from typing import AsyncGenerator, Generator import pytest from PIL import Image as PILImage from openai import AsyncOpenAI, OpenAI from outlines.inputs import Chat, Image from outlines.models.sglang import SGLang, AsyncSGLang, from_sglang from outlines.types.dsl import CFG, Regex, JsonSchema from tests.test_utils.mock_openai_client import MockOpenAIClient, MockAsyncOpenAIClient EBNF_YES_NO_GRAMMAR = """ root ::= answer answer ::= "yes" | "no" """ # Image for testing width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) image_input = Image(image) # If the SGLANG_SERVER_URL environment variable is set, use the real SGLang server # Otherwise, use the mock server sglang_server_url = os.environ.get("SGLANG_SERVER_URL") sglang_model_name = os.environ.get( "SGLANG_MODEL_NAME", "qwen/qwen2.5-0.5b-instruct" ) if sglang_server_url: openai_client = OpenAI(base_url=sglang_server_url, api_key="foo") async_openai_client = AsyncOpenAI(base_url=sglang_server_url, api_key="foo") else: warnings.warn("No SGLang server URL provided, using mock server") openai_client = MockOpenAIClient() async_openai_client = MockAsyncOpenAIClient() mock_responses = [ ( { 'messages': [ {'role': "user", 'content': 'Respond with a single word.'} ], 'model': sglang_model_name, }, "foo" ), ( { 'messages': [ {'role': "user", 'content': 'Respond with a single word.'} ], 'model': sglang_model_name, 'stream': True }, ["foo", "bar"] ), ( { 'messages': [ {'role': "user", 'content': 'Respond with a single word.'} ], 'n': 2, 'model': sglang_model_name, }, ["foo", "bar"] ), ( { 'messages': [{'role': "user", 'content': 'foo?'}], 'model': sglang_model_name, 'max_tokens': 10, 'response_format': { 'type': 'json_schema', 'json_schema': { 'name': 'default', 'strict': True, 'schema': { 'type': 'object', 'properties': {'bar': {'type': 'string'}}, 'additionalProperties': False } } } }, '{"foo": "bar"}' ), ( { 'messages': [{'role': "user", 'content': 'foo?'}], 'model': sglang_model_name, 'max_tokens': 10, 'extra_body': { 'regex': '([0-9]{3})', }, }, "123" ), ( { 'messages': [{'role': "user", 'content': 'foo?'}], 'model': sglang_model_name, 'max_tokens': 10, 'extra_body': { 'ebnf': EBNF_YES_NO_GRAMMAR, }, }, "yes" ), ( { 'messages': [ { "role": "user", "content": [ {"type": "text", "text": "hello"}, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_input.image_str}" }, }, ] } ], 'model': sglang_model_name, 'max_tokens': 10, }, "foo" ), ( { 'messages': [ {"role": "system", "content": "prompt"}, { "role": "user", "content": [ {"type": "text", "text": "hello"}, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_input.image_str}" }, }, ], }, {"role": "assistant", "content": "response"}, ], 'model': sglang_model_name, 'max_tokens': 10, }, "foo" ) ] # If the SGLANG_SERVER_URL environment variable is not set, add the mock # responses to the mock clients if not sglang_server_url: async_openai_client.add_mock_responses(mock_responses) openai_client.add_mock_responses(mock_responses) @pytest.fixture def sync_model(): return SGLang(openai_client, model_name=sglang_model_name) @pytest.fixture def sync_model_no_model_name(): return SGLang(openai_client) @pytest.fixture def async_model(): return AsyncSGLang(async_openai_client, model_name=sglang_model_name) @pytest.fixture def async_model_no_model_name(): return AsyncSGLang(async_openai_client) def test_sglang_init(): # We do not rely on the mock server here because we need an object # of type OpenAI and AsyncOpenAI to test the init function. openai_client = OpenAI(base_url="http://localhost:11434", api_key="foo") async_openai_client = AsyncOpenAI(base_url="http://localhost:11434", api_key="foo") # Sync with model name model = from_sglang(openai_client, sglang_model_name) assert isinstance(model, SGLang) assert model.client == openai_client assert model.model_name == sglang_model_name # Sync without model name model = from_sglang(openai_client) assert isinstance(model, SGLang) assert model.client == openai_client assert model.model_name is None # Async with model name model = from_sglang(async_openai_client, sglang_model_name) assert isinstance(model, AsyncSGLang) assert model.client == async_openai_client assert model.model_name == sglang_model_name # Async without model name model = from_sglang(async_openai_client) assert isinstance(model, AsyncSGLang) assert model.client == async_openai_client assert model.model_name is None with pytest.raises(ValueError, match="Unsupported client type"): from_sglang("foo") def test_sglang_sync_simple_call(sync_model): result = sync_model("Respond with a single word.",) assert isinstance(result, str) def test_sglang_sync_streaming(sync_model_no_model_name): result = sync_model_no_model_name.stream( "Respond with a single word.", model=sglang_model_name, ) assert isinstance(result, Generator) assert isinstance(next(result), str) def test_sglang_sync_batch(sync_model): with pytest.raises(NotImplementedError, match="does not support"): sync_model.batch( ["Respond with one word.", "Respond with one word."], ) def test_sglang_sync_vision(sync_model): result = sync_model(["hello", image_input], max_tokens=10) assert isinstance(result, str) def test_sglang_sync_vision_chat(sync_model): result = sync_model( Chat(messages=[ {"role": "system", "content": "prompt"}, {"role": "user", "content": [ "hello", image_input, ]}, {"role": "assistant", "content": "response"}, ]), max_tokens=10, ) assert isinstance(result, str) def test_sglang_sync_multiple_samples(sync_model): result = sync_model("Respond with a single word.", n=2) assert isinstance(result, list) assert len(result) == 2 assert isinstance(result[0], str) assert isinstance(result[1], str) def test_sglang_sync_json(sync_model): json_string = ( '{"type": "object", "properties":' + ' {"bar": {"type": "string"}}}' ) result = sync_model("foo?", JsonSchema(json_string), max_tokens=10) assert isinstance(result, str) assert "bar" in result def test_sglang_sync_regex(sync_model): result = sync_model("foo?", Regex(r"[0-9]{3}"), max_tokens=10) assert isinstance(result, str) assert re.match(r"[0-9]{3}", result) def test_sglang_sync_cfg(sync_model): with pytest.warns( UserWarning, match="SGLang grammar-based structured outputs expects an EBNF" ): result = sync_model("foo?", CFG(EBNF_YES_NO_GRAMMAR), max_tokens=10) assert isinstance(result, str) assert result in ["yes", "no"] @pytest.mark.asyncio async def test_sglang_async_simple_call(async_model): result = await async_model("Respond with a single word.",) assert isinstance(result, str) @pytest.mark.asyncio async def test_sglang_async_streaming(async_model_no_model_name): result = async_model_no_model_name.stream( "Respond with a single word.", model=sglang_model_name, ) assert isinstance(result, AsyncGenerator) async for chunk in result: assert isinstance(chunk, str) break # Just check the first chunk @pytest.mark.asyncio async def test_sglang_async_batch(async_model): with pytest.raises(NotImplementedError, match="does not support"): await async_model.batch( ["Respond with one word.", "Respond with one word."], ) @pytest.mark.asyncio async def test_sglang_async_vision(async_model): result = await async_model(["hello", image_input], max_tokens=10) assert isinstance(result, str) @pytest.mark.asyncio async def test_sglang_async_vision_chat(async_model): result = await async_model( Chat(messages=[ {"role": "system", "content": "prompt"}, {"role": "user", "content": [ "hello", image_input, ]}, {"role": "assistant", "content": "response"}, ]), max_tokens=10, ) assert isinstance(result, str) @pytest.mark.asyncio async def test_sglang_async_multiple_samples(async_model): result = await async_model("Respond with a single word.", n=2) assert isinstance(result, list) assert len(result) == 2 assert isinstance(result[0], str) assert isinstance(result[1], str) @pytest.mark.asyncio async def test_sglang_async_json(async_model): json_string = ( '{"type": "object", "properties":' + ' {"bar": {"type": "string"}}}' ) result = await async_model("foo?", JsonSchema(json_string), max_tokens=10) assert isinstance(result, str) assert "bar" in result @pytest.mark.asyncio async def test_sglang_async_regex(async_model): result = await async_model("foo?", Regex(r"[0-9]{3}"), max_tokens=10) assert isinstance(result, str) assert re.match(r"[0-9]{3}", result) @pytest.mark.asyncio async def test_sglang_async_cfg(async_model): result = await async_model("foo?", CFG(EBNF_YES_NO_GRAMMAR), max_tokens=10) assert isinstance(result, str) assert result in ["yes", "no"] ================================================ FILE: tests/models/test_sglang_type_adapter.py ================================================ import io import json import pytest from dataclasses import dataclass from PIL import Image as PILImage from outlines.inputs import Chat, Image from outlines.models.sglang import SGLangTypeAdapter from outlines.types import CFG, JsonSchema CFG_STRING = """ ?start: expr ?expr: NUMBER """ JSON_SCHEMA_STRING = """ { "type": "object", "properties": { "answer": {"type": "number"} } } """ @pytest.fixture def type_adapter(): return SGLangTypeAdapter() @pytest.fixture def cfg_instance(): return CFG(CFG_STRING) @pytest.fixture def json_schema_instance(): return JsonSchema(JSON_SCHEMA_STRING) @pytest.fixture def json_schema_whitespace_instance(): return JsonSchema(JSON_SCHEMA_STRING, whitespace_pattern="\n") @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image def test_sglang_type_adapter_input_text(type_adapter): message = "prompt" result = type_adapter.format_input(message) assert result == [{"role": "user", "content": message}] def test_sglang_type_adapter_input_vision(type_adapter, image): image_input = Image(image) result = type_adapter.format_input(["hello", image_input]) assert result == [ { "role": "user", "content": [ {"type": "text", "text": "hello"}, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_input.image_str}" }, }, ] } ] def test_sglang_type_adapter_input_chat(type_adapter, image): image_input = Image(image) model_input = Chat(messages=[ {"role": "system", "content": "prompt"}, {"role": "user", "content": [ "hello", image_input, ]}, {"role": "assistant", "content": "response"}, ]) result = type_adapter.format_input(model_input) assert result == [ {"role": "system", "content": "prompt"}, { "role": "user", "content": [ {"type": "text", "text": "hello"}, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_input.image_str}" }, }, ], }, {"role": "assistant", "content": "response"}, ] def test_sglang_type_adapter_input_invalid(type_adapter): @dataclass class Audio: file: str prompt = Audio( "file", ) with pytest.raises(TypeError, match="The input type"): _ = type_adapter.format_input(prompt) def test_sglang_type_adapter_output_type( type_adapter, cfg_instance, json_schema_instance, json_schema_whitespace_instance, ): assert type_adapter.format_output_type(None) == {} with pytest.warns( UserWarning, match="SGLang grammar-based structured outputs expects an EBNF" ): assert type_adapter.format_output_type(cfg_instance) == { "extra_body": {"ebnf": CFG_STRING} } assert type_adapter.format_output_type(json_schema_instance) == { "response_format": { "type": "json_schema", "json_schema": { "name": "default", "strict": True, "schema": { **json.loads(JSON_SCHEMA_STRING), "additionalProperties": False, }, }, } } # whitespace pattern is ignored assert type_adapter.format_output_type(json_schema_whitespace_instance) == { "response_format": { "type": "json_schema", "json_schema": { "name": "default", "strict": True, "schema": { **json.loads(JSON_SCHEMA_STRING), "additionalProperties": False, }, }, } } assert type_adapter.format_output_type(int) == { "extra_body": {"regex": "([+-]?(0|[1-9][0-9]*))"} } ================================================ FILE: tests/models/test_tgi.py ================================================ import os import re import warnings from typing import AsyncGenerator, Generator import pytest from huggingface_hub import InferenceClient, AsyncInferenceClient from outlines.models.tgi import TGI, AsyncTGI, from_tgi from outlines.types.dsl import CFG, Regex, JsonSchema from tests.test_utils.mock_tgi_client import MockTGIInferenceClient, MockAsyncTGIInferenceClient YES_NO_GRAMMAR = """ ?start: answer answer: "yes" | "no" """ # If the TGI_SERVER_URL environment variable is set, use the real TGI server # Otherwise, use the mock server tgi_server_url = os.environ.get("TGI_SERVER_URL") if tgi_server_url: tgi_client = InferenceClient(tgi_server_url) async_tgi_client = AsyncInferenceClient(tgi_server_url) else: warnings.warn("No TGI server URL provided, using mock server") tgi_client = MockTGIInferenceClient() async_tgi_client = MockAsyncTGIInferenceClient() mock_responses = [ ( { 'prompt': 'Respond with a single word.', 'max_new_tokens': 10, }, "foo" ), ( { 'prompt': 'Respond with a single word.', 'max_new_tokens': 10, 'stream': True }, ["foo", "bar"] ), ( { 'prompt': 'foo?', 'max_new_tokens': 10, 'grammar': { 'type': 'json', 'value': { 'type': 'object', 'properties': { 'bar': {'type': 'string'} }, 'required': ['bar'] } } }, '{"foo": "bar"}' ), ( { 'prompt': 'foo?', 'max_new_tokens': 10, 'grammar': { 'type': 'regex', 'value': '([0-9]{3})', }, }, "123" ), ] # If the TGI_SERVER_URL environment variable is not set, add the mock # responses to the mock clients if not tgi_server_url: async_tgi_client.add_mock_responses(mock_responses) tgi_client.add_mock_responses(mock_responses) @pytest.fixture def sync_model(): return TGI(tgi_client) @pytest.fixture def async_model(): return AsyncTGI(async_tgi_client) def test_tgi_init(): model = from_tgi( InferenceClient("http://localhost:11434"), ) assert isinstance(model, TGI) model = from_tgi( AsyncInferenceClient("http://localhost:11434"), ) assert isinstance(model, AsyncTGI) with pytest.raises(ValueError, match="Unsupported client type"): from_tgi("foo") def test_tgi_sync_simple_call(sync_model): result = sync_model("Respond with a single word.", max_new_tokens=10) assert isinstance(result, str) def test_tgi_sync_streaming(sync_model): result = sync_model.stream( "Respond with a single word.", max_new_tokens=10, ) assert isinstance(result, Generator) assert isinstance(next(result), str) def test_tgi_sync_batch(sync_model): with pytest.raises(NotImplementedError, match="does not support"): sync_model.batch( ["Respond with one word.", "Respond with one word."], ) def test_tgi_sync_json(sync_model): json_string = '{"type": "object", "properties": {"bar": {"type": "string"}}, "required": ["bar"]}' result = sync_model("foo?", JsonSchema(json_string), max_new_tokens=10) assert isinstance(result, str) assert "bar" in result def test_tgi_sync_regex(sync_model): result = sync_model("foo?", Regex(r"[0-9]{3}"), max_new_tokens=10) assert isinstance(result, str) assert re.match(r"[0-9]{3}", result) def test_tgi_sync_cfg(sync_model): with pytest.raises( NotImplementedError, match="TGI does not support CFG-based structured outputs", ): sync_model("foo?", CFG(YES_NO_GRAMMAR), max_new_tokens=10) @pytest.mark.asyncio async def test_tgi_async_simple_call(async_model): result = await async_model("Respond with a single word.", max_new_tokens=10) assert isinstance(result, str) @pytest.mark.asyncio async def test_tgi_async_streaming(async_model): result = async_model.stream("Respond with a single word.", max_new_tokens=10) assert isinstance(result, AsyncGenerator) async for chunk in result: assert isinstance(chunk, str) break # Just check the first chunk @pytest.mark.asyncio async def test_tgi_async_batch(async_model): with pytest.raises(NotImplementedError, match="does not support"): await async_model.batch( ["Respond with one word.", "Respond with one word."], ) @pytest.mark.asyncio async def test_tgi_async_json(async_model): json_string = '{"type": "object", "properties": {"bar": {"type": "string"}}, "required": ["bar"]}' result = await async_model("foo?", JsonSchema(json_string), max_new_tokens=10) assert isinstance(result, str) assert "bar" in result @pytest.mark.asyncio async def test_tgi_async_regex(async_model): result = await async_model("foo?", Regex(r"[0-9]{3}"), max_new_tokens=10) assert isinstance(result, str) assert re.match(r"[0-9]{3}", result) @pytest.mark.asyncio async def test_tgi_async_cfg(async_model): with pytest.raises( NotImplementedError, match="TGI does not support CFG-based structured outputs", ): await async_model("foo?", CFG(YES_NO_GRAMMAR), max_new_tokens=10) ================================================ FILE: tests/models/test_tgi_model_adapter.py ================================================ import json import pytest from outlines.models.tgi import TGITypeAdapter from outlines.types import CFG, JsonSchema CFG_STRING = """ ?start: expr ?expr: NUMBER """ JSON_SCHEMA_STRING = """ { "type": "object", "properties": { "answer": {"type": "number"} } } """ @pytest.fixture def type_adapter(): return TGITypeAdapter() @pytest.fixture def cfg_instance(): return CFG(CFG_STRING) @pytest.fixture def json_schema_instance(): return JsonSchema(JSON_SCHEMA_STRING) @pytest.fixture def json_schema_whitespace_instance(): return JsonSchema(JSON_SCHEMA_STRING, whitespace_pattern="\n") def test_tgi_type_adapter_input_text(type_adapter): message = "prompt" assert message == type_adapter.format_input(message) def test_tgi_type_adapter_input_invalid(type_adapter): with pytest.raises( NotImplementedError, match="is not available with TGI", ): type_adapter.format_input({"foo": "bar"}) def test_tgi_type_adapter_output_type( type_adapter, json_schema_instance, json_schema_whitespace_instance, ): assert type_adapter.format_output_type(None) == {} assert type_adapter.format_output_type(json_schema_instance) == { "grammar": { "type": "json", "value": json.loads(JSON_SCHEMA_STRING), } } # whitespace_pattern is ignored assert type_adapter.format_output_type(json_schema_whitespace_instance) == { "grammar": { "type": "json", "value": json.loads(JSON_SCHEMA_STRING), } } assert type_adapter.format_output_type(int) == { "grammar": { "type": "regex", "value": "([+-]?(0|[1-9][0-9]*))", } } def test_tgi_type_adapter_output_type_invalid( type_adapter, cfg_instance, ): with pytest.raises( NotImplementedError, match="TGI does not support CFG-based structured outputs.", ): type_adapter.format_output_type(cfg_instance) ================================================ FILE: tests/models/test_tokenizer.py ================================================ import pytest from outlines.models.tokenizer import Tokenizer, _check_hf_chat_template def test_tokenizer(): with pytest.raises(TypeError, match="instantiate abstract"): Tokenizer() def test_check_hf_chat_template(): from transformers import AutoTokenizer assert _check_hf_chat_template(AutoTokenizer.from_pretrained("openai-community/gpt2")) is False assert _check_hf_chat_template(AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")) is True ================================================ FILE: tests/models/test_transformers.py ================================================ import re from enum import Enum from pydantic import BaseModel import pytest import torch import transformers import outlines from outlines.inputs import Chat from outlines.models.transformers import ( Transformers, TransformerTokenizer, TransformersTypeAdapter, ) from outlines.types import Regex TEST_MODEL = "erwanf/gpt2-mini" TEST_MODEL_MAMBA = "hf-internal-testing/tiny-random-MambaForCausalLM" TEST_MODEL_BART = "trl-internal-testing/tiny-BartModel" def test_transformers_instantiate_invalid(): with pytest.raises(ValueError): outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(TEST_MODEL), int, ) def test_transformers_instantiate_simple(): model = outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(TEST_MODEL), transformers.AutoTokenizer.from_pretrained(TEST_MODEL), ) assert isinstance(model, Transformers) assert isinstance(model.tokenizer, TransformerTokenizer) assert isinstance(model.type_adapter, TransformersTypeAdapter) assert model.tensor_library_name == "torch" def test_transformers_instantiate_mamba(): model = outlines.from_transformers( transformers.MambaForCausalLM.from_pretrained(TEST_MODEL_MAMBA), transformers.AutoTokenizer.from_pretrained(TEST_MODEL), ) assert isinstance(model, Transformers) def test_transformers_instantiate_tokenizer_kwargs_dtype(): model = outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(TEST_MODEL), transformers.AutoTokenizer.from_pretrained( TEST_MODEL, additional_special_tokens=["", ""] ), device_dtype=torch.bfloat16, ) assert "" in model.tokenizer.special_tokens assert "" in model.tokenizer.special_tokens assert model.device_dtype == torch.bfloat16 @pytest.fixture def model(): model = outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained(TEST_MODEL), transformers.AutoTokenizer.from_pretrained(TEST_MODEL), ) chat_template = '{% for message in messages %}{{ message.role }}: {{ message.content }}{% endfor %}' model.type_adapter.tokenizer.chat_template = chat_template return model @pytest.fixture def model_bart(): model = outlines.from_transformers( transformers.BartForConditionalGeneration.from_pretrained(TEST_MODEL_BART), transformers.BartTokenizer.from_pretrained(TEST_MODEL_BART), ) return model def test_transformers_simple(model): result = model.generate("Respond with one word. Not more.", None) assert isinstance(result, str) def test_transformers_call(model, model_bart): result = model("Respond with one word. Not more.") assert isinstance(result, str) model.device_dtype = torch.bfloat16 result = model("Respond with one word. Not more.") assert isinstance(result, str) result = model_bart("Respond with one word. Not more.") assert isinstance(result, str) def test_transformers_chat(model): result = model( Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What is the capital of France?"}, ]) ) assert isinstance(result, str) def test_transformers_inference_kwargs(model): result = model("Respond with one word. Not more.", max_new_tokens=100) assert isinstance(result, str) def test_transformers_invalid_inference_kwargs(model): with pytest.raises(ValueError): model("Respond with one word. Not more.", foo="bar") def test_transformers_regex(model): result = model("Give a number between 0 and 9.", Regex(r"[0-9]")) assert isinstance(result, str) assert re.match(r"[0-9]", result) def test_transformers_json(model): class Character(BaseModel): name: str result = model("Create a character with a name.", Character) assert "name" in result def test_transformers_choice(model): class Foo(Enum): cat = "cat" dog = "dog" result = model("Cat or dog?", Foo) assert result in ["cat", "dog"] def test_transformers_multiple_samples(model): result = model("Respond with one word. Not more.") assert isinstance(result, str) result = model( "Respond with one word. Not more.", num_return_sequences=2, do_sample=True ) assert isinstance(result, list) assert len(result) == 2 def test_transformers_batch(model): result = model.batch( ["Respond with one word. Not more.", "Respond with one word. Not more."] ) assert isinstance(result, list) assert len(result) == 2 result = model.batch( ["Respond with one word. Not more.", "Respond with one word. Not more."], num_return_sequences=2, do_sample=True, ) assert isinstance(result, list) assert len(result) == 2 for item in result: assert isinstance(item, list) assert len(item) == 2 result = model.batch( [ Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What is the capital of France?"}, ]), Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What is the capital of France?"}, ]), ], ) assert isinstance(result, list) assert len(result) == 2 def test_transformers_multiple_samples_constrained(model): class Foo(Enum): cat = "cat" dog = "dog" result = model("Cat or dog?", Foo, num_return_sequences=2, do_sample=True) assert isinstance(result, list) assert len(result) == 2 assert result[0] in ["cat", "dog"] assert result[1] in ["cat", "dog"] def test_transformers_batch_constrained(model): class Foo(Enum): cat = "cat" dog = "dog" result = model.batch( ["Cat or dog?", "Cat or dog?"], Foo, ) assert isinstance(result, list) assert len(result) == 2 assert result[0] in ["cat", "dog"] assert result[1] in ["cat", "dog"] result = model.batch( ["Cat or dog?", "Cat or dog?"], Foo, num_return_sequences=2, do_sample=True, ) assert isinstance(result, list) assert len(result) == 2 for item in result: assert isinstance(item, list) assert len(item) == 2 assert item[0] in ["cat", "dog"] assert item[1] in ["cat", "dog"] def test_transformers_streaming(model): with pytest.raises(NotImplementedError, match="Streaming is not implemented"): model.stream("Respond with one word. Not more.") @pytest.mark.parametrize( "model_name", [ TEST_MODEL, "HuggingFaceTB/SmolLM2-135M" ], ) def test_transformers_parametrized_smoke(model_name): """ Smoke test to ensure basic constrained generation works across different tokenizers. """ hf_model = transformers.AutoModelForCausalLM.from_pretrained(model_name) hf_model.eval() hf_tokenizer = transformers.AutoTokenizer.from_pretrained(model_name) model = outlines.from_transformers(hf_model, hf_tokenizer) prompt = "Is 1+1=2? Answer Yes or No:" constraint = Regex(r"\s*(Yes|No)") out = model( prompt, constraint, max_new_tokens=5, do_sample=False, ) assert out.strip() in {"Yes", "No"} ================================================ FILE: tests/models/test_transformers_multimodal.py ================================================ # we only test vision models here as audio models are too heavy to run on CI import io import re import torch from enum import Enum import pytest from PIL import Image as PILImage from pydantic import BaseModel from transformers import ( LlavaForConditionalGeneration, AutoProcessor, ) import outlines from outlines.inputs import Chat, Image from outlines.models.transformers import ( TransformersMultiModal, TransformerTokenizer, TransformersMultiModalTypeAdapter, ) from outlines.types import Regex TEST_MODEL = "trl-internal-testing/tiny-LlavaForConditionalGeneration" @pytest.fixture def image(): width, height = 256, 256 blue_background = (0, 0, 255) image = PILImage.new("RGB", (width, height), blue_background) buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image @pytest.fixture def model(): model = outlines.from_transformers( LlavaForConditionalGeneration.from_pretrained(TEST_MODEL), AutoProcessor.from_pretrained(TEST_MODEL), ) return model def test_transformers_multimodal_instantiate(): model = outlines.from_transformers( LlavaForConditionalGeneration.from_pretrained(TEST_MODEL), AutoProcessor.from_pretrained(TEST_MODEL), device_dtype=torch.bfloat16, ) assert isinstance(model, TransformersMultiModal) assert isinstance(model.tokenizer, TransformerTokenizer) assert isinstance(model.type_adapter, TransformersMultiModalTypeAdapter) assert model.tensor_library_name == "torch" assert model.device_dtype == torch.bfloat16 def test_transformers_multimodal_simple(model, image): result = model.generate( ["Describe this image in one sentence:", Image(image)], None, max_new_tokens=2, ) assert isinstance(result, str) def test_transformers_multimodal_call(model, image): result = model( ["Describe this image in one sentence:", Image(image)], max_new_tokens=2, ) assert isinstance(result, str) model.device_dtype = torch.bfloat16 result = model( ["Describe this image in one sentence:", Image(image)], max_new_tokens=2, ) assert isinstance(result, str) def test_transformers_multimodal_wrong_number_image(model, image): with pytest.raises(ValueError): model( [ "Describe this image in one sentence:", Image(image), Image(image), ], ) def test_transformers_multimodal_wrong_input_type(model): with pytest.raises(TypeError): model.generate("invalid input", None) def test_transformers_multimodal_chat(model, image): result = model( Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": [ "Describe this image in one sentence:", Image(image), ], }, ]), max_new_tokens=2, ) assert isinstance(result, str) result = model( Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": [ {"type": "text", "text": "Describe this image in one sentence:"}, {"type": "image", "image": Image(image)}, ], }, ]), max_new_tokens=2, ) assert isinstance(result, str) def test_transformers_inference_kwargs(model, image): result = model( ["Describe this image in one sentence:", Image(image)], max_new_tokens=2, ) assert isinstance(result, str) def test_transformers_invalid_inference_kwargs(model, image): with pytest.raises(ValueError): model( [ "Describe this image in one sentence:", Image(image), ], foo="bar", ) def test_transformers_several_image(model, image): result = model( [ "Describe this image in one sentence:", Image(image), Image(image), ], max_new_tokens=2, ) assert isinstance(result, str) def test_transformers_multimodal_json(model, image): class Foo(BaseModel): name: str result = model( ["Give the name of the color.", Image(image)], Foo, max_new_tokens=10, ) assert "name" in result def test_transformers_multimodal_regex(model, image): result = model( ["How warn is the color from 0 to 9?", Image(image)], Regex(r"[0-9]") ) assert isinstance(result, str) assert re.match(r"[0-9]", result) def test_transformers_multimodal_choice(model, image): class Foo(Enum): white = "white" blue = "blue" result = model( ["Is it a white or a blue?", Image(image)], Foo, ) assert isinstance(result, str) assert result in ["white", "blue"] def test_transformers_multimodal_multiple_samples(model, image): result = model( ["Describe this image in one sentence.", Image(image)], num_return_sequences=2, num_beams=2, max_new_tokens=2, ) assert isinstance(result, list) assert len(result) == 2 def test_transformers_multimodal_batch(model, image): result = model.batch( [ ["Describe this image in one sentence.", Image(image)], ["Describe this image in one sentence.", Image(image)], ], max_new_tokens=2, ) assert isinstance(result, list) assert len(result) == 2 result = model.batch( [ ["Describe this image in one sentence.", Image(image), Image(image)], ["Describe this image in one sentence.", Image(image), Image(image)], ], num_return_sequences=2, num_beams=2, max_new_tokens=2, ) assert isinstance(result, list) assert len(result) == 2 for item in result: assert isinstance(item, list) assert len(item) == 2 result = model.batch( [ Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": [ "Describe this image in one sentence:", Image(image), ], }, ]), Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": [ "Describe this image in one sentence:", Image(image), ], }, ]), ], max_new_tokens=2, ) assert isinstance(result, list) assert len(result) == 2 result = model.batch( [ Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": [ {"type": "text", "text": "Describe this image in one sentence:"}, {"type": "image", "image": Image(image)}, ], }, ]), Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, { "role": "user", "content": [ {"type": "text", "text": "Describe this image in one sentence:"}, {"type": "image", "image": Image(image)}, ], }, ]), ], max_new_tokens=2, ) assert isinstance(result, list) assert len(result) == 2 ================================================ FILE: tests/models/test_transformers_multimodal_type_adapter.py ================================================ import pytest from PIL import Image as PILImage from outlines_core import Index, Vocabulary from transformers import ( AutoProcessor, LogitsProcessorList, ) from outlines.inputs import Audio, Chat, Image, Video from outlines.models.transformers import TransformersMultiModalTypeAdapter from outlines.backends.outlines_core import OutlinesCoreLogitsProcessor MODEL_NAME = "trl-internal-testing/tiny-LlavaForConditionalGeneration" @pytest.fixture def adapter(): processor = AutoProcessor.from_pretrained(MODEL_NAME) tokenizer = processor.tokenizer type_adapter = TransformersMultiModalTypeAdapter(tokenizer=tokenizer) return type_adapter @pytest.fixture def logits_processor(): vocabulary = Vocabulary.from_pretrained("openai-community/gpt2") index = Index(r"[0-9]{3}", vocabulary) return OutlinesCoreLogitsProcessor(index, "torch") @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) image.format = "PNG" return image @pytest.fixture def video(): # Simple mock video data return "mock_video_data" @pytest.fixture def audio(): # Simple mock audio data return "mock_audio_data" def test_transformers_multimodal_type_adapter_format_input(adapter, image): with pytest.raises(TypeError): adapter.format_input("hello") with pytest.raises(TypeError): adapter.format_input({"foo": "bar"}) with pytest.raises(ValueError, match="All assets must be of the same type"): adapter.format_input(["foo", Image(image), Video("")]) class MockAsset: pass with pytest.raises(ValueError, match="Unsupported asset type"): adapter.format_input(["foo", MockAsset()]) image_asset = Image(image) assert adapter.format_input(["foo", image_asset]) == { "text": "foo", "images": [image_asset.image], } chat_prompt = Chat(messages=[ {"role": "system", "content": "foo"}, {"role": "user", "content": ["bar", image_asset]}, ]) result = adapter.format_input(chat_prompt) assert isinstance(result, dict) assert isinstance(result["text"], str) assert isinstance(result["images"], list) assert len(result["images"]) == 1 assert result["images"][0] == image_asset.image chat_prompt = Chat(messages=[ {"role": "system", "content": "foo"}, {"role": "user", "content": [{"type": "text", "text": "bar"}, {"type": "image", "image": image_asset}]}, ]) result = adapter.format_input(chat_prompt) assert isinstance(result, dict) assert isinstance(result["text"], str) assert isinstance(result["images"], list) assert len(result["images"]) == 1 assert result["images"][0] == image_asset.image def test_transformers_multimodal_type_adapter_format_input_empty_assets(adapter): result = adapter.format_input(["Just text prompt"]) assert result == {"text": "Just text prompt"} def test_transformers_multimodal_type_adapter_format_input_chat_invalid_asset_type(adapter, image): class MockAsset: pass chat_prompt = Chat(messages=[ {"role": "user", "content": [ {"type": "text", "text": "Hello"}, {"type": "image", "image": MockAsset()} # Wrong type ]} ]) with pytest.raises(ValueError, match="Assets must be of type"): adapter.format_input(chat_prompt) def test_transformers_multimodal_type_adapter_format_input_chat_unsupported_content_type(adapter): chat_prompt = Chat(messages=[ {"role": "user", "content": [ {"type": "text", "text": "Hello"}, {"type": "unsupported", "data": "some_data"} # Unsupported type ]} ]) with pytest.raises(ValueError, match="Content must be 'text'"): adapter.format_input(chat_prompt) def test_transformers_multimodal_type_adapter_format_output_type( adapter, logits_processor ): formatted = adapter.format_output_type(logits_processor) assert isinstance(formatted, LogitsProcessorList) assert len(formatted) == 1 assert formatted[0].index == logits_processor.index assert formatted[0].tensor_library_name == logits_processor.tensor_library_name formatted = adapter.format_output_type(None) assert formatted is None def test_transformers_multimodal_type_adapter_format_input_chat_missing_asset_key(adapter, image): image_asset = Image(image) # Test missing 'image' key when type is 'image' chat_prompt = Chat(messages=[ {"role": "user", "content": [ {"type": "text", "text": "What's in this image?"}, {"type": "image", "txt": image_asset} # Wrong key: 'txt' instead of 'image' ]} ]) with pytest.raises(ValueError, match="Item with type 'image' must contain a 'image' key"): adapter.format_input(chat_prompt) # Test missing 'video' key when type is 'video' video_asset = Video("dummy_video") chat_prompt = Chat(messages=[ {"role": "user", "content": [ {"type": "text", "text": "What's in this video?"}, {"type": "video", "vid": video_asset} # Wrong key: 'vid' instead of 'video' ]} ]) with pytest.raises(ValueError, match="Item with type 'video' must contain a 'video' key"): adapter.format_input(chat_prompt) def test_transformers_multimodal_type_adapter_format_input_chat_missing_type_key(adapter, image): image_asset = Image(image) chat_prompt = Chat(messages=[ {"role": "user", "content": [ {"text": "What's in this image?"}, # Missing 'type' key {"type": "image", "image": image_asset} ]} ]) with pytest.raises(ValueError, match="Each item in the content list must be a dictionary with a 'type' key"): adapter.format_input(chat_prompt) def test_transformers_multimodal_type_adapter_format_input_invalid_content_type(adapter): chat_prompt = Chat(messages=[ {"role": "user", "content": 42} # Invalid content type (integer) ]) with pytest.raises(ValueError, match="Invalid content type"): adapter.format_input(chat_prompt) # Test with another invalid type chat_prompt = Chat(messages=[ {"role": "user", "content": {"invalid": "dict"}} # Invalid content type (dict not in list) ]) with pytest.raises(ValueError, match="Invalid content type"): adapter.format_input(chat_prompt) def test_transformers_multimodal_type_adapter_format_asset_for_template(adapter, image, video, audio): # Test Image asset image_asset = Image(image) formatted_image = adapter._format_asset_for_template(image_asset) assert formatted_image == {"type": "image", "image": image_asset} # Test Video asset video_asset = Video(video) formatted_video = adapter._format_asset_for_template(video_asset) assert formatted_video == {"type": "video", "video": video_asset} # Test Audio asset audio_asset = Audio(audio) formatted_audio = adapter._format_asset_for_template(audio_asset) assert formatted_audio == {"type": "audio", "audio": audio_asset} def test_transformers_multimodal_type_adapter_format_asset_for_template_invalid_type(adapter): class MockUnsupportedAsset: pass # This test requires accessing the private method directly since the error # would normally be caught earlier in the validation chain unsupported_asset = MockUnsupportedAsset() with pytest.raises(ValueError, match="Assets must be of type `Image`, `Video` or `Audio`"): adapter._format_asset_for_template(unsupported_asset) def test_transformers_multimodal_type_adapter_multiple_assets_in_single_item(adapter, image): image_asset = Image(image) video_asset = Video("dummy_video") chat_prompt = Chat(messages=[ {"role": "user", "content": [ {"type": "text", "text": "What's in this?"}, {"type": "image", "image": image_asset, "video": video_asset} # Multiple asset types ]} ]) with pytest.raises(ValueError, match="Found item with multiple keys:"): adapter.format_input(chat_prompt) def test_transformers_multimodal_type_adapter_correct_multiple_assets_usage(adapter, image): image_asset1 = Image(image) image_asset2 = Image(image) # Correct way: separate dictionary items for each asset chat_prompt = Chat(messages=[ {"role": "user", "content": [ {"type": "text", "text": "What's in these images?"}, {"type": "image", "image": image_asset1}, {"type": "image", "image": image_asset2} ]} ]) result = adapter.format_input(chat_prompt) assert isinstance(result, dict) assert "text" in result assert "images" in result assert len(result["images"]) == 2 ================================================ FILE: tests/models/test_transformers_tokenizer.py ================================================ import pytest import transformers from outlines.models.transformers import ( get_llama_tokenizer_types, TransformerTokenizer, ) TEST_MODEL = "erwanf/gpt2-mini" TEST_MODEL_SEQ2SEQ = "hf-internal-testing/tiny-random-t5" @pytest.fixture def tokenizer(): return transformers.AutoTokenizer.from_pretrained(TEST_MODEL) @pytest.fixture def tokenizer_no_pad_token_id(tokenizer): tokenizer.pad_token_id = None return tokenizer @pytest.fixture def tokenizer_seq2seq(): return transformers.AutoTokenizer.from_pretrained(TEST_MODEL_SEQ2SEQ) @pytest.fixture def transformer_tokenizer(tokenizer): return TransformerTokenizer(tokenizer) @pytest.fixture def another_transformer_tokenizer(tokenizer): return TransformerTokenizer(tokenizer) @pytest.fixture def transformer_tokenizer_seq2seq(tokenizer_seq2seq): return TransformerTokenizer(tokenizer_seq2seq) def test_get_llama_tokenizer_types(): tokenizer_types = get_llama_tokenizer_types() assert tokenizer_types[0] is transformers.models.llama.LlamaTokenizer assert tokenizer_types[1] is transformers.models.llama.LlamaTokenizerFast assert tokenizer_types[2] is transformers.models.code_llama.CodeLlamaTokenizer assert tokenizer_types[3] is transformers.models.code_llama.CodeLlamaTokenizerFast def test_transformer_tokenizer_init( tokenizer, tokenizer_no_pad_token_id ): # tokenizer with a pad_token_id transformer_tokenizer = TransformerTokenizer(tokenizer) assert transformer_tokenizer.tokenizer == tokenizer assert transformer_tokenizer.eos_token_id == tokenizer.eos_token_id assert transformer_tokenizer.pad_token_id == tokenizer.pad_token_id assert transformer_tokenizer.special_tokens == set(tokenizer.all_special_tokens) assert transformer_tokenizer.vocabulary == tokenizer.get_vocab() # tokenizer with no pad_token_id transformer_tokenizer_no_pad_token_id = TransformerTokenizer(tokenizer_no_pad_token_id) assert transformer_tokenizer_no_pad_token_id.tokenizer == tokenizer_no_pad_token_id assert transformer_tokenizer_no_pad_token_id.eos_token_id == tokenizer_no_pad_token_id.eos_token_id assert transformer_tokenizer_no_pad_token_id.pad_token_id == tokenizer_no_pad_token_id.eos_token_id assert transformer_tokenizer_no_pad_token_id.special_tokens == set(tokenizer_no_pad_token_id.all_special_tokens) assert transformer_tokenizer_no_pad_token_id.vocabulary == tokenizer_no_pad_token_id.get_vocab() def test_transformer_tokenizer_encode(transformer_tokenizer): input_ids, attention_mask = transformer_tokenizer.encode("Hello, world!") assert input_ids is not None assert attention_mask is not None assert input_ids.shape == attention_mask.shape def test_transformer_tokenizer_decode(transformer_tokenizer): input_ids, _ = transformer_tokenizer.encode("Hello, world!") decoded_text = transformer_tokenizer.decode(input_ids) assert isinstance(decoded_text, list) assert "Hello, world!" in decoded_text[0] def test_transformer_tokenizer_convert_token_to_string(transformer_tokenizer): # regular transformer_tokenizer.is_llama = False token = transformer_tokenizer.tokenizer.tokenize("Hello")[0] string = transformer_tokenizer.convert_token_to_string(token) assert isinstance(string, str) assert "Hello" in string # is_llama + <0x20> transformer_tokenizer.is_llama = True string = transformer_tokenizer.convert_token_to_string("<0x20>") assert isinstance(string, str) assert " " in string def test_transformer_tokenizer_eq( transformer_tokenizer, another_transformer_tokenizer, transformer_tokenizer_seq2seq, ): # different types of object assert transformer_tokenizer.__eq__(1) == NotImplemented # regular case assert transformer_tokenizer == another_transformer_tokenizer assert transformer_tokenizer != transformer_tokenizer_seq2seq # with model name and kwargs attributes transformer_tokenizer.model_name = "foo" transformer_tokenizer.kwargs = {"foo": "bar"} another_transformer_tokenizer.model_name = "foo" another_transformer_tokenizer.kwargs = {"foo": "bar"} assert transformer_tokenizer == another_transformer_tokenizer def test_transformer_tokenizer_hash( transformer_tokenizer, another_transformer_tokenizer, transformer_tokenizer_seq2seq, ): assert isinstance(hash(transformer_tokenizer), int) assert hash(transformer_tokenizer) == hash(another_transformer_tokenizer) assert hash(transformer_tokenizer) != hash(transformer_tokenizer_seq2seq) def test_transformer_tokenizer_getstate_setstate( transformer_tokenizer, another_transformer_tokenizer, ): state = transformer_tokenizer.__getstate__() assert "tokenizer" in state another_transformer_tokenizer.__setstate__(state) assert another_transformer_tokenizer == transformer_tokenizer ================================================ FILE: tests/models/test_transformers_type_adapter.py ================================================ import io import pytest import transformers from transformers import LogitsProcessorList from outlines_core import Index, Vocabulary from PIL import Image as PILImage from outlines.backends.outlines_core import OutlinesCoreLogitsProcessor from outlines.inputs import Chat, Image from outlines.models.transformers import TransformersTypeAdapter MODEL_NAME = "erwanf/gpt2-mini" @pytest.fixture def adapter(): tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME) type_adapter = TransformersTypeAdapter(tokenizer=tokenizer) chat_template = '{% for message in messages %}{{ message.role }}: {{ message.content }}{% endfor %}' type_adapter.tokenizer.chat_template = chat_template return type_adapter @pytest.fixture def logits_processor(): vocabulary = Vocabulary.from_pretrained("openai-community/gpt2") index = Index(r"[0-9]{3}", vocabulary) return OutlinesCoreLogitsProcessor(index, "torch") @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image def test_transformers_type_adapter_format_input(adapter, image): # invalid input with pytest.raises(TypeError, match="is not available."): adapter.format_input(["prompt", Image(image)]) # string with chat template # The fixture sets a chat template, so it should be formatted adapter.has_chat_template = True assert adapter.format_input("Hello, world!") == "user: Hello, world!" # string without chat template adapter.has_chat_template = False assert adapter.format_input("Hello, world!") == "Hello, world!" # chat # Restore chat template for chat test adapter.has_chat_template = True assert isinstance(adapter.format_input(Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ])), str) def test_transformers_type_adapter_format_output_type( adapter, logits_processor ): formatted = adapter.format_output_type(logits_processor) assert isinstance(formatted, LogitsProcessorList) assert formatted[0].index == logits_processor.index assert formatted[0].tensor_library_name == logits_processor.tensor_library_name formatted = adapter.format_output_type(None) assert formatted is None ================================================ FILE: tests/models/test_utils.py ================================================ from outlines.models.utils import set_additional_properties_false_json_schema def test_set_additional_properties_false_json_schema(): # additionalProperties is not set schema = { "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "integer"}, }, "required": ["name"], } modified_schema = set_additional_properties_false_json_schema(schema) target_schema = { "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "integer"}, }, "required": ["name"], "additionalProperties": False, } assert modified_schema == target_schema # additionalProperties is set to False schema = { "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "integer"}, }, "required": ["name"], "additionalProperties": False, } modified_schema = set_additional_properties_false_json_schema(schema) assert modified_schema == schema # additionalProperties is set to True schema = { "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "integer"}, }, "required": ["name"], "additionalProperties": True, } modified_schema = set_additional_properties_false_json_schema(schema) assert modified_schema == schema ================================================ FILE: tests/models/test_vllm.py ================================================ import io import os import re import warnings import base64 from typing import AsyncGenerator, Generator import pytest from PIL import Image as PILImage from openai import AsyncOpenAI, OpenAI from outlines.inputs import Chat, Image from outlines.models.vllm import VLLM, AsyncVLLM, from_vllm from outlines.types.dsl import CFG, Regex, JsonSchema from tests.test_utils.mock_openai_client import MockOpenAIClient, MockAsyncOpenAIClient YES_NO_GRAMMAR = """ ?start: answer answer: "yes" | "no" """ # Image for testing width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) image_input = Image(image) # If the VLLM_SERVER_URL environment variable is set, use the real vLLM server # Otherwise, use the mock server vllm_server_url = os.environ.get("VLLM_SERVER_URL") vllm_model_name = os.environ.get( "VLLM_MODEL_NAME", "Qwen/Qwen2.5-VL-3B-Instruct" ) if vllm_server_url: openai_client = OpenAI(base_url=vllm_server_url, api_key="foo") async_openai_client = AsyncOpenAI(base_url=vllm_server_url, api_key="foo") else: warnings.warn("No VLLM server URL provided, using mock server") openai_client = MockOpenAIClient() async_openai_client = MockAsyncOpenAIClient() mock_responses = [ ( { 'messages': [ {'role': "user", 'content': 'Respond with a single word.'} ], 'model': vllm_model_name, }, "foo" ), ( { 'messages': [ {'role': "user", 'content': 'Respond with a single word.'} ], 'model': vllm_model_name, 'stream': True }, ["foo", "bar"] ), ( { 'messages': [ {'role': "user", 'content': 'Respond with a single word.'} ], 'n': 2, 'model': vllm_model_name, }, ["foo", "bar"] ), ( { 'messages': [{'role': "user", 'content': 'foo?'}], 'model': vllm_model_name, 'max_tokens': 10, 'extra_body': { 'guided_json': { 'type': 'object', 'properties': { 'bar': {'type': 'string'} } }, } }, '{"foo": "bar"}' ), ( { 'messages': [{'role': "user", 'content': 'foo?'}], 'model': vllm_model_name, 'max_tokens': 10, 'extra_body': { 'guided_regex': '([0-9]{3})', }, }, "123" ), ( { 'messages': [{'role': "user", 'content': 'foo?'}], 'model': vllm_model_name, 'max_tokens': 10, 'extra_body': { 'guided_grammar': YES_NO_GRAMMAR, }, }, "yes" ), ( { 'messages': [ { "role": "user", "content": [ {"type": "text", "text": "hello"}, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_input.image_str}" }, }, ] } ], 'model': vllm_model_name, 'max_tokens': 10, }, "foo" ), ( { 'messages': [ {"role": "system", "content": "prompt"}, { "role": "user", "content": [ {"type": "text", "text": "hello"}, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_input.image_str}" }, }, ], }, {"role": "assistant", "content": "response"}, ], 'model': vllm_model_name, 'max_tokens': 10, }, "foo" ) ] # If the VLLM_SERVER_URL environment variable is not set, add the mock # responses to the mock clients if not vllm_server_url: async_openai_client.add_mock_responses(mock_responses) openai_client.add_mock_responses(mock_responses) @pytest.fixture def sync_model(): return VLLM(openai_client, vllm_model_name) @pytest.fixture def sync_model_no_model_name(): return VLLM(openai_client) @pytest.fixture def async_model(): return AsyncVLLM(async_openai_client, vllm_model_name) @pytest.fixture def async_model_no_model_name(): return AsyncVLLM(async_openai_client) def test_vllm_init(): # We do not rely on the mock server here because we need an object # of type OpenAI and AsyncOpenAI to test the init function. openai_client = OpenAI(base_url="http://localhost:11434", api_key="foo") async_openai_client = AsyncOpenAI(base_url="http://localhost:11434", api_key="foo") # Sync with model name model = from_vllm(openai_client, vllm_model_name) assert isinstance(model, VLLM) assert model.client == openai_client assert model.model_name == vllm_model_name # Sync without model name model = from_vllm(openai_client) assert isinstance(model, VLLM) assert model.client == openai_client assert model.model_name is None # Async with model name model = from_vllm(async_openai_client, vllm_model_name) assert isinstance(model, AsyncVLLM) assert model.client == async_openai_client assert model.model_name == vllm_model_name # Async without model name model = from_vllm(async_openai_client) assert isinstance(model, AsyncVLLM) assert model.client == async_openai_client assert model.model_name is None with pytest.raises(ValueError, match="Unsupported client type"): from_vllm("foo") def test_vllm_sync_simple_call(sync_model): result = sync_model("Respond with a single word.",) assert isinstance(result, str) def test_vllm_sync_streaming(sync_model_no_model_name): result = sync_model_no_model_name.stream( "Respond with a single word.", model=vllm_model_name, ) assert isinstance(result, Generator) assert isinstance(next(result), str) def test_vllm_sync_batch(sync_model): with pytest.raises(NotImplementedError, match="does not support"): sync_model.batch( ["Respond with one word.", "Respond with one word."], ) def test_vllm_sync_vision(sync_model): result = sync_model(["hello", image_input], max_tokens=10) assert isinstance(result, str) def test_vllm_sync_vision_chat(sync_model): result = sync_model( Chat(messages=[ {"role": "system", "content": "prompt"}, {"role": "user", "content": [ "hello", image_input, ]}, {"role": "assistant", "content": "response"}, ]), max_tokens=10, ) assert isinstance(result, str) def test_vllm_sync_multiple_samples(sync_model): result = sync_model("Respond with a single word.", n=2) assert isinstance(result, list) assert len(result) == 2 assert isinstance(result[0], str) assert isinstance(result[1], str) def test_vllm_sync_json(sync_model): json_string = '{"type": "object", "properties": {"bar": {"type": "string"}}}' result = sync_model("foo?", JsonSchema(json_string), max_tokens=10) assert isinstance(result, str) assert "bar" in result def test_vllm_sync_regex(sync_model): result = sync_model("foo?", Regex(r"[0-9]{3}"), max_tokens=10) assert isinstance(result, str) assert re.match(r"[0-9]{3}", result) def test_vllm_sync_cfg(sync_model): result = sync_model("foo?", CFG(YES_NO_GRAMMAR), max_tokens=10) assert isinstance(result, str) assert result in ["yes", "no"] @pytest.mark.asyncio async def test_vllm_async_simple_call(async_model): result = await async_model("Respond with a single word.",) assert isinstance(result, str) @pytest.mark.asyncio async def test_vllm_async_streaming(async_model_no_model_name): result = async_model_no_model_name.stream( "Respond with a single word.", model=vllm_model_name, ) assert isinstance(result, AsyncGenerator) async for chunk in result: assert isinstance(chunk, str) break # Just check the first chunk @pytest.mark.asyncio async def test_vllm_async_batch(async_model): with pytest.raises(NotImplementedError, match="does not support"): await async_model.batch( ["Respond with one word.", "Respond with one word."], ) @pytest.mark.asyncio async def test_vllm_async_vision(async_model): result = await async_model(["hello", image_input], max_tokens=10) assert isinstance(result, str) @pytest.mark.asyncio async def test_vllm_async_vision_chat(async_model): result = await async_model( Chat(messages=[ {"role": "system", "content": "prompt"}, {"role": "user", "content": [ "hello", image_input, ]}, {"role": "assistant", "content": "response"}, ]), max_tokens=10, ) assert isinstance(result, str) @pytest.mark.asyncio async def test_vllm_async_multiple_samples(async_model): result = await async_model("Respond with a single word.", n=2) assert isinstance(result, list) assert len(result) == 2 assert isinstance(result[0], str) assert isinstance(result[1], str) @pytest.mark.asyncio async def test_vllm_async_json(async_model): json_string = '{"type": "object", "properties": {"bar": {"type": "string"}}}' result = await async_model("foo?", JsonSchema(json_string), max_tokens=10) assert isinstance(result, str) assert "bar" in result @pytest.mark.asyncio async def test_vllm_async_regex(async_model): result = await async_model("foo?", Regex(r"[0-9]{3}"), max_tokens=10) assert isinstance(result, str) assert re.match(r"[0-9]{3}", result) @pytest.mark.asyncio async def test_vllm_async_cfg(async_model): result = await async_model("foo?", CFG(YES_NO_GRAMMAR), max_tokens=10) assert isinstance(result, str) assert result in ["yes", "no"] ================================================ FILE: tests/models/test_vllm_offline.py ================================================ import io import re from enum import Enum import pytest from PIL import Image as PILImage from pydantic import BaseModel try: from vllm import LLM, SamplingParams HAS_VLLM = True except ImportError: HAS_VLLM = False import outlines from outlines.inputs import Chat from outlines.models.vllm_offline import ( VLLMOffline, VLLMOfflineTypeAdapter, from_vllm_offline ) from outlines.types import Regex TEST_MODEL = "microsoft/Phi-3-mini-4k-instruct" pytestmark = pytest.mark.skipif( not HAS_VLLM, reason="vLLM models can only be run on GPU." ) @pytest.fixture(scope="session") def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image def test_vllm_model_initialization(): model = from_vllm_offline(LLM(TEST_MODEL)) assert isinstance(model, VLLMOffline) assert isinstance(model.model, LLM) assert isinstance(model.type_adapter, VLLMOfflineTypeAdapter) @pytest.fixture(scope="session") def model(tmp_path_factory): model = outlines.from_vllm_offline(LLM(TEST_MODEL)) return model def test_vllm_simple(model): result = model.generate("Respond with one word. Not more.", None) assert isinstance(result, str) def test_vllm_call(model): result = model("Respond with one word. Not more.") assert isinstance(result, str) def test_vllm_inference_kwargs(model): result = model( "Write a short story about a cat.", sampling_params=SamplingParams(max_tokens=2), use_tqdm=True ) assert isinstance(result, str) assert len(result) <= 20 def test_vllm_chat(model): result = model( Chat(messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What is the capital of France?"}, {"role": "assistant", "content": "Response: "}, ]), sampling_params=SamplingParams(max_tokens=2), ) assert isinstance(result, str) def test_vllm_invalid_inference_kwargs(model): with pytest.raises(TypeError): model("Respond with one word. Not more.", foo="bar") def test_vllm_regex(model): result = model("Give a number between 0 and 9.", Regex(r"[0-9]")) assert isinstance(result, str) assert re.match(r"[0-9]", result) def test_vllm_json(model): class Character(BaseModel): name: str result = model("Create a character with a name.", Character) assert "name" in result def test_vllm_choice(model): class Foo(Enum): cat = "cat" dog = "dog" result = model("Cat or dog?", Foo) assert result in ["cat", "dog"] def test_vllm_multiple_samples(model): result = model( "Respond with one word. Not more.", sampling_params=SamplingParams(n=2) ) assert isinstance(result, list) assert len(result) == 2 def test_vllm_batch(model): result = model.batch( ["Respond with one word. Not more.", "Respond with one word. Not more."] ) assert isinstance(result, list) assert len(result) == 2 result = model.batch( ["Respond with one word. Not more.", "Respond with one word. Not more."], sampling_params=SamplingParams(n=2) ) assert isinstance(result, list) assert len(result) == 2 for item in result: assert isinstance(item, list) assert len(item) == 2 with pytest.raises(TypeError, match="Batch generation is not available"): model.batch( [ Chat(messages=[ {"role": "user", "content": "What is the capital of France?"}, ]), ] ) def test_vllm_streaming(model): with pytest.raises( NotImplementedError, match="Streaming is not available" ): model.stream("Respond with one word. Not more.") ================================================ FILE: tests/models/test_vllm_offline_type_adapter.py ================================================ import io import json import pytest from PIL import Image as PILImage from outlines.inputs import Chat, Image from outlines.models.vllm_offline import VLLMOfflineTypeAdapter from outlines.types import CFG, JsonSchema, Regex CFG_STRING = """ ?start: expr ?expr: NUMBER """ JSON_SCHEMA_STRING = """ { "type": "object", "properties": { "answer": {"type": "number"} } } """ @pytest.fixture def type_adapter(): return VLLMOfflineTypeAdapter() @pytest.fixture def cfg_instance(): return CFG(CFG_STRING) @pytest.fixture def json_schema_instance(): return JsonSchema(JSON_SCHEMA_STRING) @pytest.fixture def json_schema_whitespace_instance(): return JsonSchema(JSON_SCHEMA_STRING, whitespace_pattern="\n") @pytest.fixture def regex_instance(): return Regex(r"[0-9]+") @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image def test_vllm_offline_type_adapter_input_text(type_adapter): message = "prompt" result = type_adapter.format_input(message) assert result == message def test_vllm_offline_type_adapter_input_text_with_template(): adapter = VLLMOfflineTypeAdapter(has_chat_template=True) message = "prompt" result = adapter.format_input(message) assert result == [{"role": "user", "content": "prompt"}] def test_vllm_offline_type_adapter_input_text_without_template(): adapter = VLLMOfflineTypeAdapter(has_chat_template=False) message = "prompt" result = adapter.format_input(message) assert result == "prompt" def test_vllm_offline_type_adapter_input_chat(type_adapter): model_input = Chat(messages=[ {"role": "system", "content": "prompt"}, {"role": "user", "content": "hello"}, {"role": "assistant", "content": "response"}, ]) result = type_adapter.format_input(model_input) assert result == [ {"role": "system", "content": "prompt"}, {"role": "user", "content": "hello"}, {"role": "assistant", "content": "response"}, ] def test_vllm_offline_type_adapter_input_invalid(type_adapter, image): with pytest.raises(TypeError, match="is not available"): _ = type_adapter.format_input(["Hello", Image(image)]) with pytest.raises(ValueError, match="Assets are not supported"): _ = type_adapter.format_input(Chat(messages=[ {"role": "user", "content": [ "Hello", Image(image), ]}, ])) def test_vllm_offline_type_adapter_output_type( type_adapter, cfg_instance, json_schema_instance, json_schema_whitespace_instance, regex_instance, ): assert type_adapter.format_output_type(None) == {} assert type_adapter.format_output_type(cfg_instance) == { "grammar": CFG_STRING } assert type_adapter.format_output_type(json_schema_instance) == { "json": json.loads(JSON_SCHEMA_STRING) } assert type_adapter.format_output_type(json_schema_whitespace_instance) == { "json": json.loads(JSON_SCHEMA_STRING), "whitespace_pattern": "\n" } assert type_adapter.format_output_type(regex_instance) == { "regex": "([0-9]+)" } ================================================ FILE: tests/models/test_vllm_type_adapter.py ================================================ import io import json import pytest from dataclasses import dataclass from PIL import Image as PILImage from outlines.inputs import Chat, Image from outlines.models.vllm import VLLMTypeAdapter from outlines.types import CFG, JsonSchema CFG_STRING = """ ?start: expr ?expr: NUMBER """ JSON_SCHEMA_STRING = """ { "type": "object", "properties": { "answer": {"type": "number"} } } """ @pytest.fixture def type_adapter(): return VLLMTypeAdapter() @pytest.fixture def cfg_instance(): return CFG(CFG_STRING) @pytest.fixture def json_schema_instance(): return JsonSchema(JSON_SCHEMA_STRING) @pytest.fixture def json_schema_whitespace_instance(): return JsonSchema(JSON_SCHEMA_STRING, whitespace_pattern="\n") @pytest.fixture def image(): width, height = 1, 1 white_background = (255, 255, 255) image = PILImage.new("RGB", (width, height), white_background) # Save to an in-memory bytes buffer and read as png buffer = io.BytesIO() image.save(buffer, format="PNG") buffer.seek(0) image = PILImage.open(buffer) return image def test_vllm_type_adapter_input_text(type_adapter): message = "prompt" result = type_adapter.format_input(message) assert result == [{"role": "user", "content": message}] def test_vllm_type_adapter_input_vision(type_adapter, image): image_input = Image(image) result = type_adapter.format_input(["hello", image_input]) assert result == [ { "role": "user", "content": [ {"type": "text", "text": "hello"}, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_input.image_str}" }, }, ], } ] def test_vllm_type_adapter_input_chat(type_adapter, image): image_input = Image(image) model_input = Chat(messages=[ {"role": "system", "content": "prompt"}, {"role": "user", "content": [ "hello", image_input, ]}, {"role": "assistant", "content": "response"}, ]) result = type_adapter.format_input(model_input) assert result == [ {"role": "system", "content": "prompt"}, { "role": "user", "content": [ {"type": "text", "text": "hello"}, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_input.image_str}" }, }, ], }, {"role": "assistant", "content": "response"}, ] def test_vllm_type_adapter_input_invalid(type_adapter): @dataclass class Audio: file: str prompt = Audio( "file", ) with pytest.raises(TypeError, match="The input type"): _ = type_adapter.format_input(prompt) def test_vllm_type_adapter_output_type( type_adapter, cfg_instance, json_schema_instance, json_schema_whitespace_instance, ): assert type_adapter.format_output_type(None) == {} assert type_adapter.format_output_type(cfg_instance) == { "guided_grammar": CFG_STRING } assert type_adapter.format_output_type(json_schema_instance) == { "guided_json": json.loads(JSON_SCHEMA_STRING) } assert type_adapter.format_output_type(json_schema_whitespace_instance) == { "guided_json": json.loads(JSON_SCHEMA_STRING), "whitespace_pattern": "\n" } assert type_adapter.format_output_type(int) == { "guided_regex": "([+-]?(0|[1-9][0-9]*))" } ================================================ FILE: tests/processors/test_base_processor.py ================================================ from typing import List import numpy as np import pytest import torch from outlines.processors.base_logits_processor import OutlinesLogitsProcessor try: import mlx.core as mx HAS_MLX = True except ImportError: HAS_MLX = False libraries = ["numpy", "torch"] if HAS_MLX: libraries.append("mlx") # we check the accepted shapes: # - both 1D # - both 2D # - input_ids 1D and logits 2D with a single sequence # we raise an error if the shapes are not accepted: # - input_ids 2D and logits 1D # - input_ids 1D and logits 2D, but with multiple sequences # - both 3D arrays = { "numpy": [ (np.array([1, 2], dtype=np.float32), np.array([1, 2], dtype=np.int32), None), (np.array([[1, 2], [3, 4]], dtype=np.float32), np.array([[1, 2], [3, 4]], dtype=np.int32), None), (np.array([1, 2], dtype=np.float32), np.array([[1, 2]], dtype=np.int32), None), (np.array([[1, 2]], dtype=np.float32), np.array([1, 2], dtype=np.int32), AssertionError), (np.array([1, 2], dtype=np.float32), np.array([[1, 2], [3, 4]], dtype=np.int32), AssertionError), (np.array([[[1, 2]]], dtype=np.float32), np.array([[[1, 2]]], dtype=np.int32), ValueError), ], "torch": [ (torch.tensor([1, 2], dtype=torch.float32), torch.tensor([1, 2], dtype=torch.int32), None), (torch.tensor([[1, 2], [3, 4]], dtype=torch.float32), torch.tensor([[1, 2], [3, 4]], dtype=torch.int32), None), (torch.tensor([1, 2], dtype=torch.float32), torch.tensor([[1, 2]], dtype=torch.int32), None), (torch.tensor([[1, 2]], dtype=torch.float32), torch.tensor([1, 2], dtype=torch.int32), AssertionError), (torch.tensor([1, 2], dtype=torch.float32), torch.tensor([[1, 2], [3, 4]], dtype=torch.int32), AssertionError), (torch.tensor([[[1, 2]]], dtype=torch.float32), torch.tensor([[[1, 2]]], dtype=torch.int32), ValueError), ], } if HAS_MLX: arrays["mlx"] = [ (mx.array([1, 2], dtype=mx.float32), mx.array([1, 2], dtype=mx.int32), None), (mx.array([[1, 2], [3, 4]], dtype=mx.float32), mx.array([[1, 2], [3, 4]], dtype=mx.int32), None), (mx.array([1, 2], dtype=mx.float32), mx.array([[1, 2]], dtype=mx.int32), None), (mx.array([[1, 2]], dtype=mx.float32), mx.array([1, 2], dtype=mx.int32), AssertionError), (mx.array([1, 2], dtype=mx.float32), mx.array([[1, 2], [3, 4]], dtype=mx.int32), AssertionError), (mx.array([[[1, 2]]], dtype=mx.float32), mx.array([[[1, 2]]], dtype=mx.int32), ValueError), ] class MockLogitsProcessor(OutlinesLogitsProcessor): def process_logits(self, input_ids, logits): # check that input_ids and logits received are 2D tensors assert len(self.tensor_adapter.shape(input_ids)) == 2 assert len(self.tensor_adapter.shape(logits)) == 2 return logits @pytest.mark.parametrize("library", libraries) def test_base_logits_processor_init(library): processor = MockLogitsProcessor(library) assert processor.tensor_adapter is not None with pytest.raises(NotImplementedError): processor = MockLogitsProcessor("foo") processor.reset() @pytest.mark.parametrize("library", libraries) def test_base_logits_processor_call(library): processor = MockLogitsProcessor(library) input_values = arrays[library] for input_value in input_values: input_ids, logits, expected_error = input_value if expected_error is not None: with pytest.raises(expected_error): processor(input_ids, logits) else: original_shape = processor.tensor_adapter.shape(logits) processed_logits = processor(input_ids, logits) # we check that the shape of logits is preserved assert processor.tensor_adapter.shape(processed_logits) == original_shape @pytest.mark.parametrize("library", libraries) def test_base_logits_processor_init_library_name(library): processor = MockLogitsProcessor(library) assert processor.tensor_adapter is not None with pytest.raises(NotImplementedError): processor = MockLogitsProcessor("foo") ================================================ FILE: tests/processors/test_tensor_adapters.py ================================================ import pytest from pytest import mark import numpy as np import torch from outlines.processors.tensor_adapters import ( NumpyTensorAdapter, TorchTensorAdapter, MLXTensorAdapter, ) try: import mlx_lm import mlx.core as mx HAS_MLX = mx.metal.is_available() except ImportError: HAS_MLX = False adapters = { "numpy": NumpyTensorAdapter(), "torch": TorchTensorAdapter(), } if HAS_MLX: adapters["mlx"] = MLXTensorAdapter() frameworks = ["numpy", "torch", "mlx"] def create_tensor(framework, shape, dtype=None): if framework == "torch": return torch.randn(*shape) elif framework == "numpy": return np.random.randn(*shape) elif framework == "mlx": if not HAS_MLX: pytest.skip("MLX not available") return mx.random.normal(shape) def compare_tensors(framework, tensor1, tensor2): if framework == "torch": return torch.allclose(tensor1, tensor2) elif framework == "numpy": return np.array_equal(tensor1, tensor2) elif framework == "mlx": if not HAS_MLX: pytest.skip("MLX not available") return mx.array_equal(tensor1, tensor2) @pytest.mark.parametrize("framework", frameworks) def test_tensor_adapter_shape(framework): # 1d tensor tensor_1d = create_tensor(framework, (2,)) result_1d = adapters[framework].shape(tensor_1d) assert len(result_1d) == 1 assert result_1d[0] == 2 # 2d tensor tensor_2d = create_tensor(framework, (2, 3)) result_2d = adapters[framework].shape(tensor_2d) assert len(result_2d) == 2 assert result_2d[0] == 2 assert result_2d[1] == 3 # 3d tensor tensor_3d = create_tensor(framework, (2, 2, 3)) result_3d = adapters[framework].shape(tensor_3d) assert len(result_3d) == 3 assert result_3d[0] == 2 assert result_3d[1] == 2 assert result_3d[2] == 3 @pytest.mark.parametrize("framework", frameworks) def test_tensor_adapter_unsqueeze(framework): # 1d tensor tensor_1d = create_tensor(framework, (2,)) result_1d = adapters[framework].unsqueeze(tensor_1d) assert result_1d.shape == (1, 2) # 2d tensor tensor_2d = create_tensor(framework, (2, 3)) result_2d = adapters[framework].unsqueeze(tensor_2d) assert result_2d.shape == (1, 2, 3) @pytest.mark.parametrize("framework", frameworks) def test_tensor_adapter_squeeze(framework): # 1d tensor tensor_1d = create_tensor(framework, (1,)) result_1d = adapters[framework].squeeze(tensor_1d) with pytest.raises(TypeError): len(result_1d) # 2d tensor tensor_2d = create_tensor(framework, (1, 2)) result_2d = adapters[framework].squeeze(tensor_2d) assert result_2d.shape == (2,) # 3d tensor tensor_3d = create_tensor(framework, (1, 2, 3)) result_3d = adapters[framework].squeeze(tensor_3d) assert result_3d.shape == (2, 3) @pytest.mark.parametrize("framework", frameworks) def test_tensor_adapter_to_list(framework): # 1d tensor tensor_1d = create_tensor(framework, (2,)) result_1d = adapters[framework].to_list(tensor_1d) assert isinstance(result_1d, list) assert len(result_1d) == 2 # 2d tensor tensor_2d = create_tensor(framework, (2, 3)) result_2d = adapters[framework].to_list(tensor_2d) assert isinstance(result_2d, list) assert len(result_2d) == 2 assert len(result_2d[0]) == 3 assert len(result_2d[1]) == 3 # 3d tensor tensor_3d = create_tensor(framework, (2, 2, 3)) result_3d = adapters[framework].to_list(tensor_3d) assert isinstance(result_3d, list) assert len(result_3d) == 2 assert len(result_3d[0]) == 2 assert len(result_3d[1]) == 2 assert len(result_3d[0][0]) == 3 assert len(result_3d[0][1]) == 3 assert len(result_3d[1][0]) == 3 assert len(result_3d[1][1]) == 3 @pytest.mark.parametrize("framework", frameworks) def test_tensor_adapter_to_scalar(framework): # multi-elements tensor, should raise an error tensor_multi = create_tensor(framework, (2, 3)) if framework == "torch": with pytest.raises(RuntimeError): adapters[framework].to_scalar(tensor_multi) else: with pytest.raises(ValueError): adapters[framework].to_scalar(tensor_multi) # single-element tensor tensor_single = create_tensor(framework, (1, 1)) scalar = adapters[framework].to_scalar(tensor_single) assert isinstance(scalar, float) @pytest.mark.parametrize("framework", frameworks) def test_tensor_adapter_full_like(framework): tensor = create_tensor(framework, (2, 3)) result = adapters[framework].full_like(tensor, 0) assert result.shape == (2, 3) for i in range(2): for j in range(3): assert result[i, j] == 0 @pytest.mark.parametrize("framework", frameworks) def test_tensor_adapter_concatenate(framework): # 1d tensors tensor1 = create_tensor(framework, (2,)) tensor2 = create_tensor(framework, (2,)) result = adapters[framework].concatenate([tensor1, tensor2]) assert result.shape == (4,) assert result[0] == tensor1[0] assert result[1] == tensor1[1] assert result[2] == tensor2[0] assert result[3] == tensor2[1] # 2d tensors tensor1 = create_tensor(framework, (2, 3)) tensor2 = create_tensor(framework, (2, 3)) result = adapters[framework].concatenate([tensor1, tensor2]) assert result.shape == (4, 3) for i in range(2): for j in range(3): assert result[i, j] == tensor1[i, j] assert result[i + 2, j] == tensor2[i, j] # 3d tensors tensor1 = create_tensor(framework, (2, 2, 3)) tensor2 = create_tensor(framework, (2, 2, 3)) result = adapters[framework].concatenate([tensor1, tensor2]) assert result.shape == (4, 2, 3) for i in range(2): for j in range(2): for k in range(3): assert result[i, j, k] == tensor1[i, j, k] assert result[i + 2, j, k] == tensor2[i, j, k] @pytest.mark.parametrize("framework", frameworks) def test_tensor_adapter_get_to_device(framework): tensor = create_tensor(framework, (2, 3)) device = adapters[framework].get_device(tensor) device_tensor = adapters[framework].to_device(tensor, device) if framework == "torch": assert isinstance(device_tensor.device.type, str) assert compare_tensors(framework, device_tensor, tensor) else: assert compare_tensors(framework, device_tensor, tensor) @pytest.mark.parametrize("framework", frameworks) def test_tensor_adapter_boolean_ones_like(framework): tensor = create_tensor(framework, (2, 3)) ones = adapters[framework].boolean_ones_like(tensor) assert ones.shape == (2, 3) for i in range(2): for j in range(3): assert ones[i, j] @pytest.mark.parametrize("framework", frameworks) def test_tensor_adapter_apply_mask(framework): tensor = create_tensor(framework, (2, 3)) if framework == "torch": mask = torch.randn(2, 3) > 0 elif framework == "numpy": mask = np.random.randn(2, 3) > 0 elif framework == "mlx": if not HAS_MLX: pytest.skip("MLX not available") mask = mx.random.normal((2, 3)) > 0 masked = adapters[framework].apply_mask(tensor, mask, float("-inf")) assert masked.shape == (2, 3) for i in range(2): for j in range(3): if mask[i, j]: assert masked[i, j] == float("-inf") else: assert masked[i, j] == tensor[i, j] @pytest.mark.parametrize("framework", frameworks) def test_tensor_adapter_argsort_descending(framework): tensor = create_tensor(framework, (2, 3)) indices = adapters[framework].argsort_descending(tensor) assert indices.shape == (2, 3) for i in range(2): sorted_values = [tensor[i][idx] for idx in indices[i]] for j in range(len(sorted_values) - 1): assert sorted_values[j] >= sorted_values[j + 1] ================================================ FILE: tests/test_applications.py ================================================ from typing import Any import jinja2 import pytest import transformers from outlines import from_transformers from outlines.applications import Application from outlines.templates import Template @pytest.fixture(scope="session") def model(): return from_transformers( transformers.AutoModelForCausalLM.from_pretrained("gpt2"), transformers.AutoTokenizer.from_pretrained("gpt2"), ) @pytest.fixture(scope="session") def another_model(): return from_transformers( transformers.AutoModelForCausalLM.from_pretrained("gpt2"), transformers.AutoTokenizer.from_pretrained("gpt2"), ) def test_application_initialization(): template = Template.from_string("Test {{ value }}") output_type = None application = Application(template, output_type) assert application.template == template assert application.output_type == output_type assert application.model is None assert application.generator is None def test_application_generator_no_model(): template = Template.from_string("Test {{ value }}") output_type = None application = Application(template, output_type) with pytest.raises(ValueError): application(None, {"value": "example"}) def test_application_template_call(model): template = Template.from_string("Test {{ value }}") output_type = None application = Application(template, output_type) result = application(model, {"value": "example"}, max_new_tokens=10) assert isinstance(result, str) def test_application_callable_call(model): def template(value): return f"Test {value}" output_type = None application = Application(template, output_type) result = application(model, {"value": "example"}, max_new_tokens=10) assert isinstance(result, str) def test_application_template_error(model): template = Template.from_string("Test {{ value }}") output_type = None application = Application(template, output_type) with pytest.raises(jinja2.exceptions.UndefinedError): application(model, {"foo": "bar"}) def test_application_generator_reuse(model, another_model): template = Template.from_string("Test {{ value }}") output_type = None application = Application(template, output_type) application(model, {"value": "example"}, max_new_tokens=10) first_generator = application.generator first_model = application.model application(model, {"value": "example"}, max_new_tokens=10) assert application.model == first_model assert application.generator == first_generator application(another_model, {"value": "example"}, max_new_tokens=10) assert application.model == another_model assert application.model != first_model assert application.generator != first_generator ================================================ FILE: tests/test_cache.py ================================================ import os import tempfile import unittest from importlib import reload import diskcache import pytest from diskcache import Cache, UNKNOWN from outlines.caching import CloudpickleDisk @pytest.fixture def temp_dir(): """Create a temporary directory for testing.""" directory = tempfile.mkdtemp() yield directory @pytest.fixture def refresh_environment(): """Refresh the test environment. This deletes any reference to `outlines` in the modules dictionary and unsets the `OUTLINES_CACHE_DIR` environment variable if set. This is necessary because we are using a module variable to hold the cache. """ import sys for key in list(sys.modules.keys()): if "outlines" in key: del sys.modules[key] try: del os.environ["OUTLINES_CACHE_DIR"] except KeyError: pass @pytest.fixture def test_cache(refresh_environment): """Initialize a temporary cache and delete it after the test has run.""" with tempfile.TemporaryDirectory() as tempdir: os.environ["OUTLINES_CACHE_DIR"] = tempdir import outlines memory = outlines.get_cache() assert memory.directory == tempdir yield outlines.caching.cache() memory.clear() def test_get_cache(test_cache): import outlines memory = outlines.get_cache() assert isinstance(memory, diskcache.Cache) # If the cache is enabled then the size # of `store` should not increase the # second time `f` is called. store = list() @test_cache def f(x): store.append(1) return x f(1) store_size = len(store) f(1) assert len(store) == store_size f(2) assert len(store) == store_size + 1 def test_disable_cache(test_cache): """Make sure that we can disable the cache.""" import outlines outlines.disable_cache() # If the cache is disabled then the size # of `store` should increase every time # `f` is called. store = list() @test_cache def f(x): store.append(1) return x f(1) store_size = len(store) f(1) assert len(store) == store_size + 1 def test_clear_cache(test_cache): """Make sure that we can clear the cache.""" import outlines store = list() @test_cache def f(x): store.append(1) return x # The size of `store` does not increase since # `f` is cached after the first run. f(1) store_size = len(store) f(1) assert len(store) == store_size # The size of `store` should increase if we call `f` # after clearing the cache. outlines.clear_cache() f(1) assert len(store) == store_size + 1 def test_version_upgrade_cache_invalidate(test_cache, mocker): """Ensure we can change the signature of a cached function if we upgrade the version""" import outlines.caching def simulate_restart_outlines(): # clearing in-memory lru_cache which returns the diskcache in # order to simulate a reload, we're not clearing the diskcache itself outlines.caching.get_cache.cache_clear() mocker.patch("outlines._version.__version__", new="0.0.0") simulate_restart_outlines() # initialize cache with signature of Tuple-of-3 @test_cache def foo(): return (1, 2, 3) a, b, c = foo() # "restart" outlines without upgrading version simulate_restart_outlines() # change signature to Tuple-of-2 @test_cache def foo(): return (1, 2) # assert without version upgrade, old, bad cache is used with pytest.raises(ValueError): a, b = foo() # "restart" outlines WITH version upgrade mocker.patch("outlines._version.__version__", new="0.0.1") simulate_restart_outlines() # change signature to Tuple-of-2 @test_cache def foo(): return (1, 2) # assert with version upgrade, old cache is invalidated and new cache is used a, b = foo() def test_cache_disabled_decorator(test_cache): """Ensure cache can be disabled in a local scope""" from outlines.caching import cache_disabled mock = unittest.mock.MagicMock() @test_cache def fn(): mock() return 1 # first call isn't cached fn() assert mock.call_count == 1 # second call doesn't run fn, uses cache fn() assert mock.call_count == 1 # cache_disabled decorator disables cache within scope with cache_disabled(): fn() assert mock.call_count == 2 # called once in cache_disabled scope # scope has exited, cache is enabled again fn() assert mock.call_count == 2 @pytest.fixture def temp_cache_dir(): import os import tempfile import outlines.caching with tempfile.TemporaryDirectory() as tempdir: os.environ["OUTLINES_CACHE_DIR"] = tempdir outlines.caching.get_cache.cache_clear() reload(outlines) cache_status = outlines.caching._caching_enabled try: outlines.caching._caching_enabled = True yield finally: outlines.caching._caching_enabled = cache_status ================================================ FILE: tests/test_generator.py ================================================ import pytest from typing import AsyncGenerator, Generator as TypingGenerator, Literal import transformers from outlines_core import Index, Vocabulary import outlines from outlines.backends.outlines_core import OutlinesCoreLogitsProcessor from outlines.generator import ( BlackBoxGenerator, SteerableGenerator, Generator, AsyncBlackBoxGenerator, ) from outlines.models import AsyncVLLM, VLLM from outlines.processors import ( OutlinesLogitsProcessor, ) from outlines.types import CFG from tests.test_utils.mock_openai_client import ( MockAsyncOpenAIClient, MockOpenAIClient, ) MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct" # We used the mocked vllm model to test the black box generator async_openai_client = MockAsyncOpenAIClient() openai_client = MockOpenAIClient() mock_responses = [ ( { 'messages': [ {'role': "user", 'content': 'Write a very short sentence'} ], 'model': MODEL_NAME, 'max_tokens': 10, 'extra_body': {'guided_regex': '("[^"]*")'}, }, "Mock response" ), ( { 'messages': [ {'role': "user", 'content': 'Write a very short sentence'} ], 'model': MODEL_NAME, 'max_tokens': 10, 'extra_body': {'guided_regex': '("[^"]*")'}, 'stream': True, }, ["Mock", "response"] ), ] async_openai_client.add_mock_responses(mock_responses) openai_client.add_mock_responses(mock_responses) @pytest.fixture(scope="session") def steerable_model(): model = outlines.from_transformers( transformers.AutoModelForCausalLM.from_pretrained("erwanf/gpt2-mini"), transformers.AutoTokenizer.from_pretrained("erwanf/gpt2-mini"), ) return model @pytest.fixture(scope="session") def sample_processor(): vocabulary = Vocabulary.from_pretrained("openai-community/gpt2") index = Index(r"[0-9]{3}", vocabulary) return OutlinesCoreLogitsProcessor(index, "torch") @pytest.fixture(scope="module") def black_box_sync_model(): return VLLM(openai_client, MODEL_NAME) @pytest.fixture(scope="module") def black_box_async_model(): return AsyncVLLM(async_openai_client, MODEL_NAME) # SteerableGenerator def test_steerable_generator_init_valid_processor(steerable_model, sample_processor): generator = SteerableGenerator.from_processor(steerable_model, sample_processor) assert generator.logits_processor == sample_processor assert generator.model == steerable_model def test_steerable_generator_init_cfg_output_type(steerable_model): generator = SteerableGenerator(steerable_model, CFG('start: "a"')) assert generator.model == steerable_model assert isinstance(generator.logits_processor, OutlinesLogitsProcessor) def test_steerable_generator_init_other_output_type(steerable_model): generator = SteerableGenerator(steerable_model, Literal["foo", "bar"]) assert generator.model == steerable_model assert isinstance(generator.logits_processor, OutlinesLogitsProcessor) def test_steerable_generator_init_invalid_output_type(steerable_model, sample_processor): with pytest.raises(ValueError): SteerableGenerator(steerable_model, sample_processor) def test_steerable_generator_call(steerable_model): generator = SteerableGenerator(steerable_model, Literal["foo", "bar"]) result = generator("foo", max_new_tokens=10) assert isinstance(result, str) def test_steerable_generator_stream(steerable_model): with pytest.raises(NotImplementedError): generator = SteerableGenerator(steerable_model, Literal["foo", "bar"]) result = generator.stream("foo", max_tokens=10) assert isinstance(result, TypingGenerator) assert isinstance(next(result), str) # BlackBoxGenerator def test_black_box_generator_init(black_box_sync_model): generator = BlackBoxGenerator(black_box_sync_model, Literal["foo", "bar"]) assert generator.model == black_box_sync_model assert generator.output_type == Literal["foo", "bar"] def test_black_box_generator_call(black_box_sync_model): generator = BlackBoxGenerator(black_box_sync_model, str) result = generator("Write a very short sentence", max_tokens=10) assert isinstance(result, str) def test_black_box_generator_stream(black_box_sync_model): generator = BlackBoxGenerator(black_box_sync_model, str) result = generator.stream("Write a very short sentence", max_tokens=10) assert isinstance(result, TypingGenerator) assert isinstance(next(result), str) # AsyncBlackBoxGenerator def test_async_black_box_generator_init(black_box_async_model): generator = AsyncBlackBoxGenerator(black_box_async_model, Literal["foo", "bar"]) assert generator.model == black_box_async_model assert generator.output_type == Literal["foo", "bar"] @pytest.mark.asyncio async def test_async_black_box_generator_call(black_box_async_model): generator = AsyncBlackBoxGenerator(black_box_async_model, str) result = await generator("Write a very short sentence", max_tokens=10) assert isinstance(result, str) @pytest.mark.asyncio async def test_async_black_box_generator_stream(black_box_async_model): generator = AsyncBlackBoxGenerator(black_box_async_model, str) result = generator.stream("Write a very short sentence", max_tokens=10) assert isinstance(result, AsyncGenerator) async for chunk in result: assert isinstance(chunk, str) break # Just check the first chunk # Generator def test_generator_init_no_model(): with pytest.raises(ValueError): Generator(None, Literal["foo", "bar"]) def test_generator_init_multiple_output_type(steerable_model, sample_processor): with pytest.raises(ValueError): Generator(steerable_model, Literal["foo", "bar"], processor=sample_processor) def test_generator_steerable_output_type(steerable_model): generator = Generator(steerable_model, Literal["foo", "bar"]) assert isinstance(generator, SteerableGenerator) assert generator.model == steerable_model assert isinstance(generator.logits_processor, OutlinesLogitsProcessor) def test_generator_steerable_processor(steerable_model, sample_processor): generator = Generator(steerable_model, processor=sample_processor) assert isinstance(generator, SteerableGenerator) assert generator.model == steerable_model assert isinstance(generator.logits_processor, OutlinesLogitsProcessor) def test_generator_black_box_sync_output_type(black_box_sync_model): generator = Generator(black_box_sync_model, Literal["foo", "bar"]) assert isinstance(generator, BlackBoxGenerator) assert generator.model == black_box_sync_model assert generator.output_type == Literal["foo", "bar"] def test_generator_black_box_sync_processor(black_box_sync_model, sample_processor): with pytest.raises(NotImplementedError): Generator(black_box_sync_model, processor=sample_processor) def test_generator_black_box_async_output_type(black_box_async_model): generator = Generator(black_box_async_model, Literal["foo", "bar"]) assert isinstance(generator, AsyncBlackBoxGenerator) assert generator.model == black_box_async_model assert generator.output_type == Literal["foo", "bar"] def test_generator_black_box_async_processor(black_box_async_model, sample_processor): with pytest.raises(NotImplementedError): Generator(black_box_async_model, processor=sample_processor) ================================================ FILE: tests/test_inputs.py ================================================ """Unit tests for the inputs module.""" import base64 import tempfile from io import BytesIO from typing import Dict, List, Any import pytest from PIL import Image as PILImage from outlines.inputs import Image, Video, Audio, Chat @pytest.fixture def image_input(): image = PILImage.new("RGB", (100, 100), color="red") image.format = "PNG" buffer = BytesIO() image.save(buffer, format="PNG") return Image(image=image) def test_image_initialization(): # png image = PILImage.new("RGB", (100, 100), color="red") image.format = "PNG" buffer = BytesIO() image.save(buffer, format="PNG") image_input = Image(image=image) assert image_input.image == image assert image_input.image_format == "image/png" assert image_input.image_str == base64.b64encode(buffer.getvalue()).decode("utf-8") # jpeg image = PILImage.new("RGB", (100, 100), color="blue") image.format = "JPEG" buffer = BytesIO() image.save(buffer, format="JPEG") image_input = Image(image=image) assert image_input.image == image assert image_input.image_format == "image/jpeg" assert image_input.image_str == base64.b64encode(buffer.getvalue()).decode("utf-8") def test_image_initialization_invalid(): """Test that Image initialization fails when image has no format.""" # No format image = PILImage.new("RGB", (100, 100), color="yellow") with pytest.raises(TypeError, match="Could not read the format of the image"): Image(image=image) # Empty string format image = PILImage.new("RGB", (100, 100), color="orange") image.format = "" with pytest.raises(TypeError, match="Could not read the format of the image"): Image(image=image) def test_video_initialization(): video = "foo" video_input = Video(video=video) assert video_input.video == video def test_audio_initialization(): audio = "foo" audio_input = Audio(audio=audio) assert audio_input.audio == audio def test_chat_initialization(): # Empty chat = Chat() assert chat.messages == [] assert len(chat.messages) == 0 assert str(chat) == "" assert repr(chat) == "Chat(messages=[])" # With messages messages = [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}, {"role": "assistant", "content": "Hi there!"} ] chat = Chat(messages=messages) assert chat.messages == messages assert len(chat.messages) == 3 assert str(chat) == "{'role': 'system', 'content': 'You are a helpful assistant.'}\n{'role': 'user', 'content': 'Hello!'}\n{'role': 'assistant', 'content': 'Hi there!'}" assert repr(chat) == "Chat(messages=[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Hello!'}, {'role': 'assistant', 'content': 'Hi there!'}])" def test_chat_append(): chat = Chat(messages=[]) message = {"role": "user", "content": "Hello"} chat.append(message) assert len(chat.messages) == 1 assert chat.messages[0] == message def test_chat_extend(): chat = Chat(messages=[]) messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi"} ] chat.extend(messages) assert len(chat.messages) == 2 assert chat.messages == messages def test_chat_pop(): # Pop from non-empty chat messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi"} ] chat = Chat(messages=messages.copy()) popped_message = chat.pop() assert popped_message == {"role": "assistant", "content": "Hi"} assert len(chat.messages) == 1 assert chat.messages[0] == {"role": "user", "content": "Hello"} # Pop from empty chat chat = Chat(messages=[]) with pytest.raises(IndexError): chat.pop() def test_chat_add_system_message(image_input): # Add a string chat = Chat(messages=[]) chat.add_system_message("You are a helpful assistant.") assert len(chat.messages) == 1 assert chat.messages[0]["role"] == "system" assert chat.messages[0]["content"] == "You are a helpful assistant." # Add a list chat = Chat(messages=[]) chat.add_system_message(["prompt", image_input]) assert len(chat.messages) == 1 assert chat.messages[0]["role"] == "system" assert chat.messages[0]["content"] == ["prompt", image_input] # Add a list of dict items with explicit types chat = Chat(messages=[]) chat.add_system_message([{"type": "text", "text": "prompt"}, {"type": "image", "image": image_input}]) assert len(chat.messages) == 1 assert chat.messages[0]["role"] == "system" assert chat.messages[0]["content"] == [{"type": "text", "text": "prompt"}, {"type": "image", "image": image_input}] def test_add_user_message_string(image_input): # Add a string chat = Chat(messages=[]) chat.add_user_message("Hello, how are you?") assert len(chat.messages) == 1 assert chat.messages[0]["role"] == "user" assert chat.messages[0]["content"] == "Hello, how are you?" # Add a list chat = Chat(messages=[]) chat.add_user_message(["prompt", image_input]) assert len(chat.messages) == 1 assert chat.messages[0]["role"] == "user" assert chat.messages[0]["content"] == ["prompt", image_input] # Add a list of dict items with explicit types chat = Chat(messages=[]) chat.add_user_message([{"type": "text", "text": "prompt"}, {"type": "image", "image": image_input}]) assert len(chat.messages) == 1 assert chat.messages[0]["role"] == "user" assert chat.messages[0]["content"] == [{"type": "text", "text": "prompt"}, {"type": "image", "image": image_input}] def test_add_assistant_message_string(image_input): # Add a string chat = Chat(messages=[]) chat.add_assistant_message("I'm doing well, thank you!") assert len(chat.messages) == 1 assert chat.messages[0]["role"] == "assistant" assert chat.messages[0]["content"] == "I'm doing well, thank you!" # Add a list chat = Chat(messages=[]) chat.add_assistant_message(["prompt", image_input]) assert len(chat.messages) == 1 assert chat.messages[0]["role"] == "assistant" assert chat.messages[0]["content"] == ["prompt", image_input] # Add a list of dict items with explicit types chat = Chat(messages=[]) chat.add_assistant_message([{"type": "text", "text": "prompt"}, {"type": "image", "image": image_input}]) assert len(chat.messages) == 1 assert chat.messages[0]["role"] == "assistant" assert chat.messages[0]["content"] == [{"type": "text", "text": "prompt"}, {"type": "image", "image": image_input}] ================================================ FILE: tests/test_templates.py ================================================ import base64 import os import tempfile from typing import Optional import pytest from PIL import Image as PILImage from io import BytesIO from pydantic import BaseModel, Field from outlines.inputs import Image from outlines.templates import ( Template, build_template_from_string, Vision, get_fn_name, get_fn_args, get_fn_description, get_fn_source, get_fn_signature, get_schema, ) def sample_function(x, y=2): """This is a sample function.""" return x + y def function_with_annotations(x: int, y: str) -> str: """Function with annotations.""" return f"{x} {y}" def function_with_no_docstring(x, y): return x * y class CallableClass: def __call__(self): pass class PydanticClass(BaseModel): foo: str def test_vision_initialization(): # Create a simple image for testing image = PILImage.new("RGB", (10, 10), color="red") image.format = "PNG" # Initialize the Vision object with pytest.deprecated_call(): vision = Vision(prompt="Test prompt", image=image) # Check that the prompt is set correctly assert isinstance(vision, list) assert len(vision) == 2 assert vision[0] == "Test prompt" assert isinstance(vision[1], Image) # Check that the image is encoded correctly buffer = BytesIO() image.save(buffer, format=image.format) expected_image_str = base64.b64encode(buffer.getvalue()).decode("utf-8") assert vision[1].image_str == expected_image_str # Check that the image format is set correctly assert vision[1].image_format == "image/png" def test_vision_invalid_image_format(): # Create an image without a format image = PILImage.new("RGB", (10, 10), color="blue") # Expect a TypeError when the image format is not set with pytest.deprecated_call(): with pytest.raises(TypeError, match="Could not read the format"): Vision(prompt="Test prompt", image=image) def render(content: str, filters: Optional[dict] = None, **kwargs): template = build_template_from_string(content, filters or {}) return template.render(kwargs) def test_render(): tpl = """ A test string""" assert render(tpl) == "A test string" tpl = """ A test string """ assert render(tpl) == "A test string" tpl = """ A test Another test """ assert render(tpl) == "A test\nAnother test" tpl = """A test Another test """ assert render(tpl) == "A test\nAnother test" tpl = """ A test line An indented line """ assert render(tpl) == "A test line\n An indented line" tpl = """ A test line An indented line """ assert render(tpl) == "A test line\n An indented line\n" def test_render_escaped_linebreak(): tpl = """ A long test \ that we break \ in several lines """ assert render(tpl) == "A long test that we break in several lines" tpl = """ Break in \ several lines \ But respect the indentation on line breaks. And after everything \ Goes back to normal """ assert ( render(tpl) == "Break in several lines But respect the indentation\n on line breaks.\nAnd after everything Goes back to normal" ) def test_render_jinja(): """Make sure that we can use basic Jinja2 syntax, and give examples of how we can use it for basic use cases. """ # Notice the newline after the end of the loop examples = ["one", "two"] prompt = render( """ {% for e in examples %} Example: {{e}} {% endfor -%}""", examples=examples, ) assert prompt == "Example: one\nExample: two\n" # We can remove the newline by cloing with -%} examples = ["one", "two"] prompt = render( """ {% for e in examples %} Example: {{e}} {% endfor -%} Final""", examples=examples, ) assert prompt == "Example: one\nExample: two\nFinal" # Same for conditionals tpl = """ {% if is_true %} true {% endif -%} final """ assert render(tpl, is_true=True) == "true\nfinal" assert render(tpl, is_true=False) == "final" def test_render_filters(): def foo(bar: str) -> str: """This is a sample function.""" return bar class PydanticClass(BaseModel): foo: str = Field(description="bar") def custom_filter(x: str) -> str: return x.upper() # name filter tpl = """ {{ func | name }} """ assert render(tpl, func=foo) == "foo" # description filter tpl = """ {{ func | description }} """ assert render(tpl, func=foo) == "This is a sample function." # source filter tpl = """ {{ func | source }} """ assert render(tpl, func=foo) == 'def foo(bar: str) -> str:\n """This is a sample function."""\n return bar\n' # signature filter tpl = """ {{ func | signature }} """ assert render(tpl, func=foo) == "bar: str" # args filter tpl = """ {{ func | args }} """ assert render(tpl, func=foo) == "bar: str" # schema filter tpl = """ {{ schema | schema }} """ assert render(tpl, schema=PydanticClass) == '{\n "foo": "bar"\n}' # custom filters tpl = """ {{ name | custom_filter }} """ assert render(tpl, {"custom_filter": custom_filter}, name="John") == "JOHN" @pytest.fixture def temp_prompt_file(): test_dir = tempfile.mkdtemp() base_template_path = os.path.join(test_dir, "base_template.txt") with open(base_template_path, "w") as f: f.write( """{% block content %}{% endblock %} """ ) include_file_path = os.path.join(test_dir, "include.txt") with open(include_file_path, "w") as f: f.write( """{% for example in examples %} - Q: {{ example.question }} - A: {{ example.answer }} {% endfor %} """ ) prompt_file_path = os.path.join(test_dir, "prompt.txt") with open(prompt_file_path, "w") as f: f.write( """{% extends "base_template.txt" %} {% block content %} Here is a prompt with examples: {% include "include.txt" %} Now please answer the following question: Q: {{ question }} A: {% endblock %} """ ) yield prompt_file_path def test_prompt_from_file(temp_prompt_file): prompt = Template.from_file(temp_prompt_file) examples = [ {"question": "What is the capital of France?", "answer": "Paris"}, {"question": "What is 2 + 2?", "answer": "4"}, ] question = "What is the Earth's diameter?" rendered = prompt(examples=examples, question=question) expected = """Here is a prompt with examples: - Q: What is the capital of France? - A: Paris - Q: What is 2 + 2? - A: 4 Now please answer the following question: Q: What is the Earth's diameter? A: """ assert rendered.strip() == expected.strip() def test_prompt_from_str(): content = """ Hello, {{ name }}! """ prompt = Template.from_string(content) assert prompt(name="World") == "Hello, World!" def test_template_from_str_with_extra_linebreaks(): content = """ Hello, {{ name }}! """ template = build_template_from_string(content) assert template.render(name="World") == "Hello, World!\n" def test_get_fn_name(): with pytest.raises(TypeError): get_fn_name(1) assert get_fn_name(sample_function) == "sample_function" assert get_fn_name(function_with_annotations) == "function_with_annotations" no_name_func = lambda x: x assert get_fn_name(no_name_func) == "" assert get_fn_name(CallableClass()) == "CallableClass" def test_get_fn_args(): with pytest.raises(TypeError): get_fn_args(1) assert get_fn_args(sample_function) == "x, y=2" assert get_fn_args(function_with_annotations) == "x: int, y: str" def test_get_fn_description(): with pytest.raises(TypeError): get_fn_description(1) assert get_fn_description(sample_function) == "This is a sample function." assert get_fn_description(function_with_annotations) == "Function with annotations." assert get_fn_description(function_with_no_docstring) == "" def test_get_fn_source(): with pytest.raises(TypeError, match="The `source` filter only applies to callables."): get_fn_source(1) source = ( 'def sample_function(x, y=2):\n' ' """This is a sample function."""\n' ' return x + y' ) assert get_fn_source(sample_function).strip() == source def test_get_fn_signature(): with pytest.raises(TypeError, match="The `source` filter only applies to callables."): get_fn_signature(1) sample_function_signature = "x, y=2" assert get_fn_signature(sample_function) == sample_function_signature function_with_annotations_signature = "x: int, y: str" assert get_fn_signature(function_with_annotations) == function_with_annotations_signature def test_get_schema(): with pytest.raises(NotImplementedError): get_schema(1) dict_schema = {"foo": "bar"} dict_schema_output = get_schema(dict_schema) assert dict_schema_output == '{\n "foo": "bar"\n}' pydantic_schema_output = get_schema(PydanticClass) assert pydantic_schema_output == '{\n "foo": ""\n}' ================================================ FILE: tests/test_utils/mock_lmstudio_client.py ================================================ import json from typing import Any, Dict, List, Optional, Tuple from tests.test_utils.utils import hash_dict def normalize_for_hash(obj): """Normalize objects for consistent hashing. lms.Chat objects have unique identifiers that change between instances, so we convert them to a canonical dict format for hashing. """ obj_str = str(obj) if obj_str.startswith("Chat.from_history("): # Get the json from the string representation json_part = obj_str[len("Chat.from_history("):-1] data = json.loads(json_part) return { "type": "lms.Chat", "messages": normalize_lmstudio_messages(data.get("messages", [])) } elif isinstance(obj, dict): return {k: normalize_for_hash(v) for k, v in obj.items()} elif isinstance(obj, list): return [normalize_for_hash(item) for item in obj] else: return obj def normalize_lmstudio_messages(messages): """Normalize message list for hashing.""" result = [] for msg in messages: normalized_msg = { "role": msg.get("role", ""), "content": normalize_lmstudio_content(msg.get("content", "")), } result.append(normalized_msg) return result def normalize_lmstudio_content(content): """Normalize message content for hashing.""" if isinstance(content, list): result = [] for item in content: if isinstance(item, dict): if item.get("type") == "text": result.append({"type": "text", "text": item.get("text", "")}) elif item.get("type") == "file": result.append({"type": "file", "sizeBytes": item.get("sizeBytes", 0)}) else: result.append(item) else: result.append(str(item)) return result elif isinstance(content, str): return content else: return str(content) def hash_lmstudio_request(data: dict) -> str: """Hash a request dict, normalizing lms.Chat objects.""" normalized = normalize_for_hash(data) return hash_dict(normalized) class MockLMStudioResponse: """Mock for LMStudio response object""" def __init__(self, content: str): self.content = content class MockLMStudioModel: """Mock for LMStudio model object returned by client.llm.model()""" def __init__(self, mock_responses: Dict[str, Any]): self._mock_responses = mock_responses def respond(self, messages, **kwargs): request_key = hash_lmstudio_request({"messages": messages, **kwargs}) response = self._mock_responses.get(request_key) if not response: raise ValueError(f"No response found for {{'messages': {messages}, **{kwargs}}}") return MockLMStudioResponse(response) def respond_stream(self, messages, **kwargs): request_key = hash_lmstudio_request({"messages": messages, **kwargs}) response = self._mock_responses.get(request_key) if not response: raise ValueError(f"No response found for {{'messages': {messages}, **{kwargs}}}") for chunk in response: yield MockLMStudioResponse(chunk) class MockLMStudioLLM: """Mock for the llm attribute of Client""" def __init__(self, mock_responses: Dict[str, Any]): self._mock_responses = mock_responses def model(self, model_key=None): return MockLMStudioModel(self._mock_responses) class MockLMStudioClient: """Mock for LMStudio `Client` that can be used to test the LMStudio model""" def __init__(self): self._mock_responses: Dict[str, Any] = {} self.llm: Optional[MockLMStudioLLM] = None def add_mock_responses(self, mocks: List[Tuple[dict, Any]]): for kwargs, response in mocks: request_key = hash_lmstudio_request(kwargs) self._mock_responses[request_key] = response self.llm = MockLMStudioLLM(self._mock_responses) class MockAsyncLMStudioModel: """Mock for async LMStudio model object returned by client.llm.model()""" def __init__(self, mock_responses: Dict[str, Any]): self._mock_responses = mock_responses async def respond(self, messages, **kwargs): request_key = hash_lmstudio_request({"messages": messages, **kwargs}) response = self._mock_responses.get(request_key) if not response: raise ValueError(f"No response found for {{'messages': {messages}, **{kwargs}}}") return MockLMStudioResponse(response) async def respond_stream(self, messages, **kwargs): """Return an async iterator (must be awaited first, then iterated).""" request_key = hash_lmstudio_request({"messages": messages, **kwargs}) response = self._mock_responses.get(request_key) if not response: raise ValueError(f"No response found for {{'messages': {messages}, **{kwargs}}}") async def _stream(): for chunk in response: yield MockLMStudioResponse(chunk) return _stream() class MockAsyncLMStudioLLM: """Mock for the llm attribute of AsyncClient""" def __init__(self, mock_responses: Dict[str, Any]): self._mock_responses = mock_responses async def model(self, model_key=None): return MockAsyncLMStudioModel(self._mock_responses) class MockAsyncLMStudioClient: """Mock for LMStudio `AsyncClient` that can be used to test the AsyncLMStudio model""" def __init__(self): self._mock_responses: Dict[str, Any] = {} self.llm: Optional[MockAsyncLMStudioLLM] = None self._context_entered = False def add_mock_responses(self, mocks: List[Tuple[dict, Any]]): for kwargs, response in mocks: request_key = hash_lmstudio_request(kwargs) self._mock_responses[request_key] = response self.llm = MockAsyncLMStudioLLM(self._mock_responses) async def __aenter__(self): self._context_entered = True return self async def __aexit__(self, exc_type, exc_val, exc_tb): self._context_entered = False return False ================================================ FILE: tests/test_utils/mock_openai_client.py ================================================ from typing import List, Dict, Any, Optional from unittest.mock import MagicMock from tests.test_utils.utils import hash_dict class MockChoice: def __init__( self, content: str, finish_reason: str = "stop", refusal: Optional[str] = None ): self.message = MagicMock() self.message.content = content self.message.refusal = refusal self.finish_reason = finish_reason self.delta = MagicMock() self.delta.content = content class MockCompletionResponse: def __init__(self, choices: List[MockChoice]): self.choices = choices class MockStreamingChunk: def __init__(self, content: Optional[str] = None): self.choices = [] if content is not None: choice = MagicMock() delta = MagicMock() delta.content = content choice.delta = delta self.choices = [choice] class MockOpenAIClient: """Mock for OpenAI client that can be used to test vLLM integration""" def __init__(self): self.chat = MagicMock() self.chat.completions = MagicMock() self.chat.completions.create = MagicMock() # The method that will be called by the model when it makes a request def _create(**kwargs): # Hash the arguments to create a unique key request_key = hash_dict(kwargs) response = self._mock_responses.get(request_key) if not response: raise ValueError(f"No response found for {kwargs}") if kwargs.get("stream", False): return self._create_streaming_response(response) else: return self._create_standard_response(response) self.chat.completions.create.side_effect = _create self._mock_responses: Dict[str, Any] = {} def add_mock_responses(self, mocks: list): for kwargs, response in mocks: request_key = hash_dict(kwargs) self._mock_responses[request_key] = response def _create_standard_response(self, response): if isinstance(response, str): response = [response] choices = [MockChoice(content=chunk) for chunk in response] return MockCompletionResponse(choices=choices) def _create_streaming_response(self, response): chunks = [MockStreamingChunk(content=chunk) for chunk in response] return iter(chunks) class MockAsyncOpenAIClient: """Mock for AsyncOpenAI client that can be used to test AsyncVLLM integration""" def __init__(self): self.chat = MagicMock() self.chat.completions = MagicMock() self.chat.completions.create = MagicMock() # The method that will be called by the model when it makes a request async def _async_create(**kwargs): # Hash the arguments to create a unique key request_key = hash_dict(kwargs) response = self._mock_responses.get(request_key) if not response: raise ValueError(f"No response found for {kwargs}") if kwargs.get("stream", False): return self._create_async_streaming_response(response) else: return await self._create_async_standard_response(response) self.chat.completions.create.side_effect = _async_create self._mock_responses: Dict[str, Any] = {} def add_mock_responses(self, mocks: list): for kwargs, response in mocks: request_key = hash_dict(kwargs) self._mock_responses[request_key] = response async def _create_async_standard_response(self, response): """Create an async standard (non-streaming) response""" if isinstance(response, str): response = [response] choices = [MockChoice(content=chunk) for chunk in response] return MockCompletionResponse(choices=choices) async def _create_async_streaming_response(self, response): """Create an async streaming response generator""" chunks = [MockStreamingChunk(content=chunk) for chunk in response] for chunk in chunks: yield chunk ================================================ FILE: tests/test_utils/mock_tgi_client.py ================================================ from typing import Any, Dict from unittest.mock import MagicMock from tests.test_utils.utils import hash_dict class MockTGIInferenceClient: """Mock for TGI `InferenceClient` that can be used to test the TGI model""" def __init__(self): self.text_generation = MagicMock() # The method that will be called by the model when it makes a request def _create(**kwargs): # Hash the arguments to create a unique key request_key = hash_dict(kwargs) response = self._mock_responses.get(request_key) if not response: raise ValueError(f"No response found for {kwargs}") if kwargs.get("stream", False): return iter(response) else: return response self.text_generation.side_effect = _create self._mock_responses: Dict[str, Any] = {} def add_mock_responses(self, mocks: list): for kwargs, response in mocks: request_key = hash_dict(kwargs) self._mock_responses[request_key] = response class MockAsyncTGIInferenceClient: """Mock for TGI `InferenceClient` that can be used to test the TGI model""" def __init__(self): self.text_generation = MagicMock() # The method that will be called by the model when it makes a request async def _async_create(**kwargs): # Hash the arguments to create a unique key request_key = hash_dict(kwargs) response = self._mock_responses.get(request_key) if not response: raise ValueError(f"No response found for {kwargs}") if kwargs.get("stream", False): return self._create_async_streaming_response(response) else: return response self.text_generation.side_effect = _async_create self._mock_responses: Dict[str, Any] = {} def add_mock_responses(self, mocks: list): for kwargs, response in mocks: request_key = hash_dict(kwargs) self._mock_responses[request_key] = response async def _create_async_streaming_response(self, response): """Create an async streaming response generator""" for chunk in response: yield chunk ================================================ FILE: tests/test_utils/utils.py ================================================ import hashlib import pickle import sys def hash_dict(d) -> str: def make_hashable(obj): if isinstance(obj, (bool, int, float, str, type(None))): if isinstance(obj, str): return sys.intern(obj) return obj if isinstance(obj, dict): return tuple(sorted( (sys.intern(k) if isinstance(k, str) else k, make_hashable(v)) for k, v in obj.items() )) if isinstance(obj, (list, tuple)): return tuple(make_hashable(e) for e in obj) return str(obj) hashable_obj = make_hashable(d) pickled_obj = pickle.dumps(hashable_obj, protocol=4) return hashlib.sha256(pickled_obj).hexdigest() ================================================ FILE: tests/types/test_custom_types.py ================================================ import re import pytest from pydantic import BaseModel from outlines import types from outlines.types.dsl import to_regex @pytest.mark.parametrize( "custom_type,test_string,should_match", [ (types.locale.us.phone_number, "12", False), (types.locale.us.phone_number, "(123) 123-1234", True), (types.locale.us.phone_number, "123-123-1234", True), (types.locale.us.zip_code, "12", False), (types.locale.us.zip_code, "12345", True), (types.locale.us.zip_code, "12345-1234", True), (types.isbn, "ISBN 0-1-2-3-4-5", False), (types.isbn, "ISBN 978-0-596-52068-7", True), (types.isbn, "ISBN-13: 978-0-596-52068-7", True), (types.isbn, "978 0 596 52068 7", True), (types.isbn, "9780596520687", True), (types.isbn, "ISBN-10: 0-596-52068-9", True), (types.isbn, "0-596-52068-9", True), (types.email, "eitan@gmail.com", True), (types.email, "99@yahoo.com", True), (types.email, "eitan@.gmail.com", False), (types.email, "myemail", False), (types.email, "eitan@gmail", False), (types.email, "eitan@my.custom.domain", True), (types.integer, "-19", True), (types.integer, "19", True), (types.integer, "019", False), (types.integer, "1.9", False), (types.integer, "a", False), (types.boolean, "True", True), (types.boolean, "False", True), (types.boolean, "true", False), (types.number, "10", True), (types.number, "10.9", True), (types.number, "10.9e+3", True), (types.number, "10.9e-3", True), (types.number, "a", False), (types.date, "2022-03-23", True), (types.date, "2022-03-32", False), (types.date, "2022-13-23", False), (types.date, "32-03-2022", False), (types.time, "01:23:59", True), (types.time, "01:23:61", False), (types.time, "01:61:59", False), (types.time, "24:23:59", False), (types.sentence, "The temperature is 23.5 degrees !", True), (types.sentence, "Did you earn $1,234.56 last month ?", True), (types.sentence, "The #1 player scored 100 points .", True), (types.sentence, "Hello @world, this is a test!", True), (types.sentence, "invalid sentence.", False), (types.sentence, "Invalid sentence", False), (types.paragraph, "This is a paragraph!\n", True), (types.paragraph, "Line1\nLine2", False), (types.paragraph, "One sentence. Two sentences.\n\n", True), (types.paragraph, "One sentence. invalid sentence.", False), (types.paragraph, "One sentence. Invalid sentence\n", False), (types.hex_str, "0x123", True), (types.hex_str, "0xABC", True), (types.hex_str, "0xabc", True), (types.hex_str, "0x123ABC", True), (types.hex_str, "123", True), (types.hex_str, "ABC", True), (types.hex_str, "abc", True), (types.hex_str, "123ABC", True), (types.hex_str, "0xg123", False), (types.hex_str, "0x", False), (types.hex_str, "0x123G", False), (types.uuid4, "123e4567-e89b-42d3-a456-426614174000", True), (types.uuid4, "00000000-0000-4000-8000-000000000000", True), (types.uuid4, "123e4567-e89b-12d3-a456-426614174000", False), (types.uuid4, "123e4567-e89b-12d3-a456-42661417400", False), (types.uuid4, "123e4567-e89b-12d3-a456-4266141740000", False), (types.uuid4, "123e4567-e89b-12d3-x456-426614174000", False), (types.uuid4, "123e4567-e89b-12d3-a456-42661417400g", False), (types.ipv4, "192.168.1.1", True), (types.ipv4, "10.0.0.1", True), (types.ipv4, "172.16.0.1", True), (types.ipv4, "255.255.255.255", True), (types.ipv4, "0.0.0.0", True), (types.ipv4, "256.1.2.3", False), (types.ipv4, "1.256.2.3", False), (types.ipv4, "1.2.256.3", False), (types.ipv4, "1.2.3.256", False), (types.ipv4, "1.2.3", False), (types.ipv4, "1.2.3.4.5", False), (types.ipv4, "1.2.3.4.", False), (types.ipv4, ".1.2.3.4", False), (types.ipv4, "1..2.3.4", False), ], ) def test_type_regex(custom_type, test_string, should_match): class Model(BaseModel): attr: custom_type schema = Model.model_json_schema() assert schema["properties"]["attr"]["type"] == "string" regex_str = schema["properties"]["attr"]["pattern"] does_match = re.fullmatch(regex_str, test_string) is not None assert does_match is should_match regex_str = to_regex(custom_type) does_match = re.fullmatch(regex_str, test_string) is not None assert does_match is should_match @pytest.mark.parametrize( "custom_type,test_string,should_match", [ (types.airports.IATA, "CDG", True), (types.airports.IATA, "XXX", False), (types.countries.Alpha2, "FR", True), (types.countries.Alpha2, "XX", False), (types.countries.Alpha3, "UKR", True), (types.countries.Alpha3, "XXX", False), (types.countries.Numeric, "004", True), (types.countries.Numeric, "900", False), (types.countries.Name, "Ukraine", True), (types.countries.Name, "Wonderland", False), (types.countries.Flag, "🇿🇼", True), (types.countries.Flag, "🤗", False), ], ) def test_type_enum(custom_type, test_string, should_match): type_name = custom_type.__name__ class Model(BaseModel): attr: custom_type schema = Model.model_json_schema() assert isinstance(schema["$defs"][type_name]["enum"], list) does_match = test_string in schema["$defs"][type_name]["enum"] assert does_match is should_match does_match = test_string in custom_type.__members__ assert does_match is should_match ================================================ FILE: tests/types/test_dsl.py ================================================ import datetime import json import re as _re import sys import tempfile from dataclasses import dataclass from enum import Enum from typing import ( Literal, Tuple, Union, get_args, Optional as PyOptional ) import jsonschema import pytest from genson import SchemaBuilder from pydantic import BaseModel from outlines import grammars, types from outlines.types.dsl import ( Alternatives, JsonSchema, KleenePlus, KleeneStar, Optional, QuantifyBetween, QuantifyExact, QuantifyMaximum, QuantifyMinimum, Choice, Regex, Sequence, String, Term, either, CFG, _handle_dict, _handle_list, _handle_literal, _handle_tuple, _handle_union, _ensure_json_quoted, json_schema, one_or_more, zero_or_more, optional, between, at_most, at_least, exactly, regex, python_types_to_terms, to_regex, ) from outlines.types.utils import ( is_pydantic_model, is_typed_dict, is_dataclass, ) if sys.version_info >= (3, 12): from typing import TypedDict else: from typing_extensions import TypedDict def test_dsl_init(): string = String("test") assert string.value == "test" assert repr(string) == "String(value='test')" assert string.display_ascii_tree() == "└── String('test')\n" choice = Choice(["a", "b"]) assert choice.items == ["a", "b"] assert repr(choice) == "Choice(items=['a', 'b'])" assert choice.display_ascii_tree() == "└── Choice(['a', 'b'])\n" regex = Regex("[0-9]") assert regex.pattern == "[0-9]" assert repr(regex) == "Regex(pattern='[0-9]')" assert regex.display_ascii_tree() == "└── Regex('[0-9]')\n" schema = JsonSchema('{ "type": "string" }') assert schema.schema == '{ "type": "string" }' assert repr(schema) == 'JsonSchema(schema=\'{ "type": "string" }\')' assert schema.display_ascii_tree() == "└── JsonSchema('{ \"type\": \"string\" }')\n" kleene_star = KleeneStar(string) assert kleene_star.term == string assert repr(kleene_star) == "KleeneStar(term=String(value='test'))" assert kleene_star.display_ascii_tree() == "└── KleeneStar(*)\n └── String('test')\n" kleene_plus = KleenePlus(string) assert kleene_plus.term == string assert repr(kleene_plus) == "KleenePlus(term=String(value='test'))" assert kleene_plus.display_ascii_tree() == "└── KleenePlus(+)\n └── String('test')\n" optional = Optional(string) assert optional.term == string assert repr(optional) == "Optional(term=String(value='test'))" assert optional.display_ascii_tree() == "└── Optional(?)\n └── String('test')\n" alternatives = Alternatives([string, regex]) assert alternatives.terms[0] == string assert alternatives.terms[1] == regex assert ( repr(alternatives) == "Alternatives(terms=[String(value='test'), Regex(pattern='[0-9]')])" ) assert alternatives.display_ascii_tree() == "└── Alternatives(|)\n ├── String('test')\n └── Regex('[0-9]')\n" sequence = Sequence([string, regex]) assert sequence.terms[0] == string assert sequence.terms[1] == regex assert ( repr(sequence) == "Sequence(terms=[String(value='test'), Regex(pattern='[0-9]')])" ) assert sequence.display_ascii_tree() == "└── Sequence\n ├── String('test')\n └── Regex('[0-9]')\n" exact = QuantifyExact(string, 3) assert exact.term == string assert exact.count == 3 assert repr(exact) == "QuantifyExact(term=String(value='test'), count=3)" assert exact.display_ascii_tree() == "└── Quantify({3})\n └── String('test')\n" minimum = QuantifyMinimum(string, 3) assert minimum.term == string assert minimum.min_count == 3 assert repr(minimum) == "QuantifyMinimum(term=String(value='test'), min_count=3)" assert minimum.display_ascii_tree() == "└── Quantify({3,})\n └── String('test')\n" maximum = QuantifyMaximum(string, 3) assert maximum.term == string assert maximum.max_count == 3 assert repr(maximum) == "QuantifyMaximum(term=String(value='test'), max_count=3)" assert maximum.display_ascii_tree() == "└── Quantify({,3})\n └── String('test')\n" between = QuantifyBetween(string, 1, 3) assert between.term == string assert between.min_count == 1 assert between.max_count == 3 assert ( repr(between) == "QuantifyBetween(term=String(value='test'), min_count=1, max_count=3)" ) assert between.display_ascii_tree() == "└── Quantify({1,3})\n └── String('test')\n" with pytest.raises( ValueError, match="`max_count` must be greater than `min_count`" ): QuantifyBetween(string, 3, 1) def test_dsl_term_methods(): a = String("a") b = Regex("[0-9]") c = "c" assert a + b == Sequence([a, b]) assert a + c == Sequence([a, String(c)]) assert a.__radd__(b) == Sequence([b, a]) assert a.__radd__(c) == Sequence([String(c), a]) assert a | b == Alternatives([a, b]) assert a | c == Alternatives([a, String(c)]) assert a.__ror__(b) == Alternatives([b, a]) assert a.__ror__(c) == Alternatives([String(c), a]) core_schema = a.__get_pydantic_core_schema__("", "") validator = a.__get_validator__(core_schema) assert validator("a") == "a" with pytest.raises( ValueError, match="Input should be in the language of the regular expression", ): validator("b") assert a.__get_pydantic_json_schema__("", "") == {"type": "string", "pattern": "a"} assert a.matches("a") assert not a.matches("b") assert a.display_ascii_tree() == "└── String('a')\n" with pytest.raises(NotImplementedError): Term()._display_node() assert a._display_children("") == "" assert a.__str__() == "└── String('a')\n" def test_dsl_sequence(): a = String("a") b = String("b") sequence = a + b assert isinstance(sequence, Sequence) assert sequence.terms[0] == a assert sequence.terms[1] == b sequence = "a" + b assert isinstance(sequence, Sequence) assert isinstance(sequence.terms[0], String) assert sequence.terms[0].value == "a" assert sequence.terms[1].value == "b" sequence = a + "b" assert isinstance(sequence, Sequence) assert isinstance(sequence.terms[1], String) assert sequence.terms[0].value == "a" assert sequence.terms[1].value == "b" def test_dsl_alternatives(): a = String("a") b = String("b") alt = either(a, b) assert isinstance(alt, Alternatives) assert isinstance(alt.terms[0], String) assert isinstance(alt.terms[1], String) alt = either("a", "b") assert isinstance(alt, Alternatives) assert isinstance(alt.terms[0], String) assert isinstance(alt.terms[1], String) alt = either("a", b) assert isinstance(alt, Alternatives) assert isinstance(alt.terms[0], String) assert isinstance(alt.terms[1], String) def test_dsl_optional(): a = String("a") opt = a.optional() assert isinstance(opt, Optional) opt = optional("a") assert isinstance(opt, Optional) assert isinstance(opt.term, String) opt = a.optional() assert isinstance(opt, Optional) def test_dsl_exactly(): a = String("a") rep = a.exactly(2) assert isinstance(rep, QuantifyExact) assert rep.count == 2 rep = exactly(2, "a") assert isinstance(rep, QuantifyExact) assert isinstance(rep.term, String) rep = a.exactly(2) assert isinstance(rep, QuantifyExact) def test_dsl_at_least(): a = String("a") rep = a.at_least(2) assert isinstance(rep, QuantifyMinimum) assert rep.min_count == 2 rep = at_least(2, "a") assert isinstance(rep, QuantifyMinimum) assert isinstance(rep.term, String) rep = a.at_least(2) assert isinstance(rep, QuantifyMinimum) def test_dsl_at_most(): a = String("a") rep = a.at_most(2) assert isinstance(rep, QuantifyMaximum) assert rep.max_count == 2 rep = at_most(2, "a") assert isinstance(rep, QuantifyMaximum) assert isinstance(rep.term, String) rep = a.at_most(2) assert isinstance(rep, QuantifyMaximum) def test_between(): a = String("a") rep = a.between(1, 2) assert isinstance(rep, QuantifyBetween) assert rep.min_count == 1 assert rep.max_count == 2 rep = between(1, 2, "a") assert isinstance(rep, QuantifyBetween) assert isinstance(rep.term, String) rep = a.between(1, 2) assert isinstance(rep, QuantifyBetween) def test_dsl_zero_or_more(): a = String("a") rep = a.zero_or_more() assert isinstance(rep, KleeneStar) rep = zero_or_more("a") assert isinstance(rep, KleeneStar) assert isinstance(rep.term, String) rep = a.zero_or_more() assert isinstance(rep, KleeneStar) def test_dsl_one_or_more(): a = String("a") rep = a.one_or_more() assert isinstance(rep, KleenePlus) rep = one_or_more("a") assert isinstance(rep, KleenePlus) assert isinstance(rep.term, String) rep = a.zero_or_more() assert isinstance(rep, KleeneStar) def test_dsl_aliases(): test = regex("[0-9]") assert isinstance(test, Regex) test = json_schema('{"type": "string"}') assert isinstance(test, JsonSchema) def test_dsl_term_pydantic_simple(): a = String("a") class Model(BaseModel): field: a schema = Model.model_json_schema() assert schema == { "properties": {"field": {"pattern": "a", "title": "Field", "type": "string"}}, "required": ["field"], "title": "Model", "type": "object", } def test_dsl_term_pydantic_combination(): a = String("a") b = String("b") c = String("c") class Model(BaseModel): field: either((a + b), c) schema = Model.model_json_schema() assert schema == { "properties": { "field": {"pattern": "(ab|c)", "title": "Field", "type": "string"} }, "required": ["field"], "title": "Model", "type": "object", } def test_dsl_display(): a = String("a") b = String("b") c = Regex("[0-9]") d = Sequence([KleeneStar(Alternatives([a, b])), c]) tree = str(d) assert ( tree == "└── Sequence\n ├── KleeneStar(*)\n │ └── Alternatives(|)\n │ ├── String('a')\n │ └── String('b')\n └── Regex('[0-9]')\n" ) def test_cfg(): cfg_string = """ ?start: expr ?expr: NUMBER """ cfg = types.cfg(cfg_string) assert isinstance(cfg, CFG) assert cfg.definition.strip() == "?start: expr\n?expr: NUMBER" assert cfg._display_node() == "CFG('\n?start: expr\n?expr: NUMBER\n')" assert cfg.__repr__() == "CFG(definition='\n?start: expr\n?expr: NUMBER\n')" assert cfg == types.cfg(cfg_string) assert not cfg == "a" def test_json_schema(): # variables to be used in the tests json_schema = types.json_schema('{"type": "object", "properties": {"foo": {"type": "string"}, "bar": {"type": "integer"}}, "required": ["foo"]}') schema_builder_instance = SchemaBuilder() schema_builder_instance.add_schema({"type": "object", "properties": {"foo": {"type": "string"}, "bar": {"type": "integer"}}, "required": ["foo"]}) class MyPydanticModel(BaseModel): foo: str bar: PyOptional[int] = None class MyTypedDict(TypedDict): foo: str bar: int @dataclass class MyDataClass: foo: str bar: PyOptional[int] = None # init dict schema = types.json_schema({"type": "string"}) assert schema.schema == '{"type": "string"}' # init str schema = types.json_schema('{"type": "string"}') assert schema.schema == '{"type": "string"}' # init Pydantic model schema = types.json_schema(MyPydanticModel) assert schema.schema == '{"properties": {"foo": {"title": "Foo", "type": "string"}, "bar": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Bar"}}, "required": ["foo"], "title": "MyPydanticModel", "type": "object"}' # init TypedDict schema = types.json_schema(MyTypedDict) assert schema.schema == '{"properties": {"foo": {"title": "Foo", "type": "string"}, "bar": {"title": "Bar", "type": "integer"}}, "required": ["foo", "bar"], "title": "MyTypedDict", "type": "object"}' # init dataclass schema = types.json_schema(MyDataClass) assert schema.schema == '{"properties": {"foo": {"title": "Foo", "type": "string"}, "bar": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Bar"}}, "required": ["foo"], "title": "MyDataClass", "type": "object"}' # init SchemaBuilder schema = types.json_schema(schema_builder_instance) assert schema.schema == '{"$schema": "http://json-schema.org/schema#", "type": "object", "properties": {"foo": {"type": "string"}, "bar": {"type": "integer"}}, "required": ["foo"]}' # init unsupported type with pytest.raises(ValueError, match="Cannot parse schema"): types.json_schema(1) # init invalide JSON schema with pytest.raises(jsonschema.exceptions.SchemaError): types.json_schema({"type": "strin"}) # is_json_schema assert not JsonSchema.is_json_schema(None) assert not JsonSchema.is_json_schema('{"type": "string"}') assert not JsonSchema.is_json_schema({"type": "string"}) assert JsonSchema.is_json_schema(json_schema) assert JsonSchema.is_json_schema(schema_builder_instance) assert JsonSchema.is_json_schema(MyPydanticModel) assert JsonSchema.is_json_schema(MyTypedDict) assert JsonSchema.is_json_schema(MyDataClass) # convert_to assert JsonSchema.convert_to(json_schema, ["str"]) == json_schema.schema assert JsonSchema.convert_to(json_schema, ["dict"]) == json.loads(json_schema.schema) assert JsonSchema.convert_to(MyPydanticModel, ["pydantic"]) == MyPydanticModel assert JsonSchema.convert_to(MyTypedDict, ["typeddict"]) == MyTypedDict assert JsonSchema.convert_to(MyDataClass, ["dataclass"]) == MyDataClass assert JsonSchema.convert_to(schema_builder_instance, ["genson"]) == schema_builder_instance assert JsonSchema.convert_to(MyPydanticModel, ["str"]) == JsonSchema(MyPydanticModel).schema assert JsonSchema.convert_to(MyPydanticModel, ["dict"]) == json.loads(JsonSchema(MyPydanticModel).schema) assert is_pydantic_model(JsonSchema.convert_to(json_schema, ["pydantic"])) assert is_typed_dict(JsonSchema.convert_to(json_schema, ["typeddict"])) assert is_dataclass(JsonSchema.convert_to(json_schema, ["dataclass"])) with pytest.raises(ValueError, match="Cannot convert schema type"): JsonSchema.convert_to(json_schema, ["genson"]) # other methods schema = types.json_schema('{"type": "string"}') assert schema._display_node() == "JsonSchema('{\"type\": \"string\"}')" assert schema.__repr__() == "JsonSchema(schema='{\"type\": \"string\"}')" assert schema == types.json_schema('{"type": "string"}') assert not schema == "a" def test_dsl_cfg_from_file(): grammar_content = """ ?start: expression ?expression: term (("+" | "-") term)* ?term: factor (("*" | "/") factor)* ?factor: NUMBER """ with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=True) as temp_file: temp_file.write(grammar_content) temp_file.flush() temp_file_path = temp_file.name cfg = CFG.from_file(temp_file_path) assert cfg == CFG(grammar_content) def test_dsl_json_schema_from_file(): schema_content = """ { "type": "object", "properties": { "name": { "type": "string" } } } """ with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=True) as temp_file: temp_file.write(schema_content) temp_file.flush() temp_file_path = temp_file.name schema = JsonSchema.from_file(temp_file_path) assert schema == JsonSchema(schema_content) def test_dsl_python_types_to_terms(): with pytest.raises(RecursionError): python_types_to_terms(None, 11) term = Term() assert python_types_to_terms(term) == term assert python_types_to_terms(int) == types.integer assert python_types_to_terms(float) == types.number assert python_types_to_terms(bool) == types.boolean assert python_types_to_terms(str) == types.string assert python_types_to_terms(datetime.time) == types.time assert python_types_to_terms(datetime.date) == types.date assert python_types_to_terms(datetime.datetime) == types.datetime assert python_types_to_terms(dict) == types.CFG(grammars.json) string_instance = "a" assert python_types_to_terms(string_instance) == String(string_instance) int_instance = 1 assert python_types_to_terms(int_instance) == Regex(r"1") float_instance = 1.0 assert python_types_to_terms(float_instance) == Regex(r"1.0") @dataclass class DataClass: a: int b: str assert python_types_to_terms(DataClass) == JsonSchema( { "properties": {"a": {"title": "A", "type": "integer"}, "b": {"title": "B", "type": "string"}}, "required": ["a", "b"], "title": "DataClass", "type": "object", } ) class SomeTypedDict(TypedDict): a: int b: str assert python_types_to_terms(SomeTypedDict) == JsonSchema( { "properties": {"a": {"title": "A", "type": "integer"}, "b": {"title": "B", "type": "string"}}, "required": ["a", "b"], "title": "SomeTypedDict", "type": "object", } ) class PydanticModel(BaseModel): a: int b: str assert python_types_to_terms(PydanticModel) == JsonSchema( { "properties": {"a": {"title": "A", "type": "integer"}, "b": {"title": "B", "type": "string"}}, "required": ["a", "b"], "title": "PydanticModel", "type": "object", } ) builder = SchemaBuilder() builder.add_schema({"type": "object", "properties": {}}) builder.add_object({"hi": "there"}) builder.add_object({"hi": 5}) assert python_types_to_terms(builder) == JsonSchema( { "$schema": "http://json-schema.org/schema#", "type": "object", "properties": {"hi": {"type": ["integer", "string"]}}, "required": ["hi"] } ) def func(a: int, b: str): return (a, b) assert python_types_to_terms(func) == JsonSchema( { "type": "object", "properties": { "a": {"title": "A", "type": "integer"}, "b": {"title": "B", "type": "string"}, }, "required": ["a", "b"], "title": "func", } ) class SomeEnum(Enum): a = "a" b = int c = func result = python_types_to_terms(SomeEnum) assert isinstance(result, Alternatives) assert len(result.terms) == 3 assert result.terms[0] == String("a") assert result.terms[1] == types.integer assert isinstance(result.terms[2], JsonSchema) schema_dict = json.loads(result.terms[2].schema) assert schema_dict == { "properties": { "a": {"title": "A", "type": "integer"}, "b": {"title": "B", "type": "string"}, }, "required": ["a", "b"], "title": "func", "type": "object", } # for generic types we only test the dispatch as the functions that # convert to terms are tested in distinct tests below assert python_types_to_terms(Literal["a", "b"]) == _handle_literal(("a", "b")) assert python_types_to_terms(Union[int, str]) == _handle_union((int, str), recursion_depth=0) assert python_types_to_terms(list[int]) == _handle_list((int,), recursion_depth=0) assert python_types_to_terms(tuple[int, str]) == _handle_tuple((int, str), recursion_depth=0) assert python_types_to_terms(dict[int, str]) == _handle_dict((int, str), recursion_depth=0) # type not supported with pytest.raises(TypeError, match="is currently not supported"): python_types_to_terms(bytes) def test_dsl_handle_literal(): literal = Literal["a", 1] result = _handle_literal(get_args(literal)) assert isinstance(result, Alternatives) assert len(result.terms) == 2 assert result.terms[0] == String("a") assert result.terms[1] == Regex(r"1") def test_dsl_handle_union(): # test simple Union simple_union = Union[int, str] result = _handle_union(get_args(simple_union), recursion_depth=0) assert isinstance(result, Alternatives) assert len(result.terms) == 2 assert result.terms[0] == types.integer assert result.terms[1] == types.string # test with Optional[T] optional_type = PyOptional[int] result = _handle_union(get_args(optional_type), recursion_depth=0) assert isinstance(result, Alternatives) assert len(result.terms) == 2 assert result.terms[0] == types.integer assert result.terms[1] == String("None") # test with more complex types class TestModel(BaseModel): field: str class TestEnum(Enum): a = "a" b = "b" complex_union = Union[TestModel, TestEnum] result = _handle_union(get_args(complex_union), recursion_depth=0) assert isinstance(result, Alternatives) assert len(result.terms) == 2 assert isinstance(result.terms[0], JsonSchema) assert isinstance(result.terms[1], Alternatives) assert len(result.terms[1].terms) == 2 assert result.terms[1].terms[0] == String("a") assert result.terms[1].terms[1] == String("b") def test_dsl_handle_list(): with pytest.raises(TypeError): _handle_list(None, recursion_depth=0) with pytest.raises(TypeError): _handle_list((), recursion_depth=0) with pytest.raises(TypeError): _handle_list((int, str), recursion_depth=0) # simple type list_type = list[int] result = _handle_list(get_args(list_type), recursion_depth=0) assert isinstance(result, Sequence) assert len(result.terms) == 4 assert result.terms[0] == String("[") assert result.terms[1] == types.integer assert isinstance(result.terms[2], KleeneStar) assert result.terms[2].term == Sequence([String(", "), types.integer]) assert result.terms[3] == String("]") # more complex type list_type = list[Union[int, str]] result = _handle_list(get_args(list_type), recursion_depth=0) assert isinstance(result, Sequence) assert len(result.terms) == 4 assert result.terms[0] == String("[") assert result.terms[1] == _handle_union(get_args(Union[int, str]), recursion_depth=0) assert isinstance(result.terms[2], KleeneStar) assert result.terms[2].term == Sequence([String(", "), _handle_union(get_args(Union[int, str]), recursion_depth=0)]) assert result.terms[3] == String("]") def test_dsl_handle_tuple(): # empty tuple tuple_type = Tuple[()] result = _handle_tuple(get_args(tuple_type), recursion_depth=0) assert isinstance(result, String) assert result.value == "()" # tuple with ellipsis tuple_type = tuple[int, ...] result = _handle_tuple(get_args(tuple_type), recursion_depth=0) assert isinstance(result, Sequence) assert len(result.terms) == 4 assert result.terms[0] == String("(") assert result.terms[1] == types.integer assert isinstance(result.terms[2], KleeneStar) assert result.terms[2].term == Sequence([String(", "), types.integer]) assert result.terms[3] == String(")") # tuple with fixed length tuple_type = tuple[int, str] result = _handle_tuple(get_args(tuple_type), recursion_depth=0) assert isinstance(result, Sequence) assert len(result.terms) == 5 assert result.terms[0] == String("(") assert result.terms[1] == types.integer assert result.terms[2] == String(", ") assert result.terms[3] == types.string assert result.terms[4] == String(")") # tuple with fixed length and complex types tuple_type = tuple[int, Union[str, int]] result = _handle_tuple(get_args(tuple_type), recursion_depth=0) assert isinstance(result, Sequence) assert len(result.terms) == 5 assert result.terms[0] == String("(") assert result.terms[1] == types.integer assert result.terms[2] == String(", ") assert result.terms[3] == _handle_union(get_args(Union[str, int]), recursion_depth=0) assert result.terms[4] == String(")") def test_dsl_handle_dict(): # args of incorrect length with pytest.raises(TypeError): incorrect_dict_type = dict[int, str, int] _handle_dict(get_args(incorrect_dict_type), recursion_depth=0) # correct type dict_type = dict[int, str] result = _handle_dict(get_args(dict_type), recursion_depth=0) assert isinstance(result, Sequence) assert len(result.terms) == 3 assert result.terms[0] == String("{") assert isinstance(result.terms[1], Optional) assert isinstance(result.terms[1].term, Sequence) assert len(result.terms[1].term.terms) == 4 assert result.terms[1].term.terms[0] == types.integer assert result.terms[1].term.terms[1] == String(":") assert result.terms[1].term.terms[2] == types.string assert result.terms[1].term.terms[3] == KleeneStar(Sequence([String(", "), types.integer, String(":"), types.string])) assert result.terms[2] == String("}") def test_ensure_json_quoted_string(): """String terms are wrapped in double-quote delimiters.""" term = String("hello") result = _ensure_json_quoted(term) assert isinstance(result, String) assert result == String('"hello"') def test_ensure_json_quoted_alternatives(): """Each branch of an Alternatives is independently quoted.""" term = Alternatives([String("a"), String("b")]) result = _ensure_json_quoted(term) assert isinstance(result, Alternatives) assert len(result.terms) == 2 for branch in result.terms: assert isinstance(branch, String) assert branch.value.startswith('"') and branch.value.endswith('"') def test_ensure_json_quoted_passthrough(): """Non-String, non-Alternatives terms are returned unchanged.""" regex_term = types.integer assert _ensure_json_quoted(regex_term) is regex_term seq = Sequence([String("a"), String("b")]) assert _ensure_json_quoted(seq) is seq def test_list_of_literals_quoted(): """Literal strings inside List are JSON-quoted.""" list_type = list[Literal["cat", "dog"]] result = _handle_list(get_args(list_type), recursion_depth=0) assert isinstance(result, Sequence) assert result.terms[0] == String("[") item = result.terms[1] assert isinstance(item, Alternatives) for branch in item.terms: assert isinstance(branch, String) assert branch.value.startswith('"') and branch.value.endswith('"') def test_tuple_of_literals_quoted(): """Literal strings inside fixed Tuple are JSON-quoted.""" tuple_type = Tuple[Literal["x"], Literal["y"]] result = _handle_tuple(get_args(tuple_type), recursion_depth=0) assert isinstance(result, Sequence) assert result.terms[0] == String("(") first_item = result.terms[1] assert isinstance(first_item, Alternatives) assert isinstance(first_item.terms[0], String) assert first_item.terms[0].value.startswith('"') def test_dict_literal_key_quoted(): """Literal string keys in Dict are JSON-quoted.""" dict_type = dict[Literal["k1", "k2"], int] result = _handle_dict(get_args(dict_type), recursion_depth=0) assert isinstance(result, Sequence) inner = result.terms[1] assert isinstance(inner, Optional) key_term = inner.term.terms[0] assert isinstance(key_term, Alternatives) for branch in key_term.terms: assert isinstance(branch, String) assert branch.value.startswith('"') and branch.value.endswith('"') def test_list_of_int_unchanged(): """Non-string types in List are not wrapped in quotes.""" list_type = list[int] result = _handle_list(get_args(list_type), recursion_depth=0) assert result.terms[1] == types.integer def test_ensure_json_quoted_sequence_passthrough(): """A Sequence term (already structured) passes through unchanged.""" seq = Sequence([String("a"), String("b")]) assert _ensure_json_quoted(seq) is seq def test_ensure_json_quoted_regex_passthrough(): """Regex terms (e.g. types.string) already include quotes internally.""" assert _ensure_json_quoted(types.string) is types.string assert _ensure_json_quoted(types.integer) is types.integer assert _ensure_json_quoted(types.boolean) is types.boolean def test_list_single_literal(): """A single-variant Literal inside list is still quoted.""" list_type = list[Literal["only"]] result = _handle_list(get_args(list_type), recursion_depth=0) item = result.terms[1] assert isinstance(item, Alternatives) branch = item.terms[0] assert isinstance(branch, String) assert branch == String('"only"') def test_dict_literal_value_quoted(): """Literal string values (not just keys) in Dict are JSON-quoted.""" dict_type = dict[str, Literal["yes", "no"]] result = _handle_dict(get_args(dict_type), recursion_depth=0) inner = result.terms[1] assert isinstance(inner, Optional) value_term = inner.term.terms[2] assert isinstance(value_term, Alternatives) for branch in value_term.terms: assert isinstance(branch, String) assert branch.value.startswith('"') and branch.value.endswith('"') def test_tuple_ellipsis_literal_quoted(): """Variable-length Tuple with Literal element type is JSON-quoted.""" tuple_type = Tuple[Literal["a", "b"], ...] result = _handle_tuple(get_args(tuple_type), recursion_depth=0) assert isinstance(result, Sequence) item = result.terms[1] assert isinstance(item, Alternatives) for branch in item.terms: assert isinstance(branch, String) assert branch.value.startswith('"') and branch.value.endswith('"') def test_list_of_bool_unchanged(): """Boolean types in List are not wrapped in quotes.""" list_type = list[bool] result = _handle_list(get_args(list_type), recursion_depth=0) assert result.terms[1] == types.boolean def test_dict_int_value_unchanged(): """Non-string value type in Dict is not wrapped in quotes.""" dict_type = dict[str, int] result = _handle_dict(get_args(dict_type), recursion_depth=0) inner = result.terms[1] assert isinstance(inner, Optional) value_term = inner.term.terms[2] assert value_term == types.integer def test_ensure_json_quoted_nested_alternatives(): """Nested Alternatives are recursively quoted.""" inner_alt = Alternatives([String("x"), String("y")]) outer_alt = Alternatives([inner_alt, String("z")]) result = _ensure_json_quoted(outer_alt) assert isinstance(result, Alternatives) inner_result = result.terms[0] assert isinstance(inner_result, Alternatives) for branch in inner_result.terms: assert isinstance(branch, String) assert branch.value.startswith('"') and branch.value.endswith('"') z_result = result.terms[1] assert isinstance(z_result, String) assert z_result == String('"z"') def test_literal_with_special_characters(): """Literal strings with spaces and punctuation are quoted correctly.""" list_type = list[Literal["hello world", "foo-bar"]] result = _handle_list(get_args(list_type), recursion_depth=0) item = result.terms[1] assert isinstance(item, Alternatives) assert len(item.terms) == 2 for branch in item.terms: assert isinstance(branch, String) assert branch.value.startswith('"') and branch.value.endswith('"') # --------------------------------------------------------------------------- # End-to-end regex tests for JSON quoting in containers # These verify the full pipeline: python_types_to_terms → to_regex → re.fullmatch # --------------------------------------------------------------------------- def test_e2e_list_literal_matches_quoted_json(): """List[Literal[...]] regex matches JSON-quoted strings and rejects bare words.""" pattern = to_regex(python_types_to_terms(list[Literal["Paris", "London"]])) assert _re.fullmatch(pattern, '["Paris"]') assert _re.fullmatch(pattern, '["Paris", "London"]') assert _re.fullmatch(pattern, '["London", "Paris", "London"]') assert not _re.fullmatch(pattern, "[Paris]") assert not _re.fullmatch(pattern, "['Paris']") def test_e2e_standalone_literal_no_quotes(): """Standalone Literal (not inside container) should NOT add quotes.""" pattern = to_regex(python_types_to_terms(Literal["cat", "dog"])) assert _re.fullmatch(pattern, "cat") assert _re.fullmatch(pattern, "dog") assert not _re.fullmatch(pattern, '"cat"') def test_e2e_list_literal_empty_string(): """Empty string literal inside List produces quoted empty string.""" pattern = to_regex(python_types_to_terms(list[Literal[""]])) assert _re.fullmatch(pattern, '[""]') assert _re.fullmatch(pattern, '["", ""]') assert not _re.fullmatch(pattern, "[]") def test_e2e_list_mixed_literal_string_and_int(): """Mixed Literal with string and int: only string values are quoted.""" pattern = to_regex(python_types_to_terms(list[Literal["a", 1]])) assert _re.fullmatch(pattern, '["a"]') assert _re.fullmatch(pattern, "[1]") assert _re.fullmatch(pattern, '["a", 1]') assert _re.fullmatch(pattern, '[1, "a"]') assert not _re.fullmatch(pattern, "[a]") def test_e2e_dict_literal_keys_quoted(): """Dict with Literal keys produces JSON-quoted keys.""" pattern = to_regex(python_types_to_terms(dict[Literal["k1", "k2"], int])) assert _re.fullmatch(pattern, '{"k1":0}') assert _re.fullmatch(pattern, '{"k1":42, "k2":-7}') assert not _re.fullmatch(pattern, "{k1:0}") def test_e2e_dict_literal_values_quoted(): """Dict with Literal string values produces JSON-quoted values.""" pattern = to_regex(python_types_to_terms(dict[str, Literal["yes", "no"]])) assert _re.fullmatch(pattern, '{"answer":"yes"}') assert _re.fullmatch(pattern, '{"a":"yes", "b":"no"}') def test_e2e_tuple_fixed_literal_quoted(): """Fixed-length Tuple with Literal elements produces JSON-quoted strings.""" pattern = to_regex(python_types_to_terms(Tuple[Literal["x"], Literal["y"]])) assert _re.fullmatch(pattern, '("x", "y")') assert not _re.fullmatch(pattern, "(x, y)") def test_e2e_tuple_variadic_literal_quoted(): """Variable-length Tuple with Literal produces JSON-quoted strings.""" pattern = to_regex(python_types_to_terms(Tuple[Literal["a", "b"], ...])) assert _re.fullmatch(pattern, '("a")') assert _re.fullmatch(pattern, '("a", "b", "a")') assert not _re.fullmatch(pattern, "(a)") def test_e2e_list_enum_string_values_quoted(): """Enum with string members inside List produces JSON-quoted values.""" class Color(Enum): RED = "red" BLUE = "blue" pattern = to_regex(python_types_to_terms(list[Color])) assert _re.fullmatch(pattern, '["red"]') assert _re.fullmatch(pattern, '["red", "blue"]') assert not _re.fullmatch(pattern, "[red]") def test_e2e_list_int_not_quoted(): """List[int] should not have any quoting applied.""" pattern = to_regex(python_types_to_terms(list[int])) assert _re.fullmatch(pattern, "[42]") assert _re.fullmatch(pattern, "[1, 2, 3]") assert not _re.fullmatch(pattern, '["1"]') def test_e2e_list_literal_special_characters(): """Literal strings with spaces and hyphens are quoted correctly in regex.""" pattern = to_regex(python_types_to_terms(list[Literal["hello world", "foo-bar"]])) assert _re.fullmatch(pattern, '["hello world"]') assert _re.fullmatch(pattern, '["hello world", "foo-bar"]') assert not _re.fullmatch(pattern, "[hello world]") def test_e2e_dict_literal_key_and_enum_value(): """Dict with Literal keys and Enum values: both quoted.""" class Status(Enum): ON = "on" OFF = "off" pattern = to_regex(python_types_to_terms(dict[Literal["switch"], Status])) assert _re.fullmatch(pattern, '{"switch":"on"}') assert _re.fullmatch(pattern, '{"switch":"off"}') assert not _re.fullmatch(pattern, "{switch:on}") def test_to_regex(): string_term = String("hello") assert to_regex(string_term) == r"hello" regex_term = Regex("[0-9]+") assert to_regex(regex_term) == r"([0-9]+)" json_schema_term = JsonSchema({"type": "integer"}) assert to_regex(json_schema_term) == r"((-)?(0|[1-9][0-9]*))" choice_term = Choice(["a", "b", "c"]) assert to_regex(choice_term) == r"(a|b|c)" kleene_star = KleeneStar(String("a")) assert to_regex(kleene_star) == r"(a)*" kleene_plus = KleenePlus(String("a")) assert to_regex(kleene_plus) == r"(a)+" optional_term = Optional(String("a")) assert to_regex(optional_term) == r"(a)?" alt_term = Alternatives([String("a"), String("b")]) assert to_regex(alt_term) == r"(a|b)" seq_term = Sequence([String("a"), String("b")]) assert to_regex(seq_term) == r"ab" exact_term = QuantifyExact(String("a"), 3) assert to_regex(exact_term) == r"(a){3}" min_term = QuantifyMinimum(String("a"), 2) assert to_regex(min_term) == r"(a){2,}" max_term = QuantifyMaximum(String("a"), 5) assert to_regex(max_term) == r"(a){,5}" between_term = QuantifyBetween(String("a"), 1, 3) assert to_regex(between_term) == r"(a){1,3}" with pytest.raises(TypeError): to_regex(Term()) ================================================ FILE: tests/types/test_json_schema_utils.py ================================================ import sys from dataclasses import is_dataclass from typing import Any, List, Literal, Optional from pydantic import BaseModel, TypeAdapter from pydantic_core import PydanticUndefined from outlines.types.json_schema_utils import ( schema_type_to_python, json_schema_dict_to_typeddict, json_schema_dict_to_pydantic, json_schema_dict_to_dataclass ) if sys.version_info >= (3, 12): from typing import _TypedDictMeta # type: ignore else: from typing_extensions import _TypedDictMeta # type: ignore def test_schema_type_to_python_simple_types(): assert schema_type_to_python({"type": "string"}, "pydantic") is str assert schema_type_to_python({"type": "integer"}, "pydantic") is int assert schema_type_to_python({"type": "number"}, "pydantic") is float assert schema_type_to_python({"type": "boolean"}, "pydantic") is bool assert schema_type_to_python({"type": "object"}, "foo") is Any assert schema_type_to_python({}, "pydantic") is Any def test_schema_type_to_python_enum(): schema = {"enum": ["red", "green", "blue"]} result = schema_type_to_python(schema, "pydantic") assert result == Literal[("red", "green", "blue")] def test_schema_type_to_python_array(): # String items schema = {"type": "array", "items": {"type": "string"}} result = schema_type_to_python(schema, "pydantic") assert result == List[str] # Integer items schema = {"type": "array", "items": {"type": "integer"}} result = schema_type_to_python(schema, "pydantic") assert result == List[int] # Without items specification schema = {"type": "array"} result = schema_type_to_python(schema, "pydantic") assert result == List[Any] def test_schema_type_to_python_object(): schema = { "type": "object", "title": "TestObject", "properties": { "name": {"type": "string"}, "age": {"type": "integer"} }, "required": ["name"] } # Pydantic caller pydantic_result = schema_type_to_python(schema, "pydantic") assert issubclass(pydantic_result, BaseModel) assert pydantic_result.__name__ == "TestObject" assert pydantic_result.model_fields["name"].annotation is str assert pydantic_result.model_fields["age"].annotation == Optional[int] # Typeddict caller typeddict_result = schema_type_to_python(schema, "typeddict") assert isinstance(typeddict_result, _TypedDictMeta) assert typeddict_result.__name__ == "TestObject" assert typeddict_result.__annotations__["name"] is str assert typeddict_result.__annotations__["age"] == Optional[int] # Dataclass caller dataclass_result = schema_type_to_python(schema, "dataclass") print(TypeAdapter(dataclass_result).json_schema()) assert hasattr(dataclass_result, "__dataclass_fields__") assert dataclass_result.__annotations__["name"] is str assert not hasattr(dataclass_result, "name") assert dataclass_result.__annotations__["age"] is int assert dataclass_result.age is None def test_schema_type_to_python_unknown_type(): # Unknown type schema = {"type": "unknown"} result = schema_type_to_python(schema, "pydantic") assert result == Any # Schema without type schema = {} result = schema_type_to_python(schema, "pydantic") assert result == Any def test_json_schema_dict_to_typeddict_basic(): schema = { "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "integer"} }, "required": ["name"] } result = json_schema_dict_to_typeddict(schema, "Person") assert isinstance(result, _TypedDictMeta) assert result.__name__ == "Person" annotations = result.__annotations__ assert annotations["name"] is str assert annotations["age"] == Optional[int] def test_json_schema_dict_to_typeddict_array_enum(): schema = { "type": "object", "properties": { "tags": { "type": "array", "items": {"type": "string"} }, "preferences": { "enum": ["light", "dark"] } }, "required": ["tags"] } result = json_schema_dict_to_typeddict(schema) assert isinstance(result, _TypedDictMeta) assert result.__name__ == "AnonymousTypedDict" annotations = result.__annotations__ assert annotations["tags"] == List[str] assert annotations["preferences"] == Optional[Literal[("light", "dark")]] def test_json_schema_dict_to_typeddict_nested_object(): schema = { "type": "object", "properties": { "field": { "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "integer"} }, "required": ["name"] } }, "required": ["field"] } result = json_schema_dict_to_typeddict(schema) assert isinstance(result, _TypedDictMeta) assert result.__name__ == "AnonymousTypedDict" annotations = result.__annotations__ assert isinstance(annotations["field"], _TypedDictMeta) assert annotations["field"].__name__ == "AnonymousTypedDict" assert annotations["field"].__annotations__["name"] is str assert annotations["field"].__annotations__["age"] == Optional[int] def test_json_schema_dict_to_pydantic_basic(): schema = { "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "integer"} }, "required": ["name"] } result = json_schema_dict_to_pydantic(schema, "Person") assert issubclass(result, BaseModel) assert result.__name__ == "Person" assert result.model_fields["name"].annotation is str assert result.model_fields["age"].annotation == Optional[int] assert result.model_fields["name"].default == PydanticUndefined result.model_fields["age"].default is None def test_json_schema_dict_to_pydantic_array_enum(): schema = { "type": "object", "properties": { "tags": { "type": "array", "items": {"type": "string"} }, "status": { "enum": ["active", "inactive", "pending"] }, }, "required": ["status"] } result = json_schema_dict_to_pydantic(schema) assert issubclass(result, BaseModel) assert result.__name__ == "AnonymousPydanticModel" assert result.model_fields["tags"].annotation == Optional[List[str]] assert result.model_fields["status"].annotation == Literal[("active", "inactive", "pending")] assert result.model_fields["tags"].default is None assert result.model_fields["status"].default == PydanticUndefined def test_json_schema_dict_to_pydantic_nested_object(): schema = { "type": "object", "properties": { "field": { "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "integer"} }, "required": ["name"] } }, "required": ["field"] } result = json_schema_dict_to_pydantic(schema) assert issubclass(result, BaseModel) assert result.__name__ == "AnonymousPydanticModel" assert issubclass(result.model_fields["field"].annotation, BaseModel) assert result.model_fields["field"].annotation.__name__ == "AnonymousPydanticModel" field = result.model_fields["field"].annotation assert field.model_fields["name"].annotation is str assert field.model_fields["age"].annotation == Optional[int] assert field.model_fields["name"].default == PydanticUndefined assert field.model_fields["age"].default is None def test_json_schema_dict_to_dataclass_basic(): schema = { "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "integer"} }, "required": ["name"] } result = json_schema_dict_to_dataclass(schema, "Person") assert is_dataclass(result) assert result.__name__ == "Person" annotations = result.__annotations__ assert annotations["name"] is str assert annotations["age"] is int assert not hasattr(result, "name") assert result.age is None def test_json_schema_dict_to_dataclass_array_enum(): schema = { "type": "object", "properties": { "status": { "enum": ["active", "inactive", "pending"] }, "tags": { "type": "array", "items": {"type": "string"} }, }, "required": ["status"] } result = json_schema_dict_to_dataclass(schema) assert is_dataclass(result) assert result.__name__ == "AnonymousDataclass" annotations = result.__annotations__ assert annotations["tags"] == List[str] assert annotations["status"] == Literal[("active", "inactive", "pending")] assert not hasattr(result, "status") assert result.tags is None def test_json_schema_dict_to_dataclass_nested_object(): schema = { "type": "object", "properties": { "field": { "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "integer"} }, "required": ["name"] } }, "required": ["field"] } result = json_schema_dict_to_dataclass(schema) assert is_dataclass(result) assert result.__name__ == "AnonymousDataclass" annotations = result.__annotations__ assert is_dataclass(annotations["field"]) assert annotations["field"].__name__ == "AnonymousDataclass" field = annotations["field"] assert field.__annotations__["name"] is str assert field.__annotations__["age"] is int assert not hasattr(field, "name") assert field.age is None ================================================ FILE: tests/types/test_to_regex.py ================================================ import pytest from outlines.types.dsl import ( Choice, String, Regex, JsonSchema, KleeneStar, KleenePlus, QuantifyBetween, QuantifyExact, QuantifyMaximum, QuantifyMinimum, Sequence, Alternatives, Optional, Term, to_regex, ) def test_to_regex_simple(): a = String("a") assert to_regex(a) == "a" assert a.matches("a") is True a = Regex("[0-9]") assert to_regex(a) == "([0-9])" assert a.matches(0) is True assert a.matches(10) is False assert a.matches("a") is False a = JsonSchema({"type": "integer"}) assert to_regex(a) == r"((-)?(0|[1-9][0-9]*))" assert a.matches(1) is True assert a.matches("1") is True assert a.matches("a") is False a = Choice(["a", "b"]) assert to_regex(a) == "(a|b)" assert a.matches("a") is True assert a.matches("b") is True assert a.matches("c") is False a = Optional(String("a")) assert to_regex(a) == "(a)?" assert a.matches("") is True assert a.matches("a") is True a = KleeneStar(String("a")) assert to_regex(a) == "(a)*" assert a.matches("") is True assert a.matches("a") is True assert a.matches("aaaaa") is True a = KleenePlus(String("a")) assert to_regex(a) == "(a)+" assert a.matches("") is False assert a.matches("a") is True assert a.matches("aaaaa") is True a = QuantifyExact(String("a"), 2) assert to_regex(a) == "(a){2}" assert a.matches("a") is False assert a.matches("aa") is True assert a.matches("aaa") is False a = QuantifyMinimum(String("a"), 2) assert to_regex(a) == "(a){2,}" assert a.matches("a") is False assert a.matches("aa") is True assert a.matches("aaa") is True a = QuantifyMaximum(String("a"), 2) assert to_regex(a) == "(a){,2}" assert a.matches("aa") is True assert a.matches("aaa") is False a = QuantifyBetween(String("a"), 1, 2) assert to_regex(a) == "(a){1,2}" assert a.matches("") is False assert a.matches("a") is True assert a.matches("aa") is True assert a.matches("aaa") is False with pytest.raises(TypeError, match="Cannot convert"): to_regex(Term()) def test_to_regex_combinations(): a = Sequence([Regex("dog|cat"), String("fish")]) assert to_regex(a) == "(dog|cat)fish" ================================================ FILE: tests/types/test_types_utils.py ================================================ import datetime import pytest import sys from dataclasses import dataclass from enum import Enum if sys.version_info >= (3, 11): from enum import member else: # Python < 3.11 doesn't have enum.member, but also doesn't warn about partial in enums def member(x): # type: ignore[no-redef] return x from functools import partial from typing import ( Annotated, Any, Dict, List, Literal, NewType, Optional, Tuple, Union ) from genson import SchemaBuilder from pydantic import BaseModel from outlines.types.dsl import Choice, JsonSchema from outlines.types.utils import ( get_enum_from_choice, get_enum_from_literal, get_schema_from_enum, get_schema_from_signature, is_bool, is_callable, is_date, is_dataclass, is_datetime, is_enum, is_float, is_float_instance, is_genson_schema_builder, is_int, is_int_instance, is_literal, is_native_dict, is_pydantic_model, is_str, is_str_instance, is_time, is_typed_dict, is_typing_dict, is_typing_list, is_typing_tuple, is_union ) if sys.version_info >= (3, 12): from typing import TypedDict else: from typing_extensions import TypedDict # Type identification @pytest.fixture def sample_enum(): class SampleEnum(Enum): A = 1 B = 2 return SampleEnum @pytest.fixture def sample_complex_enum(): def add_func(a: float, b: float) -> float: return a + b class SampleComplexEnum(Enum): add = member(partial(add_func)) a = "a" b = 2 return SampleComplexEnum @pytest.fixture def sample_empty_enum(): def add_func(a: float, b: float) -> float: return a + b # the enum is empty because the function is not registered as callable class SampleEmptyEnum(Enum): add = add_func return SampleEmptyEnum @pytest.fixture def sample_class(): class SampleClass: pass return SampleClass @pytest.fixture def sample_dataclass(): @dataclass class SampleDataclass: field1: str field2: int return SampleDataclass @pytest.fixture def sample_typed_dict(): class SampleTypedDict(TypedDict): name: str age: int return SampleTypedDict @pytest.fixture def sample_pydantic_model(): class SamplePydanticModel(BaseModel): name: str age: int return SamplePydanticModel @pytest.fixture def sample_schema_builder(): builder = SchemaBuilder() builder.add_schema({"type": "object", "properties": {}}) builder.add_object({"hi": "there"}) builder.add_object({"hi": 5}) return builder @pytest.fixture def sample_function(): def sample_function(foo: str, bar: List[int]): pass return sample_function @pytest.fixture def sample_function_missing_type(): def sample_function(foo, bar: List[int]): pass return sample_function def test_is_int(): assert is_int(int) assert not is_int(float) assert not is_int(1) assert not is_int(List[int]) assert not is_int(Dict[int, int]) assert is_int(Annotated[int, "some metadata"]) assert not is_int(Annotated[str, "some metadata"]) assert is_int(NewType("UserId", int)) assert not is_int(NewType("UserId", str)) def test_is_int_instance(): assert is_int_instance(1) assert not is_int_instance(True) assert not is_int_instance(1.0) assert not is_int_instance("1") assert not is_int_instance(int) def test_is_float(): assert is_float(float) assert not is_float(int) assert not is_float(1.0) assert not is_float(List[float]) assert not is_float(Dict[float, float]) assert is_float(Annotated[float, "some metadata"]) assert not is_float(Annotated[int, "some metadata"]) assert is_float(NewType("UserId", float)) assert not is_float(NewType("UserId", int)) def test_is_float_instance(): assert is_float_instance(1.0) assert not is_float_instance(1) assert not is_float_instance("1.0") assert not is_float_instance(float) def test_is_str(): assert is_str(str) assert not is_str(int) assert not is_str("hello") assert not is_str(List[str]) assert not is_str(Dict[str, str]) assert is_str(Annotated[str, "some metadata"]) assert not is_str(Annotated[int, "some metadata"]) assert is_str(NewType("UserId", str)) assert not is_str(NewType("UserId", int)) def test_is_str_instance(): assert is_str_instance("hello") assert is_str_instance("") assert is_str_instance("123") assert not is_str_instance(123) assert not is_str_instance(str) def test_is_bool(): assert is_bool(bool) assert not is_bool(int) assert not is_bool(True) assert is_bool(Annotated[bool, "some metadata"]) assert not is_bool(Annotated[int, "some metadata"]) assert is_bool(NewType("UserId", bool)) assert not is_bool(NewType("UserId", int)) def test_is_datetime(): assert is_datetime(datetime.datetime) assert not is_datetime(datetime.date) assert not is_datetime(datetime.time) assert not is_datetime(datetime.datetime.now()) def test_is_date(): assert is_date(datetime.date) assert not is_date(datetime.datetime) assert not is_date(datetime.time) assert not is_date(datetime.date.today()) def test_is_time(): assert is_time(datetime.time) assert not is_time(datetime.datetime) assert not is_time(datetime.date) assert not is_time(datetime.time(12, 30)) def test_is_native_dict(): assert is_native_dict(dict) assert not is_native_dict({}) assert not is_native_dict({"key": "value"}) assert not is_native_dict(list) assert not is_native_dict(dict[str, int]) def test_is_typing_dict(): assert is_typing_dict(dict[str, int]) assert is_typing_dict(Dict[int, str]) assert not is_typing_dict(dict) assert not is_typing_dict({}) def test_is_typing_list(): assert is_typing_list(list[int]) assert is_typing_list(List[int]) assert not is_typing_list(list) assert not is_typing_list([]) assert not is_typing_list(dict) def test_is_typing_tuple(): assert is_typing_tuple(tuple[int, str]) assert is_typing_tuple(Tuple[int, str]) assert not is_typing_tuple(tuple) assert not is_typing_tuple(()) assert not is_typing_tuple(list) def test_is_union(): assert is_union(Union[int, str]) assert is_union(Optional[int]) assert not is_union(list) assert not is_union(["a", "b"]) assert not is_union(Literal[int, str]) def test_is_literal(): assert is_literal(Literal["a", "b"]) assert not is_literal(str) assert not is_literal("a") assert not is_literal(["a", "b"]) assert not is_literal(Union[str, int]) def test_is_dataclass( sample_dataclass, sample_class, sample_typed_dict, sample_pydantic_model ): assert is_dataclass(sample_dataclass) assert not is_dataclass(sample_dataclass(field1="test", field2=123)) assert not is_dataclass(dict) assert not is_dataclass(sample_class) assert not is_dataclass(sample_typed_dict) assert not is_dataclass(sample_pydantic_model) def test_is_typed_dict( sample_typed_dict, sample_class, sample_dataclass, sample_pydantic_model ): assert is_typed_dict(sample_typed_dict) assert not is_typed_dict(sample_typed_dict(name="test", age=30)) assert not is_typed_dict(dict) assert not is_typed_dict(sample_class) assert not is_typed_dict(sample_dataclass) assert not is_typed_dict(sample_pydantic_model) def test_is_pydantic_model( sample_pydantic_model, sample_class, sample_dataclass, sample_typed_dict ): assert is_pydantic_model(sample_pydantic_model) assert not is_pydantic_model(sample_pydantic_model(name="test", age=30)) # Instance assert not is_pydantic_model(dict) assert not is_pydantic_model(sample_class) assert not is_pydantic_model(sample_dataclass) assert not is_pydantic_model(sample_typed_dict) def test_is_genson_schema_builder( sample_schema_builder, sample_class, sample_dataclass, sample_typed_dict, sample_pydantic_model ): assert is_genson_schema_builder(sample_schema_builder) assert not is_genson_schema_builder(dict) assert not is_genson_schema_builder(str) assert not is_genson_schema_builder({"type": 'object', "properties": {}}) assert not is_genson_schema_builder('{"type": "object", "properties": {}}') assert not is_genson_schema_builder(sample_class) assert not is_genson_schema_builder(sample_dataclass) assert not is_genson_schema_builder(sample_typed_dict) assert not is_genson_schema_builder(sample_pydantic_model) def test_is_enum(sample_enum): assert is_enum(sample_enum) assert not is_enum(sample_enum.A) assert not is_enum(dict) assert not is_enum(Literal["a", "b"]) assert not is_enum(["a", "b"]) def test_is_callable(sample_function, sample_class, sample_dataclass, sample_typed_dict, sample_pydantic_model): assert is_callable(sample_function) assert is_callable(lambda x: x) assert not is_callable(dict) assert not is_callable(sample_class) assert not is_callable(sample_dataclass) assert not is_callable(sample_typed_dict) assert not is_callable(sample_pydantic_model) # Type conversion def test_get_enum_from_choice(sample_enum): choice = Choice(["a", "b", sample_enum.A]) enum = get_enum_from_choice(choice) assert is_enum(enum) assert enum.a.value == "a" assert enum.b.value == "b" assert getattr(enum, "SampleEnum.A").value == sample_enum.A def test_get_enum_from_literal(sample_enum): basic_enum = get_enum_from_literal(Literal["a", "b"]) assert(is_enum(basic_enum)) assert basic_enum.a.value == "a" assert basic_enum.b.value == "b" complex_enum = get_enum_from_literal(Literal["a", 1, True, None, sample_enum.A]) assert is_enum(complex_enum) assert complex_enum.a.value == "a" assert getattr(complex_enum, "1").value == 1 assert getattr(complex_enum, "True").value assert getattr(complex_enum, "None").value is None assert getattr(complex_enum, "SampleEnum.A").value == sample_enum.A def test_get_schema_from_signature(sample_function, sample_function_missing_type): result = get_schema_from_signature(sample_function) assert result["type"] == "object" assert list(result["properties"].keys()) == ["foo", "bar"] assert result["properties"]["foo"]["type"] == "string" assert result["properties"]["bar"]["type"] == "array" assert result["properties"]["bar"]["items"]["type"] == "integer" # in case of a function missing type annotations with pytest.raises(ValueError): get_schema_from_signature(sample_function_missing_type) def test_get_schema_from_enum(sample_complex_enum, sample_empty_enum): schema = get_schema_from_enum(sample_complex_enum) assert JsonSchema(schema) assert schema["title"] == sample_complex_enum.__name__ assert len(schema["oneOf"]) == len(sample_complex_enum) for elt in schema["oneOf"]: assert type(elt) in [int, float, bool, type(None), str, dict] # in case of an empty enum because the function member is not registered as callable with pytest.raises(ValueError): get_schema_from_enum(sample_empty_enum)