Showing preview only (1,287K chars total). Download the full file or copy to clipboard to get everything.
Repository: dottxt-ai/outlines
Branch: main
Commit: 54827e6d539b
Files: 239
Total size: 1.2 MB
Directory structure:
gitextract_sobc03i9/
├── .devcontainer/
│ └── devcontainer.json
├── .editorconfig
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yml
│ │ └── config.yml
│ ├── PULL_REQUEST_TEMPLATE/
│ │ └── pull_request_template.md
│ ├── scripts/
│ │ └── build_sdist_and_wheel.sh
│ └── workflows/
│ ├── build_documentation.yml
│ ├── publish_documentation.yml
│ ├── release_pypi.yaml
│ ├── tests.yml
│ └── tests_api_models.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .pydocstyle
├── .readthedocs.yaml
├── .vscode/
│ └── settings.json
├── LICENSE
├── README.md
├── docs/
│ ├── api_reference/
│ │ └── index.md
│ ├── blog/
│ │ └── index.md
│ ├── community/
│ │ ├── contribute.md
│ │ ├── examples.md
│ │ ├── feedback.md
│ │ ├── index.md
│ │ └── versioning.md
│ ├── core_concepts.md
│ ├── examples/
│ │ ├── chain_of_density.md
│ │ ├── chain_of_thought.md
│ │ ├── classification.md
│ │ ├── dating_profiles.md
│ │ ├── deploy-using-bentoml.md
│ │ ├── deploy-using-cerebrium.md
│ │ ├── deploy-using-modal.md
│ │ ├── earnings-reports.md
│ │ ├── extract_event_details.md
│ │ ├── extract_event_details.py
│ │ ├── extraction.md
│ │ ├── index.md
│ │ ├── knowledge_graph_extraction.md
│ │ ├── models_playing_chess.md
│ │ ├── prompt_templates/
│ │ │ ├── chain_of_density.txt
│ │ │ ├── classification.txt
│ │ │ ├── react_agent.txt
│ │ │ ├── simtom_prospective_taking.txt
│ │ │ └── simtom_simulation.txt
│ │ ├── qa-with-citations.md
│ │ ├── react_agent.md
│ │ ├── read-pdfs.md
│ │ ├── receipt-digitization.md
│ │ ├── simtom.md
│ │ └── structured_generation_workflow.md
│ ├── features/
│ │ ├── advanced/
│ │ │ ├── backends.md
│ │ │ └── logits_processors.md
│ │ ├── core/
│ │ │ ├── generator.md
│ │ │ ├── inputs.md
│ │ │ └── output_types.md
│ │ ├── index.md
│ │ ├── models/
│ │ │ ├── anthropic.md
│ │ │ ├── dottxt.md
│ │ │ ├── gemini.md
│ │ │ ├── index.md
│ │ │ ├── llamacpp.md
│ │ │ ├── mistral.md
│ │ │ ├── mlxlm.md
│ │ │ ├── ollama.md
│ │ │ ├── openai.md
│ │ │ ├── openai_compatible.md
│ │ │ ├── openrouter.md
│ │ │ ├── sglang.md
│ │ │ ├── tgi.md
│ │ │ ├── transformers.md
│ │ │ ├── transformers_multimodal.md
│ │ │ ├── vllm.md
│ │ │ └── vllm_offline.md
│ │ └── utility/
│ │ ├── application.md
│ │ ├── regex_dsl.md
│ │ └── template.md
│ ├── guide/
│ │ ├── architecture.md
│ │ ├── chat_templating.md
│ │ ├── core_concepts.md
│ │ ├── fastapi_vllm_deployment.md
│ │ ├── getting_started.md
│ │ ├── installation.md
│ │ ├── migration.md
│ │ ├── selecting_an_inference_backend.md
│ │ └── vlm.md
│ ├── index.md
│ ├── overrides/
│ │ ├── home.html
│ │ └── main.html
│ └── stylesheets/
│ └── extra.css
├── environment.yml
├── examples/
│ ├── babyagi.py
│ ├── beam-cloud/
│ │ ├── README.md
│ │ └── app.py
│ ├── bentoml/
│ │ ├── .bentoignore
│ │ ├── bentofile.yaml
│ │ ├── import_model.py
│ │ ├── requirements.txt
│ │ └── service.py
│ ├── cerebrium/
│ │ ├── cerebrium.toml
│ │ └── main.py
│ ├── dating_profile.py
│ ├── llamacpp_example.py
│ ├── llamacpp_processor.py
│ ├── math_generate_code.py
│ ├── meta_prompting.py
│ ├── modal_example.py
│ ├── pick_odd_one_out.py
│ ├── prompts/
│ │ ├── babyagi_create_task.txt
│ │ ├── babyagi_perform_task.txt
│ │ ├── babyagi_prioritize_task.txt
│ │ ├── dating_profile.txt
│ │ ├── pick_odd_one_out.txt
│ │ └── self_consistency.txt
│ ├── react.py
│ ├── sampling.ipynb
│ ├── self_consistency.py
│ ├── simulation_based_inference.ipynb
│ └── vllm_offline_integration.py
├── flake.nix
├── llm.txt
├── mkdocs.yml
├── outlines/
│ ├── __init__.py
│ ├── applications.py
│ ├── backends/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── llguidance.py
│ │ ├── outlines_core.py
│ │ └── xgrammar.py
│ ├── caching.py
│ ├── generator.py
│ ├── grammars/
│ │ ├── arithmetic.lark
│ │ ├── common.lark
│ │ └── json.lark
│ ├── grammars.py
│ ├── inputs.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── anthropic.py
│ │ ├── base.py
│ │ ├── dottxt.py
│ │ ├── gemini.py
│ │ ├── llamacpp.py
│ │ ├── lmstudio.py
│ │ ├── mistral.py
│ │ ├── mlxlm.py
│ │ ├── ollama.py
│ │ ├── openai.py
│ │ ├── sglang.py
│ │ ├── tgi.py
│ │ ├── tokenizer.py
│ │ ├── transformers.py
│ │ ├── utils.py
│ │ ├── vllm.py
│ │ └── vllm_offline.py
│ ├── processors/
│ │ ├── __init__.py
│ │ ├── base_logits_processor.py
│ │ └── tensor_adapters/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── mlx.py
│ │ ├── numpy.py
│ │ └── torch.py
│ ├── py.typed
│ ├── release_note.md
│ ├── templates.py
│ └── types/
│ ├── __init__.py
│ ├── airports.py
│ ├── countries.py
│ ├── dsl.py
│ ├── json_schema_utils.py
│ ├── locale/
│ │ ├── __init__.py
│ │ └── us.py
│ └── utils.py
├── pyproject.toml
├── requirements-doc.txt
├── scripts/
│ └── gen_ref_pages.py
├── setup.cfg
├── shell.nix
└── tests/
├── __init__.py
├── backends/
│ ├── test_backends.py
│ ├── test_backends_utils.py
│ ├── test_llguidance.py
│ ├── test_outlines_core.py
│ └── test_xgrammar.py
├── cfg_samples/
│ ├── arithmetic/
│ │ ├── lots_of_ops.arithmetic.test
│ │ └── simple_math.arithmetic.test
│ └── json/
│ ├── outlines.generate.samplers.mypy.json.test
│ ├── simple_fruit.json.test
│ └── simple_fruit_no_indent.json.test
├── conftest.py
├── models/
│ ├── test_anthopic_type_adapter.py
│ ├── test_anthropic.py
│ ├── test_dottxt.py
│ ├── test_dottxt_type_adapter.py
│ ├── test_gemini.py
│ ├── test_gemini_type_adapter.py
│ ├── test_llamacpp.py
│ ├── test_llamacpp_tokenizer.py
│ ├── test_llamacpp_type_adapter.py
│ ├── test_lmstudio.py
│ ├── test_lmstudio_type_adapter.py
│ ├── test_mistral.py
│ ├── test_mistral_type_adapter.py
│ ├── test_mlxlm.py
│ ├── test_mlxlm_type_adapter.py
│ ├── test_ollama.py
│ ├── test_ollama_type_adapter.py
│ ├── test_openai.py
│ ├── test_openai_type_adapter.py
│ ├── test_sglang.py
│ ├── test_sglang_type_adapter.py
│ ├── test_tgi.py
│ ├── test_tgi_model_adapter.py
│ ├── test_tokenizer.py
│ ├── test_transformers.py
│ ├── test_transformers_multimodal.py
│ ├── test_transformers_multimodal_type_adapter.py
│ ├── test_transformers_tokenizer.py
│ ├── test_transformers_type_adapter.py
│ ├── test_utils.py
│ ├── test_vllm.py
│ ├── test_vllm_offline.py
│ ├── test_vllm_offline_type_adapter.py
│ └── test_vllm_type_adapter.py
├── processors/
│ ├── test_base_processor.py
│ └── test_tensor_adapters.py
├── test_applications.py
├── test_cache.py
├── test_generator.py
├── test_inputs.py
├── test_templates.py
├── test_utils/
│ ├── mock_lmstudio_client.py
│ ├── mock_openai_client.py
│ ├── mock_tgi_client.py
│ └── utils.py
└── types/
├── test_custom_types.py
├── test_dsl.py
├── test_json_schema_utils.py
├── test_to_regex.py
└── test_types_utils.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .devcontainer/devcontainer.json
================================================
{
"name": "dottxt-ai",
"image": "mcr.microsoft.com/devcontainers/python:3.12",
"runArgs": [
"--device=nvidia.com/gpu=all"
],
"hostRequirements": {
"gpu": "optional"
},
"features": {
"ghcr.io/devcontainers/features/conda:1": {},
"ghcr.io/devcontainers/features/nvidia-cuda:1": {
"installCudnn": true,
"installToolkit": true,
"cudaVersion": "12.4"
},
"ghcr.io/devcontainers/features/rust:1": {}
}
}
================================================
FILE: .editorconfig
================================================
# EditorConfig is awesome: https://EditorConfig.org
# top-most EditorConfig file
root = true
[*]
indent_style = space
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[*.yaml]
indent_size = 2
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
# Issue template inspired by NumPy's excellent template:
# https://github.com/numpy/numpy/blob/main/.github/ISSUE_TEMPLATE/bug-report.yml
name: 🐞 Bug report
description: Create a bug report to help us reproduce and fix it.
title: "<Please write a descriptive title>"
labels: ["bug"]
body:
- type: markdown
attributes:
value: >-
Thank you for taking the time to file a bug report. First, carefully read
the following before everything else:
- Does your issue only arise in a library that uses Outlines? If so,
submit your issue to this library's issue tracker.
- Did you check the issue tracker for open and closed issues that may be
related to your bug?
- type: textarea
attributes:
label: "Describe the issue as clearly as possible:"
validations:
required: true
- type: textarea
attributes:
label: "Steps/code to reproduce the bug:"
description: >
A short code example that reproduces the problem/missing feature. It
should be self-contained, i.e., can be copy-pasted into the Python
interpreter or run as-is via `python myproblem.py`.
placeholder: |
import outlines
<< your code here >>
render: python
validations:
required: true
- type: textarea
attributes:
label: "Expected result:"
description: >
Please describe what you expect the above example to output.
placeholder: |
<< the expected result here >>
render: shell
validations:
required: true
- type: textarea
attributes:
label: "Error message:"
description: >
Please include the full error message, if any.
placeholder: |
<< Full traceback starting from `Traceback: ...` >>
render: shell
- type: textarea
attributes:
label: "Outlines/Python version information:"
description: |
Please run the following code and paste the output here.
python -c "from outlines import _version; print(_version.__version__)";
python -c "import sys; print('Python', sys.version)";
pip freeze;
value: |
Version information
<details>
```
(command output here)
```
</details>
validations:
required: true
- type: textarea
attributes:
label: "Context for the issue:"
description: |
Please explain how this issue affects your work or why it should be prioritized.
placeholder: |
<< your explanation here >>
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
contact_links:
- name: 🤔 Questions & Help
url: https://github.com/dottxt-ai/outlines/discussions/new
about: "If you have a question about how to use Outlines, please start a discussion."
================================================
FILE: .github/PULL_REQUEST_TEMPLATE/pull_request_template.md
================================================
# 🚧 Thank you for opening a PR!
A few important guidelines and requirements before we can merge your PR:
- [ ] We should be able to understand what the PR does from its title only;
- [ ] There is a high-level description of the changes;
- [ ] *If I add a new feature*, there is an [issue][issues] discussing it already;
- [ ] There are links to *all* the relevant issues, discussions and PRs;
- [ ] The branch is rebased on the latest `main` commit;
- [ ] **Commit messages** follow these [guidelines][git-guidelines];
- [ ] One commit per logical change;
- [ ] The code respects the current **naming conventions**;
- [ ] Docstrings follow the [numpy style guide][docstring-guidelines];
- [ ] `pre-commit` is installed and configured on your machine, and you ran it before opening the PR;
- [ ] There are tests covering the changes;
- [ ] The documentation is up-to-date.
Consider opening a **Draft PR** if your work is still in progress but you would
like some feedback from other contributors.
[issues]: https://github.com/dottxt-ai/outlines/issues
[git-guidelines]: https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html
[docstring-guidelines]: https://numpydoc.readthedocs.io/en/latest/format.html
================================================
FILE: .github/scripts/build_sdist_and_wheel.sh
================================================
#!/bin/bash
# Build the sdist and wheel for the package, then smoke-test each artifact by
# installing it into a fresh virtual environment and importing `outlines`.
#
# Abort on the first failing command (-e), on use of an unset variable (-u),
# and on a failure anywhere in a pipeline (pipefail). Without this the script's
# exit status is that of its final `cd ..`, so a failed build, install or
# import would still be reported as success to the calling CI job.
set -euo pipefail

# Build sdist and wheel
python -m pip install -U pip
python -m pip install build
python -m build

# Check sdist install and imports.
# The check runs from a scratch directory with its own virtual environment, so
# the import is resolved from the installed artifact.
mkdir -p test-sdist
cd test-sdist
python -m venv venv-sdist
venv-sdist/bin/python -m pip install ../dist/outlines-*.tar.gz
venv-sdist/bin/python -c "import outlines"
cd ..

# Check wheel install and imports (same procedure, separate environment).
mkdir -p test-wheel
cd test-wheel
python -m venv venv-wheel
venv-wheel/bin/python -m pip install ../dist/outlines-*.whl
venv-wheel/bin/python -c "import outlines"
cd ..
================================================
FILE: .github/workflows/build_documentation.yml
================================================
name: Build the documentation
on:
pull_request:
types: [opened, synchronize, reopened, closed]
branches: [main]
workflow_dispatch:
permissions:
contents: write
pull-requests: write
jobs:
build:
name: Build and Deploy Documentation Preview
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install dependencies
if: github.event.action != 'closed'
run: pip install -r requirements-doc.txt
- name: Build the documentation
if: github.event.action != 'closed'
env:
GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }}
PR_NUMBER: ${{ github.event.pull_request.number }}
run: |
sed -i "1i site_url: https://dottxt-ai.github.io/outlines/pr-preview/pr-${PR_NUMBER}/" mkdocs.yml
mkdocs build
- name: Deploy to PR preview
if: github.event_name == 'pull_request'
uses: rossjrw/pr-preview-action@v1
with:
source-dir: site/
preview-branch: gh-pages
umbrella-dir: pr-preview
comment: false
- name: Comment PR with preview link
if: github.event_name == 'pull_request' && github.event.action != 'closed'
uses: actions/github-script@v7
with:
script: |
// Post (or refresh) a single comment on the pull request that links to the
// documentation preview built for it.
const prNumber = context.issue.number;
const previewUrl = `https://dottxt-ai.github.io/outlines/pr-preview/pr-${prNumber}/`;

// Fetch every comment on the PR. A plain `listComments` call returns only the
// first page of results, so on a long thread the existing preview comment
// could be missed and a duplicate posted on each run.
const comments = await github.paginate(github.rest.issues.listComments, {
  issue_number: prNumber,
  owner: context.repo.owner,
  repo: context.repo.repo,
  per_page: 100,
});

// Find existing preview comment: authored by a bot and carrying the marker
// text that `commentBody` below always contains.
const botComment = comments.find(comment =>
  comment.user?.type === 'Bot' &&
  (comment.body ?? '').includes('Documentation preview')
);

const commentBody = `📚 **Documentation preview**: ${previewUrl}\n\n*Preview updates automatically with each commit.*`;

// Update existing comment or create new one
if (botComment) {
  await github.rest.issues.updateComment({
    comment_id: botComment.id,
    owner: context.repo.owner,
    repo: context.repo.repo,
    body: commentBody
  });
} else {
  await github.rest.issues.createComment({
    issue_number: prNumber,
    owner: context.repo.owner,
    repo: context.repo.repo,
    body: commentBody
  });
}
================================================
FILE: .github/workflows/publish_documentation.yml
================================================
name: Publish the documentation
on:
workflow_dispatch:
push:
branches:
- main
release:
types:
- created
permissions:
contents: write
jobs:
deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v4
with:
python-version: 3.x
- run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
- uses: actions/cache@v3
with:
key: mkdocs-material-${{ env.cache_id }}
path: .cache
restore-keys: |
mkdocs-material-
- run: pip install -r requirements-doc.txt
- run: mkdocs build
- name: Set up Git
run: |
git config user.name ${{ github.actor }}
git config user.email ${{ github.actor }}@users.noreply.github.com
- name: Publish Tag as latest
env:
GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }}
if: github.event_name == 'release'
run: |
mike deploy --push --update-aliases ${{ github.ref_name }} latest
mike set-default --push latest
- name: Publish main as unstable
env:
GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }}
if: github.event_name == 'push'
run: |
mike deploy --push --update-aliases ${{ github.ref_name }} unstable
================================================
FILE: .github/workflows/release_pypi.yaml
================================================
name: Release PyPi
on:
release:
types:
- created
jobs:
release-job:
name: Build and publish on PyPi
runs-on: ubuntu-latest
steps:
# Steps of the release job: check out the tagged source, build and smoke-test
# the distributions, verify the version, then upload to PyPI.
# The checkout/setup-python versions match the ones used by the repository's
# other workflows.
- name: Checkout
  uses: actions/checkout@v4
- name: Set up Python
  uses: actions/setup-python@v4
  with:
    python-version: "3.10"
- name: Build SDist and Wheel
  run: ./.github/scripts/build_sdist_and_wheel.sh
- name: Check that the package version matches the Release name
  # `${GITHUB_REF:10}` is the ref with its first 10 characters removed --
  # presumably the `refs/tags/` prefix, leaving the bare tag name (confirm).
  # grep -q exits non-zero when no `Version:` line matches, failing the job
  # before anything is published.
  run: |
    grep -Rq "^Version: ${GITHUB_REF:10}$" outlines.egg-info/PKG-INFO
- name: Publish to PyPi
  uses: pypa/gh-action-pypi-publish@v1.4.2
  with:
    user: __token__
    password: ${{ secrets.PYPI_TOKEN }}
================================================
FILE: .github/workflows/tests.yml
================================================
name: Tests
on:
pull_request:
branches: [main,v1.0]
push:
branches: [main]
jobs:
style:
name: Check the code style
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.13"
- uses: pre-commit/action@v3.0.0
tests:
name: Run the tests
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.13"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache-dependency-path: 'pyproject.toml'
- name: Free disk space
run: |
set -eux
sudo rm -rf /usr/share/dotnet || true
sudo rm -rf /opt/ghc || true
sudo rm -rf /usr/local/lib/android || true
sudo apt-get clean
df -h
- name: Install Ollama
run: |
curl -fsSL https://ollama.com/install.sh | sh
ollama --version
ollama pull tinyllama
- name: Set up test environment
run: |
python -m pip install --upgrade pip
pip install uv
uv sync --no-group test-gpu --extra test
- name: cache HuggingFace models
uses: actions/cache@v4
with:
path: ~/.cache/huggingface
key: hf-${{ runner.os }}-${{ hashFiles('**/pyproject.toml') }}
restore-keys: |
hf-${{ runner.os }}-
- name: Create matrix id
id: matrix-id
env:
MATRIX_CONTEXT: ${{ toJson(matrix) }}
run: |
# Log the matrix JSON for this job, then derive an identifier from it.
echo $MATRIX_CONTEXT
# An md5 digest is 32 hex characters; `cut` keeps the digest and drops the
# trailing file-name column that md5sum prints.
export MATRIX_ID=$(echo $MATRIX_CONTEXT | md5sum | cut -c 1-32)
echo $MATRIX_ID
# Expose the identifier as this step's `id` output. The `::set-output`
# workflow command is deprecated, so append to the $GITHUB_OUTPUT file instead.
echo "id=$MATRIX_ID" >> "$GITHUB_OUTPUT"
- name: Run tests
run: |
rm -f .coverage*
uv run coverage erase
uv run python -m coverage run --branch --source=outlines --parallel-mode -m pytest -x -m 'not api_call'
- name: Upload coverage data
uses: actions/upload-artifact@v4
with:
name: coverage-data-${{ matrix.python-version }}
path: .coverage.*
if-no-files-found: ignore
include-hidden-files: true
coverage:
name: Combine & check coverage.
needs: tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- uses: actions/setup-python@v4
with:
cache: pip
python-version: "3.11"
- name: Set up environment
run: |
pip install --upgrade "coverage[toml]>=5.1" diff-cover
- uses: actions/download-artifact@v4
with:
pattern: coverage-data-*
merge-multiple: true
- name: Combine coverage & fail if it's <100%.
run: |
python -m coverage combine
python -m coverage html --skip-covered --skip-empty
python -m coverage xml
python -m coverage report --fail-under=100 || (python -m coverage report && exit 1)
- name: Upload HTML report if check failed.
uses: actions/upload-artifact@v4
with:
name: html-report
path: htmlcov
overwrite: true
if: ${{ failure() }}
build-wheel:
name: Build Wheel and Test SDist
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Build SDist and Wheel
run: ./.github/scripts/build_sdist_and_wheel.sh
================================================
FILE: .github/workflows/tests_api_models.yml
================================================
name: API Models Tests
on:
workflow_dispatch:
jobs:
tests:
name: Run API Models Tests
runs-on: ubuntu-latest
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
DOTTXT_API_KEY: ${{ secrets.DOTTXT_API_KEY }}
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
steps:
- uses: actions/checkout@v3
with:
ref: ${{ github.ref }}
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache-dependency-path: 'pyproject.toml'
- name: Free disk space
run: |
set -eux
sudo rm -rf /usr/share/dotnet || true
sudo rm -rf /opt/ghc || true
sudo rm -rf /usr/local/lib/android || true
sudo apt-get clean
df -h
- name: Install Ollama
run: |
curl -fsSL https://ollama.com/install.sh | sh
ollama --version
ollama pull tinyllama
- name: Set up test environment
run: |
python -m pip install --upgrade pip
pip install uv
uv sync --no-group test-gpu --extra test
- name: cache HuggingFace models
uses: actions/cache@v4
with:
path: ~/.cache/huggingface
key: hf-${{ runner.os }}-${{ hashFiles('**/pyproject.toml') }}
restore-keys: |
hf-${{ runner.os }}-
- name: Create matrix id
id: matrix-id
env:
MATRIX_CONTEXT: ${{ toJson(matrix) }}
run: |
# Log the matrix JSON for this job, then derive an identifier from it.
echo $MATRIX_CONTEXT
# An md5 digest is 32 hex characters; `cut` keeps the digest and drops the
# trailing file-name column that md5sum prints.
export MATRIX_ID=$(echo $MATRIX_CONTEXT | md5sum | cut -c 1-32)
echo $MATRIX_ID
# Expose the identifier as this step's `id` output (the "Run tests" step reads
# it as steps.matrix-id.outputs.id). The `::set-output` workflow command is
# deprecated, so append to the $GITHUB_OUTPUT file instead.
echo "id=$MATRIX_ID" >> "$GITHUB_OUTPUT"
- name: Run tests
run: |
uv run pytest -m 'api_call' --ignore=tests/models/test_dottxt.py
env:
COVERAGE_FILE: .coverage.${{ steps.matrix-id.outputs.id }}
================================================
FILE: .gitignore
================================================
__pycache__
.benchmarks
.cache
.coverage
.direnv
.env
.idea
.pytest_cache
.python-version
.venv
*_version.py
*.egg-info
*.gguf
benchmarks/results
build
docs/build
logs
.worktrees/
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: check-merge-conflict
- id: debug-statements
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.14.1
hooks:
- id: mypy
args: [--allow-redefinition]
exclude: ^examples/
additional_dependencies: [types-tqdm, types-Pillow]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.1
hooks:
- id: ruff
args: ["--config=pyproject.toml"]
================================================
FILE: .pydocstyle
================================================
[pydocstyle]
convention = numpy
================================================
FILE: .readthedocs.yaml
================================================
# Read the Docs build configuration.
# NOTE(review): this file selects the Sphinx HTML builder and points at
# docs/source/conf.py, but the repository's directory listing shows an
# mkdocs.yml at the root and no docs/source/ directory. It also pins
# Python 3.8 while the CI workflows run on 3.10 and later. Confirm whether
# this configuration is still in use before relying on it.
version: 2
python:
version: "3.8"
install:
# Install the package itself together with its `rtd` extra.
- method: pip
path: .
extra_requirements:
- rtd
# Then install the documentation toolchain requirements.
- requirements: requirements-doc.txt
sphinx:
builder: html
configuration: docs/source/conf.py
# Treat build warnings as errors.
fail_on_warning: true
================================================
FILE: .vscode/settings.json
================================================
{
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2023- The Outlines developers
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
<div align="center" style="margin-bottom: 1em;">
<img src="./docs/assets/images/logo-light-mode.svg#gh-light-mode-only" alt="Outlines Logo" width=300></img>
<img src="./docs/assets/images/logo-dark-mode.svg#gh-dark-mode-only" alt="Outlines Logo" width=300></img>
🗒️ *Structured outputs for LLMs* 🗒️
Made with ❤👷️ by the team at [.txt](https://dottxt.co)
<br>Trusted by NVIDIA, Cohere, HuggingFace, vLLM, etc.
<!-- Project Badges -->
[![PyPI Version][pypi-version-badge]][pypi]
[![Downloads][downloads-badge]][pypistats]
[![Stars][stars-badge]][stars]
<!-- Community Badges -->
[![Discord][discord-badge]][discord]
[![Blog][dottxt-blog-badge]][dottxt-blog]
[![Twitter][twitter-badge]][twitter]
</div>
## 🚀 Building the future of structured generation
We're working with select partners to develop new interfaces to structured generation.
Need XML, FHIR, custom schemas or grammars? Let's talk.
Audit your schema: share one schema, we show you what breaks under generation, the constraints that fix it, and compliance rates before and after. Sign up [here](https://h1xbpbfsf0w.typeform.com/to/rtFUraA2?typeform).
## Table of Contents
- [Why Outlines?](#why-outlines)
- [Quickstart](#quickstart)
- [Real-World Examples](#real-world-examples)
- [🙋♂️ Customer Support Triage](#customer-support-triage)
- [📦 E-commerce Product Categorization](#e-commerce-product-categorization)
- [📊 Parse Event Details with Incomplete Data](#parse-event-details-with-incomplete-data)
- [🗂️ Categorize Documents into Predefined Types](#categorize-documents-into-predefined-types)
- [📅 Schedule a Meeting with Function Calling](#schedule-a-meeting-with-function-calling)
- [📝 Dynamically Generate Prompts with Re-usable Templates](#dynamically-generate-prompts-with-re-usable-templates)
- [They Use Outlines](#they-use-outlines)
- [Model Integrations](#model-integrations)
- [Core Features](#core-features)
- [Other Features](#other-features)
- [About .txt](#about-txt)
- [Community](#community)
<div align="center"><img src="./docs/assets/images/install.png" width=300></img></div>
## Why Outlines?
LLMs are powerful but their outputs are unpredictable. Most solutions attempt to fix bad outputs after generation using parsing, regex, or fragile code that breaks easily.
Outlines guarantees structured outputs during generation — directly from any LLM.
- **Works with any model** - Same code runs across OpenAI, Ollama, vLLM, and more
- **Simple integration** - Just pass your desired output type: `model(prompt, output_type)`
- **Guaranteed valid structure** - No more parsing headaches or broken JSON
- **Provider independence** - Switch models without changing code
### The Outlines Philosophy
<div align="center"><img src="./docs/assets/images/use_philosophy.png" width=300></img></div>
Outlines follows a simple pattern that mirrors Python's own type system. Simply specify the desired output type, and Outlines will ensure your data matches that structure exactly:
- For a yes/no response, use `Literal["Yes", "No"]`
- For numerical values, use `int`
- For complex objects, define a structure with a [Pydantic model](https://docs.pydantic.dev/latest/)
## Quickstart
Getting started with outlines is simple:
### 1. Install outlines
``` shell
pip install outlines
```
### 2. Connect to your preferred model
``` python
import outlines
from transformers import AutoTokenizer, AutoModelForCausalLM
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
model = outlines.from_transformers(
AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"),
AutoTokenizer.from_pretrained(MODEL_NAME)
)
```
### 3. Start with simple structured outputs
``` python
from typing import Literal
from pydantic import BaseModel
# Simple classification
sentiment = model(
"Analyze: 'This product completely changed my life!'",
Literal["Positive", "Negative", "Neutral"]
)
print(sentiment) # "Positive"
# Extract specific types
temperature = model("What's the boiling point of water in Celsius?", int)
print(temperature) # 100
```
### 4. Create complex structures
``` python
from pydantic import BaseModel
from enum import Enum
class Rating(Enum):
poor = 1
fair = 2
good = 3
excellent = 4
class ProductReview(BaseModel):
rating: Rating
pros: list[str]
cons: list[str]
summary: str
review = model(
"Review: The XPS 13 has great battery life and a stunning display, but it runs hot and the webcam is poor quality.",
ProductReview,
max_new_tokens=200,
)
review = ProductReview.model_validate_json(review)
print(f"Rating: {review.rating.name}") # "Rating: good"
print(f"Pros: {review.pros}") # "Pros: ['great battery life', 'stunning display']"
print(f"Summary: {review.summary}") # "Summary: Good laptop with great display but thermal issues"
```
## Real-world examples
Here are production-ready examples showing how Outlines solves common problems:
<details id="customer-support-triage"><summary><b>🙋♂️ Customer Support Triage</b>
<br>This example shows how to convert a free-form customer email into a structured service ticket. By parsing attributes like priority, category, and escalation flags, the code enables automated routing and handling of support issues.
</summary>
``` python
import outlines
from enum import Enum
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import List
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
model = outlines.from_transformers(
AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"),
AutoTokenizer.from_pretrained(MODEL_NAME)
)
def alert_manager(ticket):
print("Alert!", ticket)
class TicketPriority(str, Enum):
low = "low"
medium = "medium"
high = "high"
urgent = "urgent"
class ServiceTicket(BaseModel):
priority: TicketPriority
category: str
requires_manager: bool
summary: str
action_items: List[str]
customer_email = """
Subject: URGENT - Cannot access my account after payment
I paid for the premium plan 3 hours ago and still can't access any features.
I've tried logging out and back in multiple times. This is unacceptable as I
have a client presentation in an hour and need the analytics dashboard.
Please fix this immediately or refund my payment.
"""
prompt = f"""
<|im_start|>user
Analyze this customer email:
{customer_email}
<|im_end|>
<|im_start|>assistant
"""
ticket = model(
prompt,
ServiceTicket,
max_new_tokens=500
)
# Use structured data to route the ticket
ticket = ServiceTicket.model_validate_json(ticket)
if ticket.priority == "urgent" or ticket.requires_manager:
alert_manager(ticket)
```
</details>
<details id="e-commerce-product-categorization"><summary><b>📦 E-commerce product categorization</b>
<br>This use case demonstrates how outlines can transform product descriptions into structured categorization data (e.g., main category, sub-category, and attributes) to streamline tasks such as inventory management. Each product description is processed automatically, reducing manual categorization overhead.
</summary>
```python
import outlines
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import List, Optional
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
model = outlines.from_transformers(
AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"),
AutoTokenizer.from_pretrained(MODEL_NAME)
)
def update_inventory(product, category, sub_category):
print(f"Updated {product.split(',')[0]} in category {category}/{sub_category}")
class ProductCategory(BaseModel):
main_category: str
sub_category: str
attributes: List[str]
brand_match: Optional[str]
# Process product descriptions in batches
product_descriptions = [
"Apple iPhone 15 Pro Max 256GB Titanium, 6.7-inch Super Retina XDR display with ProMotion",
"Organic Cotton T-Shirt, Men's Medium, Navy Blue, 100% Sustainable Materials",
"KitchenAid Stand Mixer, 5 Quart, Red, 10-Speed Settings with Dough Hook Attachment"
]
template = outlines.Template.from_string("""
<|im_start|>user
Categorize this product:
{{ description }}
<|im_end|>
<|im_start|>assistant
""")
# Get structured categorization for all products
categories = model(
[template(description=desc) for desc in product_descriptions],
ProductCategory,
max_new_tokens=200
)
# Use categorization for inventory management
categories = [
ProductCategory.model_validate_json(category) for category in categories
]
for product, category in zip(product_descriptions, categories):
update_inventory(product, category.main_category, category.sub_category)
```
</details>
<details id="parse-event-details-with-incomplete-data"><summary><b>📊 Parse event details with incomplete data</b>
<br>This example uses outlines to parse event descriptions into structured information (like event name, date, location, type, and topics), even handling cases where the data is incomplete. It leverages union types to return either structured event data or a fallback “I don’t know” answer, ensuring robust extraction in varying scenarios.
</summary>
```python
import outlines
from typing import Union, List, Literal
from pydantic import BaseModel
from enum import Enum
from transformers import AutoTokenizer, AutoModelForCausalLM
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
model = outlines.from_transformers(
AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"),
AutoTokenizer.from_pretrained(MODEL_NAME)
)
class EventType(str, Enum):
conference = "conference"
webinar = "webinar"
workshop = "workshop"
meetup = "meetup"
other = "other"
class EventInfo(BaseModel):
"""Structured information about a tech event"""
name: str
date: str
location: str
event_type: EventType
topics: List[str]
registration_required: bool
# Create a union type that can either be a structured EventInfo or "I don't know"
EventResponse = Union[EventInfo, Literal["I don't know"]]
# Sample event descriptions
event_descriptions = [
# Complete information
"""
Join us for DevCon 2023, the premier developer conference happening on November 15-17, 2023
at the San Francisco Convention Center. Topics include AI/ML, cloud infrastructure, and web3.
Registration is required.
""",
# Insufficient information
"""
Tech event next week. More details coming soon!
"""
]
# Process events
results = []
for description in event_descriptions:
prompt = f"""
<|im_start|>system
You are a helpful assistant
<|im_end|>
<|im_start|>user
Extract structured information about this tech event:
{description}
If there is enough information, return a JSON object with the following fields:
- name: The name of the event
- date: The date where the event is taking place
- location: Where the event is taking place
- event_type: either 'conference', 'webinar', 'workshop', 'meetup' or 'other'
- topics: a list of topics of the conference
- registration_required: a boolean that indicates whether registration is required
If the information available does not allow you to fill this JSON, and only then, answer 'I don't know'.
<|im_end|>
<|im_start|>assistant
"""
# Union type allows the model to return structured data or "I don't know"
result = model(prompt, EventResponse, max_new_tokens=200)
results.append(result)
# Display results
for i, result in enumerate(results):
print(f"Event {i+1}:")
if isinstance(result, str):
print(f" {result}")
else:
# It's an EventInfo object
print(f" Name: {result.name}")
print(f" Type: {result.event_type}")
print(f" Date: {result.date}")
print(f" Topics: {', '.join(result.topics)}")
print()
# Use structured data in downstream processing
structured_count = sum(1 for r in results if isinstance(r, EventInfo))
print(f"Successfully extracted data for {structured_count} of {len(results)} events")
```
</details>
<details id="categorize-documents-into-predefined-types"><summary><b>🗂️ Categorize documents into predefined types</b>
<br>In this case, outlines classifies documents into predefined categories (e.g., “Financial Report,” “Legal Contract”) using a literal type specification. The resulting classifications are displayed in both a table format and through a category distribution summary, illustrating how structured outputs can simplify content management.
</summary>
```python
import outlines
from typing import Literal, List
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
model = outlines.from_transformers(
AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"),
AutoTokenizer.from_pretrained(MODEL_NAME)
)
# Define classification categories using Literal
DocumentCategory = Literal[
"Financial Report",
"Legal Contract",
"Technical Documentation",
"Marketing Material",
"Personal Correspondence"
]
# Sample documents to classify
documents = [
"Q3 Financial Summary: Revenue increased by 15% year-over-year to $12.4M. EBITDA margin improved to 23% compared to 19% in Q3 last year. Operating expenses...",
"This agreement is made between Party A and Party B, hereinafter referred to as 'the Parties', on this day of...",
"The API accepts POST requests with JSON payloads. Required parameters include 'user_id' and 'transaction_type'. The endpoint returns a 200 status code on success."
]
template = outlines.Template.from_string("""
<|im_start|>user
Classify the following document into exactly one category among the following categories:
- Financial Report
- Legal Contract
- Technical Documentation
- Marketing Material
- Personal Correspondence
Document:
{{ document }}
<|im_end|>
<|im_start|>assistant
""")
# Classify documents
def classify_documents(texts: List[str]) -> List[DocumentCategory]:
results = []
for text in texts:
prompt = template(document=text)
# The model must return one of the predefined categories
category = model(prompt, DocumentCategory, max_new_tokens=200)
results.append(category)
return results
# Perform classification
classifications = classify_documents(documents)
# Create a simple results table
results_df = pd.DataFrame({
"Document": [doc[:50] + "..." for doc in documents],
"Classification": classifications
})
print(results_df)
# Count documents by category
category_counts = pd.Series(classifications).value_counts()
print("\nCategory Distribution:")
print(category_counts)
```
</details>
<details>
<summary id="schedule-a-meeting-with-function-calling"><b>📅 Schedule a meeting from requests with Function Calling</b>
<br>This example demonstrates how outlines can interpret a natural language meeting request and translate it into a structured format matching a predefined function’s parameters. Once the meeting details are extracted (e.g., title, date, duration, attendees), they are used to automatically schedule the meeting.
</summary>
```python
import outlines
import json
from typing import List, Optional
from datetime import date
from transformers import AutoTokenizer, AutoModelForCausalLM
MODEL_NAME = "microsoft/phi-4"
model = outlines.from_transformers(
AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"),
AutoTokenizer.from_pretrained(MODEL_NAME)
)
# Define a function with typed parameters
def schedule_meeting(
title: str,
date: date,
duration_minutes: int,
attendees: List[str],
location: Optional[str] = None,
agenda_items: Optional[List[str]] = None
):
"""Schedule a meeting with the specified details"""
# In a real app, this would create the meeting
meeting = {
"title": title,
"date": date,
"duration_minutes": duration_minutes,
"attendees": attendees,
"location": location,
"agenda_items": agenda_items
}
return f"Meeting '{title}' scheduled for {date} with {len(attendees)} attendees"
# Natural language request
user_request = """
I need to set up a product roadmap review with the engineering team for next
Tuesday at 2pm. It should last 90 minutes. Please invite john@example.com,
sarah@example.com, and the product team at product@example.com.
"""
# Outlines automatically infers the required structure from the function signature
prompt = f"""
<|im_start|>user
Extract the meeting details from this request:
{user_request}
<|im_end|>
<|im_start|>assistant
"""
meeting_params = model(prompt, schedule_meeting, max_new_tokens=200)
# The result is a dictionary matching the function parameters
meeting_params = json.loads(meeting_params)
print(meeting_params)
# Call the function with the extracted parameters
result = schedule_meeting(**meeting_params)
print(result)
# "Meeting 'Product Roadmap Review' scheduled for 2023-10-17 with 3 attendees"
```
</details>
<details>
<summary id="dynamically-generate-prompts-with-re-usable-templates"><b>📝 Dynamically generate prompts with re-usable templates</b>
<br>Using Jinja-based templates, this example shows how to generate dynamic prompts for tasks like sentiment analysis. It illustrates how to easily re-use and customize prompts—including few-shot learning strategies—for different content types while ensuring the outputs remain structured.
</summary>
```python
import outlines
from typing import List, Literal
from transformers import AutoTokenizer, AutoModelForCausalLM
MODEL_NAME = "microsoft/phi-4"
model = outlines.from_transformers(
AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"),
AutoTokenizer.from_pretrained(MODEL_NAME)
)
# 1. Create a reusable template with Jinja syntax
sentiment_template = outlines.Template.from_string("""
<|im_start|>user
Analyze the sentiment of the following {{ content_type }}:
{{ text }}
Provide your analysis as either "Positive", "Negative", or "Neutral".
<|im_end|>
<|im_start|>assistant
""")
# 2. Generate prompts with different parameters
review = "This restaurant exceeded all my expectations. Fantastic service!"
prompt = sentiment_template(content_type="review", text=review)
# 3. Use the templated prompt with structured generation
result = model(prompt, Literal["Positive", "Negative", "Neutral"])
print(result) # "Positive"
# Templates can also be loaded from files
example_template = outlines.Template.from_file("templates/few_shot.txt")
# Use with examples for few-shot learning
examples = [
("The food was cold", "Negative"),
("The staff was friendly", "Positive")
]
few_shot_prompt = example_template(examples=examples, query="Service was slow")
print(few_shot_prompt)
```
</details>
## They use outlines
<div align="center">
<img src="./docs/assets/images/readme-light.png#gh-light-mode-only" alt="Users Logo"></img>
<img src="./docs/assets/images/readme-dark.png#gh-dark-mode-only" alt="Users Logo"></img>
</div>
## Model Integrations
| Model type | Description | Documentation |
|---------|-------------|:-------------:|
| **Server Support** | vLLM and Ollama | [Server Integrations →](https://dottxt-ai.github.io/outlines/latest/features/models/) |
| **Local Model Support** | transformers and llama.cpp | [Model Integrations →](https://dottxt-ai.github.io/outlines/latest/features/models/) |
| **API Support** | OpenAI and Gemini | [API Integrations →](https://dottxt-ai.github.io/outlines/latest/features/models/) |
## Core Features
| Feature | Description | Documentation |
|---------|-------------|:-------------:|
| **Multiple Choices** | Constrain outputs to predefined options | [Multiple Choices Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#multiple-choices) |
| **Function Calls** | Infer structure from function signatures | [Function Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#json-schemas) |
| **JSON/Pydantic** | Generate outputs matching JSON schemas | [JSON Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#json-schemas) |
| **Regular Expressions** | Generate text following a regex pattern | [Regex Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#regex-patterns) |
| **Grammars** | Enforce complex output structures | [Grammar Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#context-free-grammars) |
## Other Features
| Feature | Description | Documentation |
|---------|-------------|:-------------:|
| **Prompt templates** | Separate complex prompts from code | [Template Guide →](https://dottxt-ai.github.io/outlines/latest/features/utility/template/) |
| **Custom types** | Intuitive interface to build complex types | [Python Types Guide →](https://dottxt-ai.github.io/outlines/latest/features/core/output_types/#basic-python-types) |
| **Applications** | Encapsulate templates and types into functions | [Application Guide →](https://dottxt-ai.github.io/outlines/latest/features/utility/application/) |
## About .txt
<div align="center">
<img src="./docs/assets/images/dottxt-light.svg#gh-light-mode-only" alt="dottxt logo" width=100></img>
<img src="./docs/assets/images/dottxt-dark.svg#gh-dark-mode-only" alt="dottxt logo" width=100></img>
</div>
Outlines is developed and maintained by [.txt](https://dottxt.co), a company dedicated to making LLMs more reliable for production applications.
Our focus is on advancing structured generation technology through:
- 🧪 **Cutting-edge Research**: We publish our findings on [structured generation](http://blog.dottxt.co/performance-gsm8k.html)
- 🚀 **Enterprise-grade solutions**: You can license [our enterprise-grade libraries](https://docs.dottxt.co).
- 🧩 **Open Source Collaboration**: We believe in building in public and contributing to the community
Follow us on [Twitter](https://twitter.com/dottxtai) or check out our [blog](https://blog.dottxt.co/) to stay updated on our latest work in making LLMs more reliable.
## Community
<div align="center" style="margin-bottom: 1em;">
[![Contributors][contributors-badge]][contributors]
[![Stars][stars-badge]][stars]
[![Downloads][downloads-badge]][pypistats]
[![Discord badge][discord-badge]][discord]
</div>
- 💡 **Have an idea?** Come chat with us on [Discord][discord]
- 🐞 **Found a bug?** Open an [issue](https://github.com/dottxt-ai/outlines/issues)
- 🧩 **Want to contribute?** Consult our [contribution guide](https://dottxt-ai.github.io/outlines/latest/community/contribute/).
## Cite Outlines
```
@article{willard2023efficient,
title={Efficient Guided Generation for Large Language Models},
author={Willard, Brandon T and Louf, R{\'e}mi},
journal={arXiv preprint arXiv:2307.09702},
year={2023}
}
```
[contributors]: https://github.com/dottxt-ai/outlines/graphs/contributors
[contributors-badge]: https://img.shields.io/github/contributors/dottxt-ai/outlines?style=flat-square&logo=github&logoColor=white&color=ECEFF4
[dottxt-blog]: https://blog.dottxt.co/
[dottxt-blog-badge]: https://img.shields.io/badge/dottxt%20blog-a6b4a3
[dottxt-twitter]: https://twitter.com/dottxtai
[dottxt-twitter-badge]: https://img.shields.io/twitter/follow/dottxtai?style=social
[discord]: https://discord.gg/R9DSu34mGd
[discord-badge]: https://img.shields.io/discord/1182316225284554793?color=ddb8ca&logo=discord&logoColor=white&style=flat-square
[downloads-badge]: https://img.shields.io/pypi/dm/outlines?color=A6B4A3&logo=python&logoColor=white&style=flat-square
[pypistats]: https://pypistats.org/packages/outlines
[pypi-version-badge]: https://img.shields.io/pypi/v/outlines?style=flat-square&logoColor=white&color=ddb8ca
[pypi]: https://pypi.org/project/outlines/
[stars]: https://github.com/dottxt-ai/outlines/stargazers
[stars-badge]: https://img.shields.io/github/stars/dottxt-ai/outlines?style=flat-square&logo=github&color=BD932F&logoColor=white
[twitter-badge]: https://img.shields.io/twitter/follow/dottxtai?style=flat-square&logo=x&logoColor=white&color=bd932f
[twitter]: https://x.com/dottxtai
================================================
FILE: docs/api_reference/index.md
================================================
# API Reference
================================================
FILE: docs/blog/index.md
================================================
# Blog
================================================
FILE: docs/community/contribute.md
================================================
---
title: Contribute
---
## What contributions?
- **Documentation** contributions are very valuable to us!
- **Examples.** Show us what you did with Outlines :)
- **Bug reports** with a minimal working example in the [issue tracker][issues]
- **Bug fixes** are always a pleasure to review.
- **New features**. Please start a new [discussion][discussions], or [come chat with us][discord] beforehand!
Note that the [issue tracker][issues] is only intended for actionable items. When in doubt, open a [discussion][discussions] or [come talk to us][discord].
## How to contribute?
### Setup
First, [fork the repository on GitHub](https://github.com/dottxt-ai/outlines/fork) and clone the fork locally:
```shell
git clone git@github.com:YourUserName/outlines.git
cd outlines
```
Create a new virtual environment:
*If you are using `uv`*:
```shell
uv venv
source .venv/bin/activate
alias pip="uv pip" # ... or just remember to prepend any pip command with uv in the rest of this guide
```
*If you are using `venv`*:
```shell
python -m venv .venv
source .venv/bin/activate
```
*If you are using `conda`*:
```shell
conda env create -f environment.yml
```
Then install the dependencies in editable mode, and install the `pre-commit` hooks:
```shell
pip install -e ".[test]"
pre-commit install
```
If you own a GPU and want to run the vLLM tests you will have to run:
```shell
pip install -e ".[test-gpu]"
```
instead.
Outlines provides optional dependencies for different supported backends, which you can install with
```shell
pip install ".[vllm]"
```
A list of supported optional dependencies can be found in the [installation guide](/installation).
### Using VSCode DevContainer / GitHub Codespaces
If you want a fully pre-configured development environment, you can use VSCode DevContainers or GitHub Codespaces.
#### VSCode DevContainer
1. Ensure that the [Docker](https://www.docker.com/get-started/) daemon is running on your machine.
2. Install the [Dev Containers](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) extension in VSCode.
3. Open the Outlines repository in VSCode. When prompted, **Reopen in Container** (or press `F1` and select "Remote-Containers: Reopen in Container").
4. Run the normal setup steps. Your environment will not complain about missing system dependencies!
#### GitHub Codespaces
1. Navigate to the Outlines repository on GitHub.
2. Click on the **Code** button and select the **Codespaces** tab.
3. Click **Create codespace on main** (or another branch you are working on).
4. GitHub will launch a pre-configured cloud development environment.
You will not have access to a GPU, but you'll be able to make basic contributions to the project on the go while using a fully featured web-based IDE.
### Before pushing your code
Run the tests:
```shell
pytest
```
And run the code style checks:
```shell
pre-commit run --all-files
```
### Benchmarking
Outlines uses [asv](https://asv.readthedocs.io) for automated benchmark testing. Benchmarks are run automatically before pull requests are merged to prevent performance degradation.
You can run the benchmark test suite locally with the following command:
```shell
asv run --config benchmarks/asv.conf.json
```
Caveats:
- If you're on a device with CUDA, you must add the argument `--launch-method spawn`
- Uncommitted code will not be benchmarked; you must first commit your changes.
#### Run a specific test:
```shell
asv run --config benchmarks/asv.conf.json -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm
```
#### Profile a specific test:
```shell
asv run --config benchmarks/asv.conf.json --profile -b bench_json_schema.JsonSchemaBenchmark.time_json_schema_to_fsm
```
#### Compare to `origin/main`
```shell
git fetch origin
asv continuous origin/main HEAD --config benchmarks/asv.conf.json
```
#### ASV PR Behavior
- **View ASV Benchmark Results:** Open the workflow, view `BENCHMARK RESULTS` section.
- Merging is blocked unless benchmarks are run for the latest commit.
- Benchmarks fail if performance degrades by more than 10% for any individual benchmark.
- The "Benchmark PR" workflow runs when it is manually dispatched. If the `run_benchmarks` label is added to the PR, the benchmarks run for every commit.
### Contribute to the documentation
To work on the *documentation* you will need to install the related dependencies:
```shell
pip install -r requirements-doc.txt
```
To build the documentation and serve it locally, run the following command in the repository's root folder:
```shell
mkdocs serve
```
By following the instructions, you will be able to view the documentation locally.
It will be updated every time you make a change.
## Open a Pull Request
Create a new branch on your fork, commit and push the changes:
```shell
git checkout -b new-branch
git add .
git commit -m "Changes I made"
git push origin new-branch
```
Then you can [open a pull request][pull-requests] on GitHub. It should prompt you to do so. Every subsequent change that you make on your branch will update the pull request.
Do not hesitate to open a draft PR before your contribution is ready, especially if you have questions and/or need feedback. If you need help, come tell us on [Discord][discord].
[discord]: https://discord.gg/R9DSu34mGd
[discussions]: https://github.com/dottxt-ai/outlines/discussions
[issues]: https://github.com/dottxt-ai/outlines/issues
[pull-requests]: https://github.com/dottxt-ai/outlines/pulls
================================================
FILE: docs/community/examples.md
================================================
# Community projects and articles
Publishing examples and articles about Outlines is a meaningful way to contribute to the community. Here is a list of projects we are aware of. Drop us a line if we forgot yours!
[MMSG](https://github.com/leloykun/mmsg) is a Python library for generating interleaved text and image content in a structured format you can directly pass to downstream APIs.
[Multimodal Structured Generation: CVPR's 2nd MMFM Challenge Technical Report](https://arxiv.org/abs/2406.11403) shows that Structured Generation can outperform finetuning, and maybe even multimodality, in document-image understanding tasks as part of CVPR's 2nd MMFM Challenge.
[Chess LLM Arena](https://huggingface.co/spaces/mlabonne/chessllm) is a HuggingFace Space where you can make LLMs compete in a chess match.
[LLM Data Gen](https://huggingface.co/spaces/lhoestq/LLM_DataGen) is a HuggingFace Space that generates synthetic dataset files in JSONLines format.
[Fast, High-Fidelity LLM Decoding with Regex Constraints](https://vivien000.github.io/blog/journal/llm-decoding-with-regex-constraints.html) presents an efficient alternative to Outlines's structured generation.
[gigax](https://github.com/GigaxGames/gigax) is an open-source library for creating real-time LLM-powered NPCs for video games.
[Improving Prompt Consistency with Structured Generations](https://huggingface.co/blog/evaluation-structured-outputs) shows how structured generation can improve consistency of evaluation runs by reducing sensitivity to changes in prompt format.
[AskNews](https://asknews.app) is a news curation service processing 300k news articles per day in a structured way, with Outlines.
================================================
FILE: docs/community/feedback.md
================================================
---
title: Feedback
---
# Feedback
If Outlines has been helpful to you, let us know on [Discord][discord] or give us a shoutout on [Twitter][twitter]! It's always heartwarming ❤️
<head>
<!-- From Marvin AI's documentation -->
<!-- Their library is also awesome -->
<!-- https://www.askmarvin.ai/ -->
<style>
.tweet-masonry {
column-count: 2;
column-gap: 20px;
padding: 20px;
}
.twitter-tweet {
display: inline-block;
width: 100%;
margin-bottom: 20px;
margin-top: 0px !important;
break-inside: avoid;
}
@media (max-width: 600px) {
.tweet-masonry {
column-count: 1;
}
}
</style>
</head>
<body>
<div class="tweet-masonry">
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">I am once again reminding you that structured extraction using LLMs is going to transform every single industry in the next 10 years <a href="https://t.co/xQ3tcWnrZ8">https://t.co/xQ3tcWnrZ8</a></p>— Sam Hogan (@0xSamHogan) <a href="https://twitter.com/0xSamHogan/status/1780637917737816323?ref_src=twsrc%5Etfw">April 17, 2024</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">outline's growth is insane, using is an understatement! <a href="https://t.co/rHCNWhZdCs">https://t.co/rHCNWhZdCs</a></p>— jason liu (@jxnlco) <a href="https://twitter.com/jxnlco/status/1780618454040797554?ref_src=twsrc%5Etfw">April 17, 2024</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Outlines is an amazing lib and more popular than <a href="https://twitter.com/remilouf?ref_src=twsrc%5Etfw">@remilouf</a>’s modesty will admit. <a href="https://t.co/DfHbMPIlX1">https://t.co/DfHbMPIlX1</a> <a href="https://t.co/mDHIWJrD0C">https://t.co/mDHIWJrD0C</a></p>— Delip Rao e/σ (@deliprao) <a href="https://twitter.com/deliprao/status/1780780217180598377?ref_src=twsrc%5Etfw">April 18, 2024</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Impressive implementation of a true regex / json / grammar guided text generation <a href="https://t.co/RX5RVYaVIx">pic.twitter.com/RX5RVYaVIx</a></p>— Rohan Paul (@rohanpaul_ai) <a href="https://twitter.com/rohanpaul_ai/status/1741099984299135403?ref_src=twsrc%5Etfw">December 30, 2023</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Most underrated Github Repo in AI + LLM JSON guided Generation: <a href="https://t.co/lSB8KIet1H">https://t.co/lSB8KIet1H</a></p>— 🎙Jean-Louis Queguiner (@JiliJeanlouis) <a href="https://twitter.com/JiliJeanlouis/status/1736857292581093706?ref_src=twsrc%5Etfw">December 18, 2023</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Nice and useful. <a href="https://t.co/LX72AE0lgt">https://t.co/LX72AE0lgt</a></p>— Dan Roy (@roydanroy) <a href="https://twitter.com/roydanroy/status/1691556956941525458?ref_src=twsrc%5Etfw">August 15, 2023</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">HUGE dub for open source AI <a href="https://t.co/bYKuiEUZ1j">https://t.co/bYKuiEUZ1j</a></p>— kenneth 🖇 (@k3nnethfrancis) <a href="https://twitter.com/k3nnethfrancis/status/1691304781732843521?ref_src=twsrc%5Etfw">August 15, 2023</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">This is amazing - glad to see more outp guidance modules! <br><br>Will try this out soon I'm wondering how they translate from regex automatons to token boundaries<br><br>Also why Open Source will succeed. Even today I don't see any guided output functionality from the big providers. <a href="https://t.co/Ity2H25Klf">https://t.co/Ity2H25Klf</a></p>— Hrishi (@hrishioa) <a href="https://twitter.com/hrishioa/status/1691181499671080960?ref_src=twsrc%5Etfw">August 14, 2023</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Outlines - a library to help LLM developers guide text generation in a fast and reliable way.<br><br>"Provides generation methods that guarantee that the output will match a regular expressions, or follow a JSON schema."<br><br>Need to check this out. Reliable JSON output is a common use… <a href="https://t.co/Bkbh8vKogN">pic.twitter.com/Bkbh8vKogN</a></p>— elvis (@omarsar0) <a href="https://twitter.com/omarsar0/status/1691179888214966273?ref_src=twsrc%5Etfw">August 14, 2023</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Woah this is cool! Makes open source models more usable.<br><br>Give any LLM Function Call capability (and more) with Outlines: <a href="https://t.co/PtPykR5ZGR">https://t.co/PtPykR5ZGR</a> <a href="https://t.co/RRQjWHnIxv">https://t.co/RRQjWHnIxv</a> <a href="https://t.co/BwNnH8SMwv">pic.twitter.com/BwNnH8SMwv</a></p>— Yohei (@yoheinakajima) <a href="https://twitter.com/yoheinakajima/status/1691231912466223104?ref_src=twsrc%5Etfw">August 14, 2023</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">This is awesome! Being able to guarantee the output's structure unblocks so many applications. This is a great milestone and a fundamental building block for more advanced AI apps. <a href="https://t.co/WdwMOc7hE8">https://t.co/WdwMOc7hE8</a></p>— Guilherme Castro (@skastr052) <a href="https://twitter.com/skastr052/status/1691239359494619136?ref_src=twsrc%5Etfw">August 15, 2023</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Juggling with the unpredictable outputs of ChatGPT API lately while building my product. 😓 <br><br>Tried prompt engineering to channel its wisdom into a neat JSON, but it's like asking a cat to fetch. 🐱<br><br>Luckily, stumbled upon "Outlines" – looks like a promising way to tame the LLM… <a href="https://t.co/oYQ6q8exAS">pic.twitter.com/oYQ6q8exAS</a></p>— Charlie (@14435635Sun) <a href="https://twitter.com/14435635Sun/status/1691439342689095680?ref_src=twsrc%5Etfw">August 15, 2023</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
<blockquote class="twitter-tweet"><p lang="en" dir="ltr">A complex system of LLM input-outputs interacting with non-LLM agents and models benefits immeasurably from structured outputs. The outlines package saves so much time, <a href="https://t.co/NhVQ6NpKDR">https://t.co/NhVQ6NpKDR</a></p>— Amir Sani (@amirsani) <a href="https://twitter.com/amirsani/status/1728734266568376433?ref_src=twsrc%5Etfw">November 26, 2023</a></blockquote> <script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>
</div>
</body>
# Let us know!
We highly value the insights of our users, and we would love to hear from you. If you are using Outlines for your projects and would like to share your experience with us, let's connect:
- What are you building with it?
- What do you like about it?
- What challenges are you facing?
- What do you think could be improved?
To schedule an appointment follow [this link](https://cal.com/dottxt/outlines). This is exclusively intended to share your experience, please go on [Discord][discord] or [GitHub](https://github.com/dottxt-ai/outlines/discussions) for support.
[discord]: https://discord.gg/UppQmhEpe8
[twitter]: https://twitter.com/dottxtai
================================================
FILE: docs/community/index.md
================================================
# Community
Outlines exists for a community of users who believe software doesn't need to be complicated. Who share the same passion for Large Language Models but don't want to compromise on robustness. Together, we are bringing these powerful models back to the world of software.
## Connect on Discord
The Outlines community lives on our Discord server. There you can ask questions, share ideas or just chat with people like you. Don't be a stranger and [join us][discord].
[discord]: https://discord.gg/UppQmhEpe8
================================================
FILE: docs/community/versioning.md
================================================
---
title: Versioning Guide
---
# Versioning Guide
The Outlines project follows a structured versioning scheme designed to provide clarity and minimize risk for downstream dependents.
Each part of the version number (`major.minor.patch`) conveys information about the nature and impact of the changes included in the release.
- **Major Releases** include compatibility-breaking changes to core interfaces, such as `LogitsProcessor`s and `Guides`.
- **Minor Releases** introduce changes of substance to internal or unexposed functionality. These changes are well tested and intended to maintain compatibility with existing use of core interfaces.
- **Patch Releases** address bug fixes and incorporate low-risk changes to improve stability and performance.
!!! note "Breaking Changes"
Outlines v1.0 introduced several breaking changes to the core interface. See [the migration guide](/user_guide/migration) for more details.
## Releases
Releases along with release notes can be found on the [Outlines Releases GitHub Page](https://github.com/dottxt-ai/outlines/releases).
## Version Pinning Recommendations
Here are our recommendations for managing dependencies on the Outlines package:
**Small, Risk-Tolerant Projects:** Pin to a specific major version.
**Large, Conservative Projects:** Pin to a specific minor version.
================================================
FILE: docs/core_concepts.md
================================================
---
title: Core concepts
---
# Core concepts
Coming soon. This will document various concepts at a high level, so users can understand Outlines before diving into specific implementations.
1. Constrained decoding, tokens, and the basics of logit biasing
2. Different ways to define output structure (regex, JSON schema, Pydantic models, context-free grammars)
3. How finite state machines are used to guarantee output structure
4. `Generator`, `Application`, `Template`
5. Prompt engineering vs. structured generation
================================================
FILE: docs/examples/chain_of_density.md
================================================
# Summarize documents using Chain of Density prompting
A good summary should be informative, concise and clear. While large language models are generally good at summarizing documents, their summaries tend to be long and contain redundant information; their information density tends to be on the lower end. This is where [Chain of Density](https://arxiv.org/abs/2309.04269), a new prompting technique, comes in. In this example we will show how one can implement Chain of Density with a few lines of code using Outlines, leveraging both Outlines' prompt templating and its structured generation capabilities.
The article we will try to summarize is the first three paragraphs of the [Alan Turing page on Wikipedia](https://en.wikipedia.org/wiki/Alan_Turing):
```python
article = """
Alan Mathison Turing OBE FRS (/ˈtjʊərɪŋ/; 23 June 1912 – 7 June 1954) was an English mathematician, computer scientist, logician, cryptanalyst, philosopher and theoretical biologist.[5] Turing was highly influential in the development of theoretical computer science, providing a formalisation of the concepts of algorithm and computation with the Turing machine, which can be considered a model of a general-purpose computer.[6][7][8] He is widely considered to be the father of theoretical computer science and artificial intelligence.[9]
Born in Maida Vale, London, Turing was raised in southern England. He graduated at King's College, Cambridge, with a degree in mathematics. Whilst he was a fellow at Cambridge, he published a proof demonstrating that some purely mathematical yes–no questions can never be answered by computation. He defined a Turing machine and proved that the halting problem for Turing machines is undecidable. In 1938, he obtained his PhD from the Department of Mathematics at Princeton University. During the Second World War, Turing worked for the Government Code and Cypher School at Bletchley Park, Britain's codebreaking centre that produced Ultra intelligence. For a time he led Hut 8, the section that was responsible for German naval cryptanalysis. Here, he devised a number of techniques for speeding the breaking of German ciphers, including improvements to the pre-war Polish bomba method, an electromechanical machine that could find settings for the Enigma machine. Turing played a crucial role in cracking intercepted coded messages that enabled the Allies to defeat the Axis powers in many crucial engagements, including the Battle of the Atlantic.[10][11]
After the war, Turing worked at the National Physical Laboratory, where he designed the Automatic Computing Engine, one of the first designs for a stored-program computer. In 1948, Turing joined Max Newman's Computing Machine Laboratory at the Victoria University of Manchester, where he helped develop the Manchester computers[12] and became interested in mathematical biology. He wrote a paper on the chemical basis of morphogenesis[1] and predicted oscillating chemical reactions such as the Belousov–Zhabotinsky reaction, first observed in the 1960s. Despite these accomplishments, Turing was never fully recognised in Britain during his lifetime because much of his work was covered by the Official Secrets Act.[13]
"""
```
## How Chain Of Density works
Chain Of Density starts with asking the model to generate a first long and non-specific summary. Then it asks the model to generate 4 extra summaries by proceeding in the following way:
1. Identify 1-3 entities missing in the previous summary;
2. Add all entities marked as missing in the previous step, while not dropping entities;
3. Make the summary more concise;
The prompt also asks the model to return a list of JSON objects that contain the missing entities and the new summary. This is where structured generation will come in handy :) The paper provides the prompt and an example:

We can now implement the prompt provided in the paper. We stored the prompt template in a text file, and we can load it using the `Template` class:
```python
from outlines import Template
chain_of_density = Template.from_file("prompt_templates/chain_of_density.txt")
```
??? Note
Note that we modified the prompt slightly so it returns a JSON object that contains the summaries, instead of a list of summaries.
## Outlines implementation
We will use Outlines' JSON-structured generation to ensure that the model's output is consistent with the format specified in the prompt. We start with defining the JSON objects that the model is asked to return using Pydantic. The model returns one JSON object that contains a list of `Summary` objects, each holding the missing entities and the new summary:
```python
from pydantic import BaseModel, conlist
# One Chain of Density step: the entities missing from the previous summary
# and the rewritten summary. Both fields are plain strings.
class Summary(BaseModel):
missing_entities: str
denser_summary: str
# Top-level object the model must return. `min_length` and `max_length` are
# both 5, so the list holds exactly five `Summary` items.
class Summaries(BaseModel):
summaries: conlist(Summary, max_length=5, min_length=5)
```
We now generate the prompt by passing the article we want to summarize to the prompt template previously loaded. We load a quantized version of Mistral-7B using the AutoAWQ library, and then use the `Summaries` schema to generate the summaries with structured generation:
```python
import outlines
import transformers
MODEL_NAME = "TheBloke/Mistral-7B-OpenOrca-AWQ"
model = outlines.from_transformers(
transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME),
transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
)
prompt = chain_of_density(article=article)
result = model(prompt, Summaries, max_new_tokens=2000)
```
We can now check the results:
```python
print(result)
# {'summaries': [
# {
# 'missing_entities': 'English mathematician, cryptanalyst, philosopher',
# 'denser_summary': 'Alan Mathison Turing was an English mathematician, cryptanalyst, philosopher.'
# },
# {
# 'missing_entities': '',
# 'denser_summary': "Alan Mathison Turing was an English mathematician who was a crucial figure in WW2's Bletchley Park codebreaking centre and designed one of the first computers."
# },
# {
# 'missing_entities': 'cryptanalyst, studied, biology, father',
# 'denser_summary': 'Alan Mathison Turing was an English cryptanalyst, studied theoretical computer science, and contributed to mathematical biology.'
# },
# {
# 'missing_entities': 'biology, morphogenesis, chemical',
# 'denser_summary': 'Alan Mathison Turing was an English cryptanalyst, studied theoretical computer science, and predicted chemical reactions in morphogenesis.
# '},
# {
# 'missing_entities': '',
# 'denser_summary': 'Alan Mathison Turing was an English cryptanalyst, developed computer science, and made strides in mathematical biology research.'
# }
# ]}
```
Not bad, considering we used a smallish model to generate the summary! Chain of Density seems to be a very effective prompting technique to generate dense summaries, even with small quantized models. Its implementation in Outlines is also very short.
Note that this is the first article I tried and it worked out of the box. Try it out on other articles, and please share the results on Twitter, or by opening [a new discussion](https://github.com/dottxt-ai/outlines/discussions/categories/show-and-tell) on the Outlines repository!
================================================
FILE: docs/examples/chain_of_thought.md
================================================
# Chain of thought
Chain of thought is a prompting technique introduced in the paper ["Chain-of-Thought Prompting Elicits Reasoning in Large Language Models"](https://arxiv.org/abs/2201.11903) where, through prompting, the authors generate a series of intermediate reasoning steps, which improves the ability of LLMs to perform complex reasoning.
In this guide, we use [outlines](https://dottxt-ai.github.io/outlines/) to apply chain of thought through structured output.
We use [llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library. Outlines supports llama-cpp-python, but we need to install it ourselves:
```shell
pip install llama-cpp-python
```
To create an outlines `LlamaCpp` model, you first need to create a `Llama` object from the `llama-cpp-python` library. Then you can create the outlines model by calling `outlines.from_llamacpp` with the `Llama` object instance as argument. To create the `Llama` object, you need to provide the model weights by passing the name of the repository on the HuggingFace Hub, and the filenames or glob pattern (it will automatically download the weights from the hub):
```python
import llama_cpp
import outlines
llm = llama_cpp.Llama(
"NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF",
tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
"NousResearch/Hermes-2-Pro-Llama-3-8B"
),
n_gpu_layers=-1,
flash_attn=True,
n_ctx=8192,
verbose=False
)
model = outlines.from_llamacpp(llm)
```
??? note "(Optional) Store the model weights in a custom folder"
By default the model weights are downloaded to the hub cache, but if we want to store the weights in a custom folder, we pull a quantized GGUF model [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF) by [NousResearch](https://nousresearch.com/) from [HuggingFace](https://huggingface.co/):
```shell
wget https://hf.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
```
We initialize the model:
```python
from llama_cpp import Llama
llm = Llama("/path/to/model/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ...)
```
## Chain of thought
We first define our Pydantic class for a reasoning step:
```python
from pydantic import BaseModel, Field
# A single reasoning step: one required string field (`...` marks it required).
class Reasoning_Step(BaseModel):
reasoning_step: str = Field(..., description="Reasoning step")
```
We then define the Pydantic class for reasoning, which will consist of a list of reasoning steps and a conclusion, and we get its JSON schema:
```python
from typing import List
# Full answer: a required list of `Reasoning_Step` items followed by a
# required conclusion string.
class Reasoning(BaseModel):
reasoning: List[Reasoning_Step] = Field(..., description="List of reasoning steps")
conclusion: str = Field(..., description="Conclusion")
json_schema = Reasoning.model_json_schema()
```
We then need to adapt our prompt to the [Hermes prompt format for JSON schema](https://github.com/NousResearch/Hermes-Function-Calling?tab=readme-ov-file#prompt-format-for-json-mode--structured-outputs):
```python
from outlines import Template
generate_hermes_prompt = Template.from_string(
"""
<|im_start|>system
You are a world class AI model who answers questions in JSON
Here's the json schema you must adhere to:
<schema>
{{ json_schema }}
</schema>
<|im_end|>
<|im_start|>user
{{ user_prompt }}
<|im_end|>
<|im_start|>assistant
<schema>
"""
)
```
For a given user prompt:
```python
user_prompt = "9.11 and 9.9 -- which is bigger?"
```
We can use `outlines.Generator` with the Pydantic class we previously defined, and call the generator with the Hermes prompt:
```python
# Constrain generation with the `Reasoning` Pydantic model defined earlier, so
# the response is JSON that matches its schema and can be loaded with `json.loads`.
generator = outlines.Generator(model, Reasoning)
prompt = generate_hermes_prompt(json_schema=json_schema, user_prompt=user_prompt)
response = generator(prompt, max_tokens=1024, temperature=0, seed=42)
```
We obtain a series of intermediate reasoning steps as well as the conclusion:
```python
import json
json_response = json.loads(response)
print(json_response["reasoning"])
print(json_response["conclusion"])
# [{'reasoning_step': 'Both 9.11 and 9.9 are decimal numbers.'},
# {'reasoning_step': 'When comparing decimal numbers, we look at the numbers after the decimal point.'},
# {'reasoning_step': 'In this case, 9.11 has the number 1 after the decimal point, while 9.9 has the number 9.'},
# {'reasoning_step': 'Since 1 is greater than 9, 9.11 is greater than 9.9.'}]
# '9.11 is bigger.'
```
We notice that the 4th reasoning step is wrong ("Since 1 is greater than 9, 9.11 is greater than 9.9."), so we should probably give the model some examples for this particular task.
This example was originally contributed by [Alonso Silva](https://github.com/alonsosilvaallende).
================================================
FILE: docs/examples/classification.md
================================================
# Classification
Classification is a classic problem in NLP and finds many applications: spam detection, sentiment analysis, triaging of incoming requests, etc. We will use the example of a company that wants to sort support requests between those that require immediate attention (`URGENT`) and those that can wait a little (`STANDARD`). You could easily extend the example by adding new labels.
This tutorial shows how one can implement multi-label classification using Outlines.
As always, we start with initializing the model. Since we are GPU poor we will be using a quantized version of Mistral-7B-v0.1:
```python
import outlines
import transformers
MODEL_NAME = "TheBloke/Mistral-7B-OpenOrca-AWQ"
model = outlines.from_transformers(
transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME),
transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
)
```
We will use a prompt template stored in a text file:
```python
from outlines import Template
customer_support = Template.from_file("prompt_templates/classification.txt")
```
## Choosing between multiple choices
Outlines provides a convenient way to do multi-label classification, passing a Literal type hint to the `outlines.Generator` object:
```python
from typing import Literal
import outlines
generator = outlines.Generator(model, Literal["URGENT", "STANDARD"])
```
Outlines supports batched requests, so we will pass two requests to the model:
```python
requests = [
"My hair is one fire! Please help me!!!",
"Just wanted to say hi"
]
prompts = [customer_support(request=request) for request in requests]
```
We can now ask the model to classify the requests:
```python
labels = generator(prompts)
print(labels)
# ['URGENT', 'STANDARD']
```
## Using JSON-structured generation
Another (convoluted) way to do multi-label classification is to use JSON-structured generation in Outlines. We first need to define our Pydantic schema that contains the labels:
```python
from enum import Enum
from pydantic import BaseModel
# Allowed labels. Mixing in `str` makes each member a string whose value is
# the label text.
class Label(str, Enum):
urgent = "URGENT"
standard = "STANDARD"
# JSON object the model must produce: a single `label` field restricted to
# the `Label` members.
class Classification(BaseModel):
label: Label
```
We can then create a generator with the Pydantic model we just defined and call it:
```python
generator = outlines.Generator(model, Classification)
labels = generator(prompts)
print(labels)
# ['{"label":"URGENT"}', '{ "label": "STANDARD" }']
```
================================================
FILE: docs/examples/dating_profiles.md
================================================
# Generate a synthetic dating profile from a description
In this example we will see how we can use Outlines to generate synthetic data for a dating application. This example was originally contributed by [Vibhor Kumar](https://github.com/veezbo).
```python
import json
from dataclasses import dataclass
from enum import Enum
import torch
import transformers
from pydantic import BaseModel, conlist, constr
import outlines
```
## Defining the profile with Pydantic
Here a dating profile will consist of a biography, a job, a list of interests and two question-answer pairs. The questions are written in advance by the team, and the users are asked to provide an answer:
```python
# Fixed set of question prompts a profile may answer; each member's value is
# the prompt text.
class QuestionChoice(str, Enum):
A = "The key to my heart is"
B = "The first item on my bucket list is"
C = "Perks of dating me"
D = "Message me if you also love"
E = "People would describe me as"
F = "I can beat you in a game of"
# Pairs one of the predefined questions with the user's free-text answer.
@dataclass
class QuestionAnswer:
question: QuestionChoice
answer: str
```
Users need to provide a short biography, with a minimum of 10 and a maximum of 300 characters. The application also limits job descriptions to 50 characters. In addition to the question-answer pairs, the user is required to provide a list of between 1 and 5 interests:
```python
class DatingProfile(BaseModel):
    # `constr` always constrains `str` and accepts keyword arguments only, so
    # no type is passed positionally.
    bio: constr(min_length=10, max_length=300)  # type: ignore
    job: constr(max_length=50)  # type: ignore
    # Between 1 and 5 interests.
    interests: conlist(str, min_length=1, max_length=5)  # type: ignore
    qna1: QuestionAnswer
    qna2: QuestionAnswer
```
## Prompt template and examples
We will ask the model to generate profiles from a high-level description:
```python
# A few-shot example: a free-text description paired with the profile written
# for it.
@dataclass
class Example:
description: str
profile: DatingProfile
```
We will use Outlines' prompt templating abilities to generate the prompt for us. This helps clearly separate the general prompting logic from what is specific to an example.
```python
from outlines import Template
dating_profile_prompt = Template.from_string(
"""
You are a world-renowned matchmaker who understands the modern dating
market. Your job is to generate dating app profiles for male clients
interested in women based on a provided description. The profiles should be
authentic, show off their strengths, and maximize their likelihood of
getting matches on dating apps. Here are some examples of past clients that
you have successfully created profiles for:
{% for example in examples %}
Description:
{{ example.description }}
Profile:
{{ example.profile }}
{% endfor %}
Here is the new client who you need to create a profile for:
Description: {{ description }}
Profile:
"""
)
```
We will provide the model with several few-shot examples:
```python
samples: list[Example] = [
Example(
description="I'm an author and former professional soccer player living in Seattle who publishes popular fiction books. A typical day for me starts by hanging out with my cat, drinking a coffee, and reading as much as I can in a few hours. Then, I'll prepare a quick smoothie before starting to write for a few hours, take a break with soccer or running a few miles, and finally meet friends for dinner at a new, hip restaurant in the evening. Sometimes we go axe-throwing afterwards, or play poker, or watch a comedy show, or visit a dive bar. On my vacations, I travel extensively to countries South America, Europe, and Asia, with the goal of visiting them all!",
profile=DatingProfile(
bio="Adventurer, dreamer, author, and soccer enthusiast. Life’s too short to waste time so I make the most of each day by exploring new places and playing with my friends on the pitch. What’s your favorite way to get out and have fun?",
job="Famous Soccer Player -> Famous Author",
interests=["Soccer", "Travel", "Friends", "Books", "Fluffy Animals"],
qna1=QuestionAnswer(
question=QuestionChoice.B, answer="swim in all seven oceans!"
),
qna2=QuestionAnswer(
question=QuestionChoice.E,
answer="fun-loving, adventurous, and a little bit crazy",
),
),
),
Example(
description="I run my company and build houses for a living. I'm a big fan of the outdoors and love to go hiking, camping, and fishing. I don't like video games, but do like to watch movies. My love language is home-cooked food, and I'm looking for someone who isn't afraid to get their hands dirty.",
profile=DatingProfile(
bio="If you're looking for a Montana man who loves to get outdoors and hunt, and who's in-tune with his masculinity then I'm your guy!",
job="House Construction Manager / Entrepreneur",
interests=["Hunting", "Hiking", "The outdoors", "Home-cooked food"],
qna1=QuestionAnswer(question=QuestionChoice.A, answer="food made at home"),
qna2=QuestionAnswer(
question=QuestionChoice.C,
answer="having a man in your life who can fix anything",
),
),
),
Example(
description="I run my own Youtube channel with 10M subscribers. I love working with kids, and my audience skews pretty young too. In my free time, I play Fortnite and Roblox. I'm looking for someone who is also a gamer and likes to have fun. I'm learning Japanese in my free time as well as how to cook.",
profile=DatingProfile(
bio="Easy on the eyes (find me on Youtube!) and great with kids. What more do you need?",
job="Youtuber 10M+ subscribers",
interests=["Kids", "Gaming", "Japanese"],
qna1=QuestionAnswer(question=QuestionChoice.D, answer="anime and gaming!"),
qna2=QuestionAnswer(question=QuestionChoice.F, answer="Fortnite, gg ez"),
),
),
]
```
## Load the model
We will use Mosaic's MPT-7B model (requires 13GB of GPU memory) which can fit on a single GPU with a reasonable context window. We initialize it with Outlines:
```python
MODEL_NAME = "mosaicml/mpt-7b-8k-instruct"
config = transformers.AutoConfig.from_pretrained(
MODEL_NAME, trust_remote_code=True
)
config.init_device = "meta"
model_kwargs = {
"config": config,
"trust_remote_code": True,
"torch_dtype": torch.bfloat16,
"device_map": "cuda",
}
tf_model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_kwargs)
tf_tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
model = outlines.from_transformers(tf_model, tokenizer=tf_tokenizer)
```
## JSON-structured generation of profiles
We will now generate a dating profile from a textual description of oneself:
``` python
new_description = """I'm a laid-back lawyer who spends a lot of his free-time
gaming. I work in a corporate office, but ended up here after the start-up I
cofounded got acquired, so still play ping pong with my cool coworkers every
day. I have a bar at home where I make cocktails, which is great for
entertaining friends. I secretly like to wear suits and get a new one tailored
every few months. I also like weddings because I get to wear those suits, and
it's a good excuse for a date. I watch the latest series because I'm paying,
with my hard-earned money, for every streaming service."""
prompt = dating_profile_prompt(description=new_description, examples=samples)
profile = model(prompt, DatingProfile)
# `model_validate_json` parses the JSON text itself, so pass it the raw string
# rather than a dict from `json.loads`.
parsed_profile = DatingProfile.model_validate_json(profile)
```
## Results
Here are a couple of results:
```json
{
"bio": """I'm an ambitious lawyer with a casual and fashionable style. I love
games and sports, but my true passion is preparing refreshing cocktails at
home and dressing to the nines at weddings. I'm currently looking for a woman
to show a good time to and get a kiss on the opulent suit I just had made.
Send resume to this inbox.""",
"job": "Lawyer",
"interests":
[
"Stylish guys",
"Gaming",
"Ping pong",
"Cocktails",
"Weddings"
],
"qna1":
{
"question": "The first item on my bucket list is",
"answer": "be married and have a family."
},
"qna2":
{
"question": "People would describe me as",
"answer": "charming, stylish, and funny."
}
}
```
```json
{
"bio": """I’m a sexy lawyer with time on my hands. I love to game and
play ping pong, but the real reason you should swipe to the right
is because I look great in a suit. Who doesn’t love a man in a
suit? Just saying. Send me a message if you think it’s time to take
your dating life to the next level.""",
"job": "Lawyer",
"interests":
[
"Gaming",
"Ping Pong",
"Tailored Suits",
"Weddings",
"Streaming Services"
],
"qna1":
{
"question": "The first item on my bucket list is",
"answer": "simulate space but stay alive for as long as possible"
},
"qna2":
{
"question": "People would describe me as",
"answer": "easy-going, a little nerdy but with a mature essence"
}
}
```
================================================
FILE: docs/examples/deploy-using-bentoml.md
================================================
# Run Outlines using BentoML
[BentoML](https://github.com/bentoml/BentoML) is an open-source model serving library for building performant and scalable AI applications with Python. It comes with tools that you need for serving optimization, model packaging, and production deployment.
In this guide, we will show you how to use BentoML to run programs written with Outlines on GPU locally and in [BentoCloud](https://www.bentoml.com/), an AI Inference Platform for enterprise AI teams. The example source code in this guide is also available in the [examples/bentoml/](https://github.com/dottxt-ai/outlines/blob/main/examples/bentoml/) directory.
## Import a model
First we need to download an LLM (Mistral-7B-v0.1 in this example and you can use any other LLM) and import the model into BentoML's [Model Store](https://docs.bentoml.com/en/latest/guides/model-store.html). Let's install BentoML and other dependencies from PyPI (preferably in a virtual environment):
```shell
pip install -r requirements.txt
```
Then save the code snippet below as `import_model.py` and run `python import_model.py`.
**Note**: You need to accept related conditions on [Hugging Face](https://huggingface.co/mistralai/Mistral-7B-v0.1) first to gain access to Mistral-7B-v0.1.
```python
import bentoml
MODEL_ID = "mistralai/Mistral-7B-v0.1"
BENTO_MODEL_TAG = MODEL_ID.lower().replace("/", "--")
def import_model(model_id, bento_model_tag):
    """Download a Hugging Face causal LM and save it to the BentoML Model Store.

    Args:
        model_id: Hugging Face repository id of the model to download.
        bento_model_tag: Tag under which the model is stored in BentoML.
    """
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Use the `model_id` argument (not the module-level constant) so the
    # function imports whichever model the caller asks for.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
    )

    # Save the tokenizer and the weights side by side in the model store
    # entry so the service can load both from a single path.
    with bentoml.models.create(bento_model_tag) as bento_model_ref:
        tokenizer.save_pretrained(bento_model_ref.path)
        model.save_pretrained(bento_model_ref.path)
if __name__ == "__main__":
import_model(MODEL_ID, BENTO_MODEL_TAG)
```
You can verify the download is successful by running:
```shell
$ bentoml models list
Tag Module Size Creation Time
mistralai--mistral-7b-v0.1:m7lmf5ac2cmubnnz 13.49 GiB 2024-04-25 06:52:39
```
## Define a BentoML Service
As the model is ready, we can define a [BentoML Service](https://docs.bentoml.com/en/latest/guides/services.html) to wrap the capabilities of the model.
We will run the JSON-structured generation example [in the README](https://github.com/dottxt-ai/outlines?tab=readme-ov-file#efficient-json-generation-following-a-json-schema), with the following schema:
```python
DEFAULT_SCHEMA = """{
"title": "Character",
"type": "object",
"properties": {
"name": {
"title": "Name",
"maxLength": 10,
"type": "string"
},
"age": {
"title": "Age",
"type": "integer"
},
"armor": {"$ref": "#/definitions/Armor"},
"weapon": {"$ref": "#/definitions/Weapon"},
"strength": {
"title": "Strength",
"type": "integer"
}
},
"required": ["name", "age", "armor", "weapon", "strength"],
"definitions": {
"Armor": {
"title": "Armor",
"description": "An enumeration.",
"enum": ["leather", "chainmail", "plate"],
"type": "string"
},
"Weapon": {
"title": "Weapon",
"description": "An enumeration.",
"enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"],
"type": "string"
}
}
}"""
```
First, we need to define a BentoML service by decorating an ordinary class (`Outlines` here) with the `@bentoml.service` decorator. We pass this decorator some configuration and the GPU on which we want this service to run in BentoCloud (here an L4 with 24GB memory):
```python
import typing as t
import bentoml
from import_model import BENTO_MODEL_TAG
@bentoml.service(
traffic={
"timeout": 300,
},
resources={
"gpu": 1,
"gpu_type": "nvidia-l4",
},
)
class Outlines:
bento_model_ref = bentoml.models.get(BENTO_MODEL_TAG)
def __init__(self) -> None:
import outlines
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load tokenizer and model from the BentoML model reference path
hf_tokenizer = AutoTokenizer.from_pretrained(self.bento_model_ref.path)
hf_model = AutoModelForCausalLM.from_pretrained(
self.bento_model_ref.path,
torch_dtype=torch.float16,
low_cpu_mem_usage=True,
device_map="cuda"
)
# Then use the loaded model with Outlines
self.model = outlines.from_transformers(hf_model, hf_tokenizer)
...
```
We then need to define an HTTP endpoint using `@bentoml.api` to decorate the method `generate` of `Outlines` class:
```python
...
@bentoml.api
async def generate(
self,
prompt: str = "Give me a character description.",
json_schema: t.Optional[str] = DEFAULT_SCHEMA,
) -> t.Dict[str, t.Any]:
import json
import outlines
from outlines.types import JsonSchema
generator = outlines.Generator(self.model, JsonSchema(json_schema))
character = generator(prompt)
return json.loads(character)
```
Here the `@bentoml.api` decorator defines `generate` as an HTTP endpoint that accepts a JSON request body with two fields: `prompt` and `json_schema` (optional, which allows HTTP clients to provide their own JSON schema). The type hints in the function signature will be used to validate incoming JSON requests. You can define as many HTTP endpoints as you want by using `@bentoml.api` to decorate other methods of the `Outlines` class.
Now you can save the above code to `service.py` (or use [this implementation](https://github.com/dottxt-ai/outlines/blob/main/examples/bentoml/)), and run the code using the BentoML CLI.
## Run locally for testing and debugging
Then you can run a server locally by:
```shell
bentoml serve .
```
The server is now active at <http://localhost:3000>. You can interact with it using the Swagger UI or in other different ways:
<details>
<summary>CURL</summary>
```shell
curl -X 'POST' \
'http://localhost:3000/generate' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"prompt": "Give me a character description."
}'
```
</details>
<details>
<summary>Python client</summary>
```python
import bentoml
with bentoml.SyncHTTPClient("http://localhost:3000") as client:
response = client.generate(
prompt="Give me a character description"
)
print(response)
```
</details>
Expected output:
```shell
{
"name": "Aura",
"age": 15,
"armor": "plate",
"weapon": "sword",
"strength": 20
}
```
## Deploy to BentoCloud
After the Service is ready, you can deploy it to [BentoCloud](https://docs.bentoml.com/en/latest/bentocloud/get-started.html) for better management and scalability. [Sign up](https://cloud.bentoml.com/signup) if you haven't got a BentoCloud account.
Make sure you have [logged in to BentoCloud](https://docs.bentoml.com/en/latest/bentocloud/how-tos/manage-access-token.html), then run the following command to deploy it.
```shell
bentoml deploy .
```
Once the application is up and running on BentoCloud, you can access it via the exposed URL.
**Note**: For custom deployment in your own infrastructure, use [BentoML to generate an OCI-compliant image](https://docs.bentoml.com/en/latest/guides/containerization.html).
================================================
FILE: docs/examples/deploy-using-cerebrium.md
================================================
# Run Outlines using Cerebrium
[Cerebrium](https://www.cerebrium.ai/) is a serverless AI infrastructure platform that makes it easier for companies to build and deploy AI-based applications. They offer serverless GPUs with low cold-start times and over 12 varieties of GPU chips that autoscale, and you only pay for the compute you use.
In this guide we will show you how you can use Cerebrium to run programs written with Outlines on GPUs in the cloud.
## Setup Cerebrium
First, we install Cerebrium and login to get authenticated.
```shell
pip install cerebrium
cerebrium login
```
Then let us create our first project
```shell
cerebrium init outlines-project
```
## Setup Environment and Hardware
You set up your environment and hardware in the cerebrium.toml file that was created using the init function above.
```toml
[cerebrium.deployment]
docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04"
[cerebrium.hardware]
cpu = 2
memory = 14.0
gpu = "AMPERE A10"
gpu_count = 1
provider = "aws"
region = "us-east-1"
[cerebrium.dependencies.pip]
outlines = "==1.0.0"
transformers = "==4.38.2"
datasets = "==2.18.0"
accelerate = "==0.27.2"
```
## Setup inference
Running code in Cerebrium is like writing normal python with no special syntax. In a `main.py` file specify the following:
```python
import outlines
import transformers
from outlines.types import JsonSchema
model = outlines.from_transformers(
transformers.AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct"),
transformers.AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
)
schema = """{
"title": "Character",
"type": "object",
"properties": {
"name": {
"title": "Name",
"maxLength": 10,
"type": "string"
},
"age": {
"title": "Age",
"type": "integer"
},
"armor": {"$ref": "#/definitions/Armor"},
"weapon": {"$ref": "#/definitions/Weapon"},
"strength": {
"title": "Strength",
"type": "integer"
}
},
"required": ["name", "age", "armor", "weapon", "strength"],
"definitions": {
"Armor": {
"title": "Armor",
"description": "An enumeration.",
"enum": ["leather", "chainmail", "plate"],
"type": "string"
},
"Weapon": {
"title": "Weapon",
"description": "An enumeration.",
"enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"],
"type": "string"
}
}
}"""
generator = outlines.Generator(model, JsonSchema(schema))
```
On the first deploy, the model is downloaded and stored on disk, so subsequent calls load it from disk.
Every function in Cerebrium is callable through an API endpoint. Code at the topmost level (i.e., not in a function) is executed only when the container is first spun up; subsequent calls simply run the code defined in the function you call.
To deploy an API that creates a new character when called with a prompt you can add the following code to `main.py`:
```python
def generate(
prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.",
):
character = generator(
f"<s>[INST]Give me a character description. Describe {prompt}.[/INST]"
)
return character
```
## Run on the cloud
```shell
cerebrium deploy
```
You will see your application deploy, install pip packages and download the model. Once completed it will output a CURL request you can use to call your endpoint. Just remember to end
the url with the function you would like to call - in this case /generate. You should see your response returned!
================================================
FILE: docs/examples/deploy-using-modal.md
================================================
# Run Outlines using Modal
[Modal](https://modal.com/) is a serverless platform that allows you to easily run code on the cloud, including GPUs. It can come in very handy for those of us who don't have a monster GPU at home and want to be able to quickly and easily provision, configure and orchestrate cloud infrastructure.
In this guide we will show you how you can use Modal to run programs written with Outlines on GPU in the cloud.
## Requirements
We recommend installing `modal` and `outlines` in a virtual environment. You can create one with:
```shell
python -m venv venv
source venv/bin/activate
```
Then install the required packages:
```shell
pip install modal outlines
```
## Build the image
First we need to define our container image. If you need to access a gated model, you will need to provide an [access token](https://huggingface.co/settings/tokens). See the `.env` call below for how to provide a HuggingFace token.
Setting a token is best done by setting an environment variable `HF_TOKEN` with your token. If you do not wish to do this, we provide a commented-out line in the code to set the token directly in the code.
```python
from modal import Image, App, gpu
import os
# This creates a modal App object. Here we set the name to "outlines-app".
# There are other optional parameters like modal secrets, schedules, etc.
# See the documentation here: https://modal.com/docs/reference/modal.App
app = App(name="outlines-app")
# Specify a language model to use.
# Another good model to use is "NousResearch/Hermes-2-Pro-Mistral-7B"
language_model = "mistral-community/Mistral-7B-v0.2"
# Please set an environment variable HF_TOKEN with your Hugging Face API token.
# The code below (the .env({...}) part) will copy the token from your local
# environment to the container.
# More info on Image here: https://modal.com/docs/reference/modal.Image
outlines_image = Image.debian_slim(python_version="3.11").pip_install(
"outlines",
"transformers",
"datasets",
"accelerate",
"sentencepiece",
).env({
# This will pull in your HF_TOKEN environment variable if you have one.
'HF_TOKEN':os.environ['HF_TOKEN']
# To set the token directly in the code, uncomment the line below and replace
# 'YOUR_TOKEN' with the HuggingFace access token.
# 'HF_TOKEN':'YOUR_TOKEN'
})
```
## Setting the container up
When running longer Modal apps, it's recommended to download your language model when the container starts, rather than when the function is called. This will cache the model for future runs.
```python
# This function imports the model from Hugging Face. The modal container
# will call this function when it starts up. This is useful for
# downloading models, setting up environment variables, etc.
def import_model():
import outlines
import transformers
outlines.from_transformers(
transformers.AutoModelForCausalLM.from_pretrained(language_model),
transformers.AutoTokenizer.from_pretrained(language_model)
)
# This line tells the container to run the import_model function when it starts.
outlines_image = outlines_image.run_function(import_model)
```
## Define a schema
We will run the JSON-structured generation example [in the README](https://github.com/dottxt-ai/outlines?tab=readme-ov-file#efficient-json-generation-following-a-json-schema), with the following schema:
```python
# Specify a schema for the character description. In this case,
# we want to generate a character with a name, age, armor, weapon, and strength.
schema = """{
"title": "Character",
"type": "object",
"properties": {
"name": {
"title": "Name",
"maxLength": 10,
"type": "string"
},
"age": {
"title": "Age",
"type": "integer"
},
"armor": {"$ref": "#/definitions/Armor"},
"weapon": {"$ref": "#/definitions/Weapon"},
"strength": {
"title": "Strength",
"type": "integer"
}
},
"required": ["name", "age", "armor", "weapon", "strength"],
"definitions": {
"Armor": {
"title": "Armor",
"description": "An enumeration.",
"enum": ["leather", "chainmail", "plate"],
"type": "string"
},
"Weapon": {
"title": "Weapon",
"description": "An enumeration.",
"enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"],
"type": "string"
}
}
}"""
```
To make the inference work on Modal we need to wrap the corresponding function in a `@app.function` decorator. We pass to this decorator the image and GPU on which we want this function to run.
Let's choose an A100 with 80GB memory. Valid GPUs can be found [here](https://modal.com/docs/reference/modal.gpu).
```python
# Define a function that uses the image we chose, and specify the GPU
# and memory we want to use.
@app.function(image=outlines_image, gpu=gpu.A100(size='80GB'))
def generate(
    prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.",
):
    """Generate a character description that follows `schema` and print it.

    Args:
        prompt: Short description of the character to generate.
    """
    # Remember, this function is being executed in the container,
    # so we need to import the necessary libraries here. You should
    # do this with any other libraries you might need.
    import outlines
    import transformers
    from outlines.types import JsonSchema

    # Load the model into memory. The import_model function above
    # should have already downloaded the model, so this call
    # only loads the model into GPU memory.
    # Keep a reference to the wrapped model: the generator below needs it.
    model = outlines.from_transformers(
        transformers.AutoModelForCausalLM.from_pretrained(language_model, device_map="cuda"),
        transformers.AutoTokenizer.from_pretrained(language_model)
    )

    # Generate a character description based on the prompt.
    # We build a generator from
    # - model: the model we loaded above
    # - schema: the JSON schema we defined above, wrapped in JsonSchema
    generator = outlines.Generator(model, JsonSchema(schema))

    # Make sure you wrap your prompt in instruction tags ([INST] and [/INST])
    # to indicate that the prompt is an instruction. Instruction tags can vary
    # by models, so make sure to check the model's documentation.
    character = generator(
        f"<s>[INST]Give me a character description. Describe {prompt}.[/INST]"
    )

    # Print out the generated character.
    print(character)
```
We then need to define a `local_entrypoint` to call our function `generate` remotely.
```python
@app.local_entrypoint()
def main(
prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.",
):
# We use the "generate" function defined above -- note too that we are calling
# .remote() on the function. This tells modal to run the function in our cloud
# machine. If you want to run the function locally, you can call .local() instead,
# though this will require additional setup.
generate.remote(prompt)
```
Here the `@app.local_entrypoint()` decorator defines `main` as the function to start from locally when using the Modal CLI. You can save the above code to `example.py` (or use [this implementation](https://github.com/dottxt-ai/outlines/blob/main/examples/modal_example.py)). Let's now see how to run the code on the cloud using the Modal CLI.
## Run on the cloud
First install the Modal client from PyPI, if you have not already:
```shell
pip install modal
```
You then need to obtain a token from Modal. Run the following command:
```shell
modal setup
```
Once that is set you can run inference on the cloud using:
```shell
modal run example.py
```
You should see the Modal app initialize, and soon after see the result of the `print` function in your terminal. That's it!
================================================
FILE: docs/examples/earnings-reports.md
================================================
# Extracting financial data from earnings reports
A common task in finance is to extract financial data from earnings reports. Earnings reports are infamously poorly formatted, as the SEC does not have requirements for producing machine-readable documents.
Earnings reports are often provided as HTML documents, which can be difficult to parse. Investors often use complicated parsing systems or manual review to extract data. Entire companies are built around automating this task.
This cookbook is a proof of concept about how we can use LLMs to extract financial data directly into CSV. Comma-separated values are well-structured and can be defined by a regular expression, which Outlines can use to guide the LLM's output.
The example is a smaller subset of a full demo found [here](https://github.com/dottxt-ai/demos/tree/main/earnings-reports). The demo contains the full set of pre-processing steps needed to convert raw HTML into a structured CSV file, and tests the results across three companies' 10-K reports.
## Setup
Install outlines and required dependencies:
```shell
# Later versions of torch can have difficulty with certain CUDA drivers.
# We recommend using 2.4.0 for now, but you may wish to experiment with
# other versions.
pip install outlines pandas transformers torch==2.4.0 accelerate
```
## Load the model
Choose your language model. We'll use Phi-3 mini, which is small enough to run on reasonably small machines.
```python
import outlines
import torch
import transformers
model_name = 'microsoft/Phi-3-mini-4k-instruct'
tf_model = transformers.AutoModelForCausalLM.from_pretrained(
model_name, device_map="cuda", torch_dtype=torch.bfloat16
)
tf_tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = outlines.from_transformers(tf_model, tf_tokenizer)
```
## Set up the data
For brevity, we've attached the markdown version of Nvidia's 10k report. The [full demonstration](https://github.com/dottxt-ai/demos/tree/main/earnings-reports) processes the raw HTML version of the report to these markdown tables. Pages are filtered by whether they seem to contain income statements, and then compacted into the string you see below.
```python
income_statement = """
Table of ContentsNVIDIA Corporation and SubsidiariesConsolidated Statements of Income(In millions, except per share data)
| | | | | | | | | | | | | | | | | | |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| | | | Year Ended | | | | | | | | | | | | | | |
| | | | Jan 28, 2024 | | | | | | Jan 29, 2023 | | | | | | Jan 30, 2022 | | |
| Revenue | | | $ | 60,922 | | | | | $ | 26,974 | | | | | $ | 26,914 | |
| Cost of revenue | | | 16,621 | | | | | | 11,618 | | | | | | 9,439 | | |
| Gross profit | | | 44,301 | | | | | | 15,356 | | | | | | 17,475 | | |
| Operating expenses | | | | | | | | | | | | | | | | | |
| Research and development | | | 8,675 | | | | | | 7,339 | | | | | | 5,268 | | |
| Sales, general and administrative | | | 2,654 | | | | | | 2,440 | | | | | | 2,166 | | |
| Acquisition termination cost | | | | | | | | | 1,353 | | | | | | | | |
| Total operating expenses | | | 11,329 | | | | | | 11,132 | | | | | | 7,434 | | |
| Operating income | | | 32,972 | | | | | | 4,224 | | | | | | 10,041 | | |
| Interest income | | | 866 | | | | | | 267 | | | | | | 29 | | |
| Interest expense | | | (257) | | | | | | (262) | | | | | | (236) | | |
| Other, net | | | 237 | | | | | | (48) | | | | | | 107 | | |
| Other income (expense), net | | | 846 | | | | | | (43) | | | | | | (100) | | |
| Income before income tax | | | 33,818 | | | | | | 4,181 | | | | | | 9,941 | | |
| Income tax expense (benefit) | | | 4,058 | | | | | | (187) | | | | | | 189 | | |
| Net income | | | $ | 29,760 | | | | | $ | 4,368 | | | | | $ | 9,752 | |
| | | | | | | | | | | | | | | | | | |
| Net income per share: | | | | | | | | | | | | | | | | | |
| Basic | | | $ | 12\.05 | | | | | $ | 1\.76 | | | | | $ | 3\.91 | |
| Diluted | | | $ | 11\.93 | | | | | $ | 1\.74 | | | | | $ | 3\.85 | |
| | | | | | | | | | | | | | | | | | |
| Weighted average shares used in per share computation: | | | | | | | | | | | | | | | | | |
| Basic | | | 2,469 | | | | | | 2,487 | | | | | | 2,496 | | |
| Diluted | | | 2,494 | | | | | | 2,507 | | | | | | 2,535 | | |
"""
```
The markdown tables extracted from the earnings reports can vary widely in row names, column counts, data types, etc. The advantage of LLMs here is that we can define the data we want in terms of the data types, and the LLM will output the data in the desired format.
For comparison, here is how the income statement looks in the original HTML:

## Define the data we want
Outlines is often used for JSON output, but it can also be used for CSV. We know the columns we want to extract, and we know the data types of the columns. Year for example is always a four-digit number, revenue is a number with commas, and so on.
We can define a regex pattern for each column type:
```python
# Define the column type regex patterns
column_types = {
# Year is always a four-digit number
"year": r"\d{4}",
# Revenue, operating income, and net income are always numbers with commas.
# This regex permits integers that may begin with a minus sign, and may have
# commas separating the thousands, millions, etc.
"integer_comma": r"((-?\d+),?\d+|(-?\d+))",
# Number is currently not used, but it represents a number with up to two decimal places.
"number": r"(-?\d+(?:\.\d{1,2})?)",
}
```
Next, let's choose the columns we want to extract. We want
- Year, always a four-digit number
- Revenue, a number with commas
- Operating income, a number with commas
- Net income, a number with commas
```python
# Define the columns to extract, and their data types.
columns_to_extract = {
"year": "year",
"revenue": "integer_comma",
"operating_income": "integer_comma",
"net_income": "integer_comma",
}
```
You can modify `column_types` to match the data types of the columns you want to extract. Adding a new financial metric to extract is as simple as adding a new key/value pair to `columns_to_extract`:
```python
columns_to_extract["diluted_earnings_per_share"] = "number"
```
Additional columns are not well tested for accuracy, so use with caution.
## Create the regex describing the data we want
```python
# Create the header line. This is the requested column names
# separated by commas, i.e. "year,revenue,..."
header = ",".join(columns_to_extract.keys())
# Create the data capture patterns. These are the regex patterns
# that will be used to capture the data in each column
data_patterns = [column_types[dtype] for dtype in columns_to_extract.values()]
data_line = ",".join(data_patterns)
# Our final regex pattern.
max_rows = 3 # We expect 3 rows of data, firms usually report 3 years of income statements
csv_regex = f"{header}(\n{data_line}){{,{max_rows}}}\n\n"
print(csv_regex)
```
which gives us
```
year,revenue,operating_income,net_income(
\d{4},((-?\d+),?\d+|(-?\d+)),((-?\d+),?\d+|(-?\d+)),((-?\d+),?\d+|(-?\d+))){,3}
```
Pretty hairy, right? Thankfully, we have a simple function to construct this regex for you. The regex defines a header line, followed by a data line that repeats for each row of data we want to extract. Passing the regex to `outlines.Generator` will produce a function that will __always__ produce a CSV string that is consistent with the regex.
## Prompting the model
Outlines does not add system or instruction tokens by default, so we need to use `transformers.AutoTokenizer` to add them for whatever model we're using.
```python
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
def add_instruction(prompt):
return tokenizer.apply_chat_template([{"role": "user", "content": prompt}], tokenize=False, add_generation_prompt=True)
print(add_instruction("Howdy"))
```
```
<|user|>
Howdy<|end|>
<|assistant|>
```
Our prompt roughly describes the task we want the model to perform, and a few pieces of information it may need to know about income statements.
```python
def extract_financial_data_prompt(columns_to_extract, income_statement):
user_prompt = f"""
Extract annual financial data from this set of pages. Pages
are from a 10k filing and were chosen because they may contain
a comprehensive income statement. Note that selected pages may
be incorrectly extracted, so you should verify that you are extracting
from the comprehensive income statement and not some other financial
statement.
Create a row for each year available in the income statement with the
following columns: {', '.join(columns_to_extract.keys())}. Firms typically report the
most recent 3 years of data, but this can vary.
Each column has types: {', '.join(columns_to_extract.values())}.
# Relevant pages:
{income_statement}
# Key instructions:
1. Look ONLY at the "Consolidated Statements of Income" table
2. For operating income, look for "Income from operations" or "Operating income"
3. For net income, use the TOTAL net income figure, not amounts allocated to specific share classes
4. Use NULL for missing values
5. Operating income must be less than revenue
6. Net income must be less than operating income
7. Ignore segment breakdowns, quarterly data, or per-share amounts
# Output format:
- CSV format with headers: {','.join(columns_to_extract.keys())}
- Use NULL for missing values
- If no data are found, do not create a row.
- Enter two newline characters to terminate the CSV when no more data are found.
# Definitions:
- Revenue: Total sales of goods and services. Usually this is at the top of the
income statement.
- Operating income: Revenue minus operating expenses for the entire company. This is revenue
minus costs. Operating income is also called operating profit, EBIT, or income from
operations.
- Net income: Operating income minus taxes. This is the bottom line of the
income statement.
"""
return add_instruction(user_prompt)
```
## Running the model
Now that we have our prompt and regular expression, we can run the model.
Construct our regex extractor function.
```python
from outlines.types import Regex
csv_extractor = outlines.Generator(model, Regex(csv_regex))
```
Provide the prompt to the model and run it:
```python
csv_data = csv_extractor(
extract_financial_data_prompt(columns_to_extract, income_statement),
max_new_tokens=1024,
)
print(csv_data)
```
```
year,revenue,operating_income,net_income
2024,60922,32972,29760
2023,26974,4224,4368
2022,26914,10041,9752
```
Voila! We've extracted the financial data from the income statement, and it's correct upon inspection.
You can even load this into a `pandas` DataFrame for further analysis:
```python
import pandas as pd
from io import StringIO
df = pd.read_csv(StringIO(csv_data))
print(df)
```
```
year revenue operating_income net_income
0 2024 60922 32972 29760
1 2023 26974 4224 4368
2 2022 26914 10041 9752
```
================================================
FILE: docs/examples/extract_event_details.md
================================================
This recipe demonstrates how to use the `outlines` library to extract structured event details from a text message.
We will extract the title, location, and start date and time from messages like the following:
```plaintext
Hello Kitty, my grandmother will be here, I think it's better to postpone
our appointment to review math lessons to next Friday at 2pm at the same
place, 3 avenue des tanneurs, one hour will be enough see you 😘
```
Let's see how to extract the event details from the message with the MLX
library, which is dedicated to Apple Silicon processors (M series).
```python
--8<-- "docs/examples/extract_event_details.py"
```
The output will be:
```plaintext
Today: Saturday 16 November 2024 and it's 10:55
```
and the extracted event information will be:
```json
{
"title":"Math Review",
"location":"3 avenue des tanneurs",
"start":"2024-11-22T14:00:00Z"
}
```
To find out more about this use case, we recommend the [ICS Generator](https://github.com/jrudoler/ics-generator) project developed by [Joseph Rudoler](https://x.com/JRudoler).
================================================
FILE: docs/examples/extract_event_details.py
================================================
from datetime import datetime
from typing import Optional

from mlx_lm import load
from pydantic import BaseModel, Field

import outlines
from outlines import Generator, Template
# Load the model
model = outlines.from_mlxlm(*load("mlx-community/Hermes-3-Llama-3.1-8B-8bit"))
# Define the event schema using Pydantic
class Event(BaseModel):
    # Short title of the event.
    title: str = Field(description="title of the event")
    # Where the event takes place.
    location: str
    # Start date and time of the event. The message may not contain a date,
    # so the field defaults to None and the annotation must allow None too.
    start: Optional[datetime] = Field(
        default=None, description="date of the event if available in iso format"
    )
# Load the prompt template from a string
prompt_template = Template.from_string(
"""
Today's date and time are {{ now }}
Given a user message, extract information of the event like date and time in iso format, location and title.
If the given date is relative, think step by step to find the right date.
Here is the message:
{{ message }}
"""
)
# Get the current date and time
now = datetime.now().strftime("%A %d %B %Y and it's %H:%M")
# Sample message
message = """Hello Kitty, my grandmother will be here, I think it's better to postpone our
appointment to review math lessons to next Friday at 2pm at the same place, 3 avenue des tanneurs, I think that one hour will be enough
see you 😘 """
# Create the generator
generator = Generator(model, Event)
# Create the prompt
prompt = prompt_template(now=now, message=message)
# Extract the event information
event = generator(prompt)
# Print the current date and time
print(f"Today: {now}")
# Print the extracted event information
print(event)
================================================
FILE: docs/examples/extraction.md
================================================
# Named entity extraction
Named Entity Extraction is a fundamental problem in NLP. It involves identifying and categorizing named entities within a document: people, organizations, dates, places, etc. It is usually the first step in a more complex NLP workflow. Here we will use the example of a pizza restaurant that receives orders via its website and needs to identify the number and types of pizzas that are being ordered.
Getting LLMs to output the extracted entities in a structured format can be challenging. In this tutorial we will see how we can use Outlines' JSON-structured generation to extract entities from a document and return them in a valid JSON data structure 100% of the time.
As always, we start with initializing the model. We will be using Phi-3-mini-4k-instruct, a small model (we're GPU poor):
```python
import transformers
import outlines
model_name = "microsoft/Phi-3-mini-4k-instruct"
model = outlines.from_transformers(
transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda"),
transformers.AutoTokenizer.from_pretrained(model_name),
)
```
And we will be using the following prompt template:
```python
from outlines import Template
take_order = Template.from_string(
"""You are the owner of a pizza parlor. Customers \
send you orders from which you need to extract:
1. The pizza that is ordered
2. The number of pizzas
# EXAMPLE
ORDER: I would like one Margherita pizza
RESULT: {"pizza": "Margherita", "number": 1}
# OUTPUT INSTRUCTIONS
Answer in valid JSON. Here are the different objects relevant for the output:
Order:
pizza (str): name of the pizza
number (int): number of pizzas
Return a valid JSON of type "Order"
# OUTPUT
ORDER: {{ order }}
RESULT: """
)
```
We now define our data model using Pydantic:
```python
from enum import Enum
from pydantic import BaseModel
class Pizza(str, Enum):
    # Pizzas on the menu. The string values (not the member names) are what
    # appears in the generated JSON.
    margherita = "Margherita"
    pepperonni = "Pepperoni"
    calzone = "Calzone"
class Order(BaseModel):
    # Output type handed to the generator: one pizza type and a quantity.
    pizza: Pizza
    number: int
```
We can now define our generator and call it on several incoming orders:
```python
orders = [
"Hi! I would like to order two pepperonni pizzas and would like them in 30mins.",
"Is it possible to get 12 margheritas?"
]
prompts = [take_order(order=order) for order in orders]
generator = outlines.Generator(model, Order)
results = generator(prompts)
print(results)
# ['{"pizza": "Pepperoni", "number": 2}',
# '{"pizza": "Margherita", "number": 12}']
```
There are several ways you could improve this example:
- Clients may order several types of pizzas.
- Clients may order drinks as well.
- If the pizza place has a delivery service we need to extract the client's address and phone number
- Clients may specify the time for which they want the pizza. We could then check against a queuing system and reply to them with the estimated delivery time.
How would you change the Pydantic model to account for these use cases?
================================================
FILE: docs/examples/index.md
================================================
# Examples
This part of the documentation provides a few cookbooks that you can browse to get acquainted with the library and get some inspiration about what you could do with structured generation. Remember that you can easily change the model that is being used!
- [Classification](classification.md): Classify customer requests.
- [Named Entity Extraction](extraction.md): Extract information from pizza orders.
- [Dating Profiles](dating_profiles.md): Build dating profiles from descriptions using prompt templating and JSON-structured generation.
- [Chain Of Density](chain_of_density.md): Summarize documents using chain of density prompting and JSON-structured generation.
- [Playing Chess](models_playing_chess.md): Make Phi-3 Mini play chess against itself using regex-structured generation.
- [SimToM](simtom.md): Improve LLMs' Theory of Mind capabilities with perspective-taking prompting and JSON-structured generation.
- [Q&A with Citations](qa-with-citations.md): Answer questions and provide citations using JSON-structured generation.
- [Knowledge Graph Generation](knowledge_graph_extraction.md): Generate a Knowledge Graph from unstructured text using JSON-structured generation.
- [Structured Generation Workflow](structured_generation_workflow.md):
- [Chain Of Thought (CoT)](chain_of_thought.md): Generate a series of intermediate reasoning steps using regex-structured generation.
- [ReAct Agent](react_agent.md): Build an agent with open weights models using regex-structured generation.
- [Structured Generation from PDFs](read-pdfs.md): Use Outlines with vision-language models to read PDFs and produce structured output.
- [Earnings reports to CSV](earnings-reports.md): Extract data from earnings reports to CSV using regex-structured generation.
- [Receipt Digitization](receipt-digitization.md): Extract information from a picture of a receipt using structured generation.
- [Extract Events Details](extract_event_details.md): Extract event details from a text message using structured generation.
Run Outlines on the cloud:
- [BentoML](deploy-using-bentoml.md)
- [Cerebrium](deploy-using-cerebrium.md)
- [Modal](deploy-using-modal.md)
================================================
FILE: docs/examples/knowledge_graph_extraction.md
================================================
# Knowledge Graph Extraction
In this guide, we use [outlines](https://dottxt-ai.github.io/outlines/) to extract a knowledge graph from unstructured text.
We will use [llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library. Outlines supports llama-cpp-python, but we need to install it ourselves:
```shell
pip install llama-cpp-python
```
To create an outlines `LlamaCpp` model, you first need to create a `Llama` object from the `llama-cpp-python` library. Then you can create the outlines model by calling `outlines.from_llamacpp` with the `Llama` object instance as argument. To create the `Llama` object, you need to provide the model weights by passing the name of the repository on the HuggingFace Hub, and the filenames or glob pattern (it will automatically download the weights from the hub):
```python
import llama_cpp
import outlines
llm = llama_cpp.Llama(
"NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF",
tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
"NousResearch/Hermes-2-Pro-Llama-3-8B"
),
n_gpu_layers=-1,
flash_attn=True,
n_ctx=8192,
verbose=False
)
model = outlines.from_llamacpp(llm)
```
??? note "(Optional) Store the model weights in a custom folder"
By default the model weights are downloaded to the hub cache, but if we want to store the weights in a custom folder, we can pull a quantized GGUF model [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF) by [NousResearch](https://nousresearch.com/) from [HuggingFace](https://huggingface.co/):
```shell
wget https://hf.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
```
We initialize the model:
```python
from llama_cpp import Llama
llm = Llama("/path/to/model/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ...)
```
## Knowledge Graph Extraction
We first need to define our Pydantic class for each node and each edge of the knowledge graph:
```python
from pydantic import BaseModel, Field
class Node(BaseModel):
    """Node of the Knowledge Graph"""

    # Integer id; edges point at nodes through their `source`/`target` ids.
    id: int = Field(..., description="Unique identifier of the node")
    label: str = Field(..., description="Label of the node")
    property: str = Field(..., description="Property of the node")
class Edge(BaseModel):
    """Edge of the Knowledge Graph"""

    # `source` and `target` hold the integer ids of the connected nodes.
    source: int = Field(..., description="Unique source of the edge")
    target: int = Field(..., description="Unique target of the edge")
    label: str = Field(..., description="Label of the edge")
    property: str = Field(..., description="Property of the edge")
```
We then define the Pydantic class for the knowledge graph and get its JSON schema:
```python
from typing import List
class KnowledgeGraph(BaseModel):
    """Generated Knowledge Graph"""

    # Top-level output type: the generator is constrained to this structure.
    nodes: List[Node] = Field(..., description="List of nodes of the knowledge graph")
    edges: List[Edge] = Field(..., description="List of edges of the knowledge graph")
schema = KnowledgeGraph.model_json_schema()
```
We then need to adapt our prompt to the [Hermes prompt format for JSON schema](https://github.com/NousResearch/Hermes-Function-Calling?tab=readme-ov-file#prompt-format-for-json-mode--structured-outputs):
```python
from outlines import Template
generate_hermes_prompt = Template.from_string(
"""
<|im_start|>system
You are a world class AI model who answers questions in JSON
Here's the json schema you must adhere to:
<schema>
{{ schema }}
</schema>
<|im_end|>
<|im_start|>user
{{ user_prompt }}
<|im_end|>
<|im_start|>assistant
<schema>
"""
)
```
For a given user prompt, for example:
```python
user_prompt = "Alice loves Bob and she hates Charlie."
```
We can use `outlines.Generator` by passing the Pydantic class we previously defined, and call the generator with the Hermes prompt:
```python
from outlines import Generator
generator = Generator(model, KnowledgeGraph)
prompt = generate_hermes_prompt(schema=schema, user_prompt=user_prompt)
response = generator(prompt, max_tokens=1024, temperature=0, seed=42)
```
We obtain the nodes and edges of the knowledge graph:
```python
print(response)
# {"nodes":[{"id":1,"label":"Alice","property":"loves,hates"},
# {"id":2,"label":"Bob","property":"loved_by"},
# {"id":3,"label":"Charlie","property":"hated_by"}],
# "edges":[{"source":1,"target":2,"label":"loves","property":"love"},
# {"source":1,"target":3,"label":"hates","property":"hate"}]}
```
## (Optional) Visualizing the Knowledge Graph
We can use the [Graphviz library](https://graphviz.readthedocs.io/en/stable/) to visualize the generated knowledge graph. For detailed installation instructions, see [here](https://graphviz.readthedocs.io/en/stable/#installation).
```python
import json

from graphviz import Digraph

# `response` is the raw JSON string returned by the generator (see the printed
# output above), so it must be parsed before its nodes and edges can be indexed.
graph = json.loads(response)

dot = Digraph()
for node in graph["nodes"]:
    dot.node(str(node["id"]), node["label"], shape='circle', width='1', height='1')
for edge in graph["edges"]:
    dot.edge(str(edge["source"]), str(edge["target"]), label=edge["label"])

dot.render('knowledge-graph.gv', view=True)
```

This example was originally contributed by [Alonso Silva](https://github.com/alonsosilvaallende).
================================================
FILE: docs/examples/models_playing_chess.md
================================================
# Large language models playing chess
In this example we will make a Phi-3 model play chess against itself. On its own the model easily generates invalid moves, so we will give it a little help. At each step we will generate a regex that only matches valid moves, and use it to constrain the model to generating only valid moves.
## The chessboard
The game will be played on a standard chessboard. We will use the `chess` [library](https://github.com/niklasf/python-chess) to track the opponents' moves, and check that the moves are valid.
```python
%pip install outlines -q
%pip install chess -q
%pip install transformers accelerate einops -q
import chess
board = chess.Board("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1")
```
## The opponents
Phi-3 will be playing against itself:
```python
import transformers
import outlines
model_name = "microsoft/Phi-3-mini-4k-instruct"
model = outlines.from_transformers(
transformers.AutoModelForCausalLM.from_pretrained(model_name),
transformers.AutoTokenizer.from_pretrained(model_name),
)
```
## A little help for the language model
To make sure Phi-3 generates valid chess moves we will use Outlines' regex-structured generation. We define a function that takes the current state of the board and returns a regex that matches all possible legal moves:
```python
import re
from outlines.types.dsl import either, String
def legal_moves_regex(board):
    """Build a regex that only matches valid moves.

    Every legal move of `board` is rendered with `board.san`, the "+" and "#"
    characters are removed from it, and the resulting strings are combined
    into a single alternation with `either`.
    """
    legal_moves_str = [board.san(move) for move in board.legal_moves]
    # Strip "+" and "#" so the model only has to produce the bare move text.
    legal_moves_str = [re.sub(r"[+#]", "", move) for move in legal_moves_str]
    return either(*[String(move) for move in legal_moves_str])
```
## Prompting the language model
The prompt corresponds to the current state of the board, so we start with:
```python
prompt = "Let's play Chess. Moves: "
```
We update the prompt at each step so it reflects the state of the board after the previous move.
## Let's play
```python
board_state = " "
turn_number = 0
while not board.is_game_over():
    # The set of legal moves changes after every move, so the constraint is
    # rebuilt on each turn.
    regex_pattern = legal_moves_regex(board)
    structured = model(prompt + board_state, regex_pattern)
    move = board.parse_san(structured)

    if turn_number % 2 == 0:  # It's White's turn
        board_state += board.san(move) + " "
    else:
        # NOTE: `turn_number` counts half-moves, so the label written after
        # Black's move is 1, 3, 5, ... (as in the sample output below) rather
        # than a standard PGN move number.
        board_state += board.san(move) + " " + str(turn_number) + "."
    turn_number += 1

    board.push(move)

    print(board_state)
```
Interestingly enough, Phi-3 hates capturing.
```pgn
e4 e5 1.Nf3 Ne7 3.b4 Nf5 5.Nc3 Ne7 7.Bb5 a6 9.Na4 b6 11.c3 Nec6 13.c4 a5 15.d4 Qg5 17.Nd2 Bb7 19.dxe5
```
*This example was originally authored by [@903124S](https://x.com/903124S) in [this gist](https://gist.github.com/903124/cfbefa24da95e2316e0d5e8ef8ed360d).*
================================================
FILE: docs/examples/prompt_templates/chain_of_density.txt
================================================
Article: {{ article }}
You will generate increasingly concise, entity-dense summaries of the above Article.
Repeat the following 2 steps 5 times.
Step 1. Identify 1-3 informative Entities ("; " delimited) from the Article which are missing from the previously generated summary.
Step 2. Write a new, denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities.
A Missing Entity is:
- Relevant: to the main story.
- Specific: descriptive yet concise (5 words or fewer).
- Novel: not in the previous summary.
- Faithful: present in the Article.
- Anywhere: located anywhere in the Article.
Guidelines:
- The first summary should be long (4-5 sentences, ~80 words) yet highly non-specific, containing little information beyond the entities marked as missing. Use overly verbose language and fillers (e.g., "this article discusses") to reach ~80 words.
- Make every word count: rewrite the previous summary to improve flow and make space for additional entities.
- Make space with fusion, compression, and removal of uninformative phrases like "the article discusses".
- The summaries should become highly dense and concise yet self-contained, e.g., easily understood without the Article.
- Missing entities can appear anywhere in the new summary.
- Never drop entities from the previous summary. If space cannot be made, add fewer new entities.
Remember, use the exact same number of words for each summary.
Answer in JSON. The JSON should be a dictionary with key "summaries" that contains a list (length 5) of dictionaries whose keys are "Missing_Entities" and "Denser_Summary".
================================================
FILE: docs/examples/prompt_templates/classification.txt
================================================
You are an experienced customer success manager.
Given a request from a client, you need to determine when the
request is urgent using the label "URGENT" or when it can wait
a little with the label "STANDARD".
# Examples
Request: "How are you?"
Label: STANDARD
Request: "I need this fixed immediately!"
Label: URGENT
# TASK
Request: {{ request }}
Label:
================================================
FILE: docs/examples/prompt_templates/react_agent.txt
================================================
<|im_start|>system
You are a world class AI model who answers questions in JSON with correct Pydantic schema.
Here's the json schema you must adhere to:
<schema>
{{ schema }}
</schema>
Today is {{ today }}
You run in a loop of Scratchpad, Thought, Action, Action Input, PAUSE, Observation.
At the end of the loop you output a Final Answer.
Use Scratchpad to store the information from the Observation useful to answer the question
Use Thought to describe your thoughts about the question you have been asked and reflect carefully about the Observation if it exists.
Use Action to run one of the actions available to you.
Use Action Input to input the arguments of the selected action - then return PAUSE.
Observation will be the result of running those actions.
Your available actions are:
calculate:
e.g. calculate: 4**2 / 3
Runs a calculation and returns the number - uses Python so be sure to use floating point syntax if necessary
wikipedia:
e.g. wikipedia: Django
Returns a summary from searching Wikipedia
DO NOT TRY TO GUESS THE ANSWER. Begin!
<|im_end|>
<|im_start|>user
{{ question }}
<|im_end|>
<|im_start|>assistant
================================================
FILE: docs/examples/prompt_templates/simtom_prospective_taking.txt
================================================
<s>[INST] The following is a sequence of events about some characters, that takes place in multiple locations.
Your job is to output only the events that the specified character, {{character}}, knows about.
Here are a few rules:
1. A character knows about all events that they do.
2. If a character is in a certain room/location, that character knows about all other events that happens in the room. This includes other characters leaving or exiting the location, the locations of objects in that location, and whether somebody moves an object to another place.
3. If a character leaves a location, and is NOT in that location, they no longer know about any events that happen within that location. However, they can re-enter the location.
Story: {{story}}
What events does {{character}} know about? Only output the events according to the above rules, do not provide an explanation. [/INST]
================================================
FILE: docs/examples/prompt_templates/simtom_simulation.txt
================================================
<s>[INST] {% for event in events %}
{{event}}
{% endfor %}
You are {{name}}.
Based on the above information, answer the following question:
{{question}}
You must choose one of the above choices, do not say there is not enough information. Answer with a single word, do not output anything else. [/INST]
================================================
FILE: docs/examples/qa-with-citations.md
================================================
# Generate Synthetic Data and Q&A with Citations
This tutorial is adapted from the [instructor-ollama notebook](https://github.com/alonsosilvaallende/Hermes-Function-Calling/blob/main/examples/instructor_ollama.ipynb). We start with a simple example to generate synthetic data and then we approach the problem of question answering by providing citations.
We will use [llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library. Outlines supports llama-cpp-python, but we need to install it ourselves:
```shell
pip install llama-cpp-python
```
We download the model weights by passing the name of the repository on the HuggingFace Hub, and the filenames (or glob pattern):
```python
import llama_cpp
import outlines
llm = llama_cpp.Llama(
"NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF",
tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
"NousResearch/Hermes-2-Pro-Llama-3-8B"
),
n_gpu_layers=-1,
flash_attn=True,
n_ctx=8192,
verbose=False
)
model = outlines.from_llamacpp(llm)
```
??? note "(Optional) Store the model weights in a custom folder"
By default the model weights are downloaded to the hub cache, but if we want to store the weights in a custom folder, we can pull a quantized GGUF model [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF) by [NousResearch](https://nousresearch.com/) from [HuggingFace](https://huggingface.co/):
```shell
wget https://hf.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
```
We initialize the model:
```python
from llama_cpp import Llama
llm = Llama("/path/to/model/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ...)
```
## Generate Synthetic Data
We first need to define our Pydantic class for a user:
```python
from pydantic import BaseModel, Field
class UserDetail(BaseModel):
    # One synthetic user record.
    id: int = Field(..., description="Unique identifier")  # so the model keeps track of the number of users
    first_name: str
    last_name: str
    age: int
```
We then define a Pydantic class for a list of users:
```python
from typing import List
class Users(BaseModel):
    # Wrapper object so the generator returns a list of users under one key.
    users: List[UserDetail]
```
We can use an `outlines.Generator` by passing the Pydantic class we just defined, and call the generator:
```python
import json
generator = outlines.Generator(model, Users)
response = generator("Create 5 fake users", max_tokens=1024, temperature=0, seed=42)
response = json.loads(response)
print(response['users'])
# [{'id': 1, 'first_name': 'John', 'last_name': 'Doe', 'age': 25},
# {'id': 2, 'first_name': 'Jane', 'last_name': 'Doe', 'age': 30},
# {'id': 3, 'first_name': 'Bob', 'last_name': 'Smith', 'age': 40},
# {'id': 4, 'first_name': 'Alice', 'last_name': 'Smith', 'age': 35},
# {'id': 5, 'first_name': 'John', 'last_name': 'Smith', 'age': 20}]
```
```python
# `response` was parsed with `json.loads` above, so each user is a plain dict.
for user in response['users']:
    print(user['first_name'])
    print(user['last_name'])
    print(user['age'])
    print("#####")
# John
# Doe
# 25
# #####
# Jane
# Doe
# 30
# #####
# Bob
# Smith
# 40
# #####
# Alice
# Smith
# 35
# #####
# John
# Smith
# 20
# #####
```
## QA with Citations
We first need to define our Pydantic class for QA with citations:
```python
from typing import List
from pydantic import BaseModel
class QuestionAnswer(BaseModel):
    # Answer to `question`, together with the citations backing it.
    question: str
    answer: str
    citations: List[str]
schema = QuestionAnswer.model_json_schema()
```
We then need to adapt our prompt to the [Hermes prompt format for JSON schema](https://github.com/NousResearch/Hermes-Function-Calling?tab=readme-ov-file#prompt-format-for-json-mode--structured-outputs):
```python
from outlines import Template
hermes_prompt = Template.from_string(
"""
<|im_start|>system
You are a world class AI model who answers questions in JSON with correct and exact citations
extracted from the `Context`.
Here's the json schema you must adhere to:
<schema>
{{ schema }}
</schema>
<|im_end|>
<|im_start|>user
`Context`:
{{ context }}
`Question`:
{{ question }}
<|im_end|>
<|im_start|>assistant
"""
)
```
We can use `outlines.Generator` by passing the Pydantic class we previously defined, and call the generator with Hermes prompt:
```python
question = "What did the author do during college?"
context = """
My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.
I went to an arts high school but in university I studied Computational Mathematics and physics.
As part of coop I worked at many companies including Stitchfix, Facebook.
I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.
"""
generator = outlines.Generator(model, QuestionAnswer)
prompt = hermes_prompt(question=question, context=context, schema=schema)
response = generator(prompt, max_tokens=1024, temperature=0, seed=42)
print(response)
# {"question": "What did the author do during college?", "answer": "The author studied Computational Mathematics and physics in university and was also involved in starting the Data Science club, serving as its president for 2 years.", "citations": ["I went to an arts high school but in university I studied Computational Mathematics and physics.", "I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years."]}
```
We can do the same for a list of question-context pairs:
```python
question1 = "Where was John born?"
context1 = """
John Doe is a software engineer who was born in New York, USA.
He studied Computer Science at the Massachusetts Institute of Technology.
During his studies, he interned at Google and Microsoft.
He also founded the Artificial Intelligence club at his university and served as its president for three years.
"""
question2 = "What did Emily study in university?"
context2 = """
Emily Smith is a data scientist from London, England.
She attended the University of Cambridge where she studied Statistics and Machine Learning.
She interned at IBM and Amazon during her summer breaks.
Emily was also the head of the Women in Tech society at her university.
"""
question3 = "Which companies did Robert intern at?"
context3 = """
Robert Johnson, originally from Sydney, Australia, is a renowned cybersecurity expert.
He studied Information Systems at the University of Melbourne.
Robert interned at several cybersecurity firms including NortonLifeLock and McAfee.
He was also the leader of the Cybersecurity club at his university.
"""
question4 = "What club did Alice start at her university?"
context4 = """
Alice Williams, a native of Dublin, Ireland, is a successful web developer.
She studied Software Engineering at Trinity College Dublin.
Alice interned at several tech companies including Shopify and Squarespace.
She started the Web Development club at her university and was its president for two years.
"""
question5 = "What did Michael study in high school?"
context5 = """
Michael Brown is a game developer from Tokyo, Japan.
He attended a specialized high school where he studied Game Design.
He later attended the University of Tokyo where he studied Computer Science.
Michael interned at Sony and Nintendo during his university years.
He also started the Game Developers club at his university.
"""
# The output type is the same for every question, so build the generator once
# instead of once per iteration.
generator = outlines.Generator(model, QuestionAnswer)

for question, context in [
    (question1, context1),
    (question2, context2),
    (question3, context3),
    (question4, context4),
    (question5, context5),
]:
    prompt = hermes_prompt(question=question, context=context, schema=schema)
    response = generator(prompt, max_tokens=1024, temperature=0, seed=42)
    # The generator returns a JSON string; parse it to access the fields.
    response = json.loads(response)
    print(question)
    print(response['answer'])
    print(response['citations'])
    print("\n\n")
# 'Where was John born?'
# 'John Doe was born in New York, USA.'
# ['John Doe is a software engineer who was born in New York, USA.']
#
#
# 'What did Emily study in university?'
# 'Emily studied Statistics and Machine Learning in university.'
# ['She attended the University of Cambridge where she studied Statistics and Machine Learning.']
#
#
# 'Which companies did Robert intern at?'
# 'Robert interned at NortonLifeLock and McAfee.'
# ['Robert Johnson, originally from Sydney, Australia, is a renowned cybersecurity expert. He interned at several cybersecurity firms including NortonLifeLock and McAfee.']
#
#
# 'What club did Alice start at her university?'
# 'Alice started the Web Development club at her university.'
# ['Alice Williams, a native of Dublin, Ireland, is a successful web developer. She started the Web Development club at her university and was its president for two years.']
#
#
# 'What did Michael study in high school?'
# 'Michael studied Game Design in high school.'
# ['Michael Brown is a game developer from Tokyo, Japan. He attended a specialized high school where he studied Game Design.']
```
This example was originally contributed by [Alonso Silva](https://github.com/alonsosilvaallende).
================================================
FILE: docs/examples/react_agent.md
================================================
# ReAct Agent
This example shows how to use [outlines](https://dottxt-ai.github.io/outlines/) to build your own agent with open weights local models and structured outputs. It is inspired by the blog post [A simple Python implementation of the ReAct pattern for LLMs](https://til.simonwillison.net/llms/python-react-pattern) by [Simon Willison](https://simonwillison.net/).
The ReAct pattern (for Reason+Act) is described in the paper [ReAct: Synergizing Reasoning and Acting in Language Models](https://arxiv.org/abs/2210.03629). It's a pattern where you implement additional actions that an LLM can take - searching Wikipedia or running calculations for example - and then teach it how to request the execution of those actions, and then feed their results back into the LLM.
Additionally, we give the LLM the possibility of using a scratchpad described in the paper [Show Your Work: Scratchpads for Intermediate Computation with Language Models](https://arxiv.org/abs/2112.00114) which improves the ability of LLMs to perform multi-step computations.
We use [llama.cpp](https://github.com/ggerganov/llama.cpp) using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library. Outlines supports llama-cpp-python, but we need to install it ourselves:
```shell
pip install llama-cpp-python
```
We download the model weights by passing the name of the repository on the HuggingFace Hub, and the filenames (or glob pattern):
```python
import llama_cpp
import outlines
llm = llama_cpp.Llama(
"NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF",
tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
"NousResearch/Hermes-2-Pro-Llama-3-8B"
),
n_gpu_layers=-1,
flash_attn=True,
n_ctx=8192,
verbose=False
)
model = outlines.from_llamacpp(llm)
```
??? note "(Optional) Store the model weights in a custom folder"
By default the model weights are downloaded to the hub cache, but if we want to store the weights in a custom folder, we can pull a quantized GGUF model [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF) by [NousResearch](https://nousresearch.com/) from [HuggingFace](https://huggingface.co/):
```shell
wget https://hf.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
```
We initialize the model:
```python
from llama_cpp import Llama
llm = Llama("/path/to/model/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ...)
```
## Build a ReAct agent
In this example, we use two tools:
- wikipedia: \<search term\> - searches Wikipedia and returns the snippet of the first result
- calculate: \<expression\> - evaluates an expression using Python's eval() function
```python
import httpx
def wikipedia(q):
    """Search English Wikipedia for `q` and return the snippet of the first result.

    Raises whatever `httpx` raises on a failed request, and `KeyError` or
    `IndexError` when the response holds no search results.
    """
    response = httpx.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "list": "search",
            "srsearch": q,
            "format": "json",
        },
    )
    return response.json()["query"]["search"][0]["snippet"]
def calculate(numexp):
    """Evaluate the Python expression `numexp` and return its value."""
    # SECURITY: `eval` executes arbitrary Python. This tool is meant to be fed
    # expressions written by the model, so only run it in a sandboxed or
    # otherwise trusted environment.
    return eval(numexp)
```
We define the logic of the agent through a Pydantic class. First, we want the LLM to decide only between the two previously defined tools:
```python
from enum import Enum
class Action(str, Enum):
    # The tools the model may pick; values match the tool function names.
    wikipedia = "wikipedia"
    calculate = "calculate"
```
Our agent will loop through Thought and Action. We explicitly give the Action Input field so it doesn't forget to add the arguments of the Action. We also add a scratchpad (optional).
```python
from pydantic import BaseModel, Field
class Reason_and_Act(BaseModel):
    # One intermediate step of the loop: reasoning plus a tool call.
    Scratchpad: str = Field(..., description="Information from the Observation useful to answer the question")
    Thought: str = Field(..., description="It describes your thoughts about the question you have been asked")
    # The annotation refers to the `Action` enum, restricting the choice to the two tools.
    Action: Action
    Action_Input: str = Field(..., description="The arguments of the Action.")
```
Our agent will reach a Final Answer. We also add a scratchpad (optional).
```python
class Final_Answer(BaseModel):
    # Terminal step of the loop: the presence of the `Final_Answer` key is
    # what distinguishes it from a `Reason_and_Act` step.
    Scratchpad: str = Field(..., description="Information from the Observation useful to answer the question")
    Final_Answer: str = Field(..., description="Answer to the question grounded on the Observation")
```
Our agent will decide when it has reached a Final Answer and therefore to stop the loop of Thought and Action.
```python
from typing import Union
class Decision(BaseModel):
    # Either another reasoning/tool step or the final answer; the model chooses.
    Decision: Union[Reason_and_Act, Final_Answer]
json_schema = Decision.model_json_schema()
```
We then need to adapt our prompt to the [Hermes prompt format for JSON schema](https://github.com/NousResearch/Hermes-Function-Calling?tab=readme-ov-file#prompt-format-for-json-mode--structured-outputs) and explain the agent logic. We can load a template from a file for that:
```python
from outlines import Template
hermes_prompt = Template.from_file("prompt_templates/react_agent.txt")
```
We define a `ChatBot` class:
```python
class ChatBot:
    """Accumulates a prompt and runs a `Decision`-constrained generation on it."""

    def __init__(self, prompt=""):
        # Text that every call is appended to.
        self.prompt = prompt

    def __call__(self, user_prompt):
        """Append `user_prompt` to the stored prompt and return the model output."""
        self.prompt += user_prompt
        return self.execute()

    def execute(self):
        """Generate from the current prompt, constrained to the `Decision` schema."""
        generator = outlines.Generator(model, Decision)
        return generator(self.prompt, max_tokens=1024, temperature=0, seed=42)
```
We define a query function:
```python
import json
def query(question, max_turns=5):
    """Run the Thought/Action loop on `question` until a final answer is produced.

    Each turn rebuilds the Hermes prompt, asks the model for a `Decision`,
    and either runs the chosen action (appending the resulting observation
    to the running prompt) or returns the final answer.

    Args:
        question: The user's question, as a string.
        max_turns: Maximum number of model calls before giving up.

    Returns:
        The model's final answer, or "No answer found" if no final answer
        was reached within `max_turns` turns.
    """
    # Local import so this snippet runs even if `datetime` was not imported earlier.
    import datetime

    i = 0
    next_prompt = (
        "\n<|im_start|>user\n" + question + "<|im_end|>"
        "\n<|im_start|>assistant\n"
    )
    previous_actions = []
    while i < max_turns:
        i += 1
        # `hermes_prompt` is the template loaded with `Template.from_file` above.
        prompt = hermes_prompt(
            question=question,
            schema=Decision.model_json_schema(),
            today=datetime.datetime.today().strftime('%Y-%m-%d')
        )
        bot = ChatBot(prompt=prompt)
        result = bot(next_prompt)
        json_result = json.loads(result)['Decision']

        if "Final_Answer" in json_result:
            scratchpad = json_result["Scratchpad"]
            final_answer = json_result["Final_Answer"]
            print(f"\x1b[34m Scratchpad: {scratchpad} \x1b[0m")
            print(f"\x1b[34m Final Answer: {final_answer} \x1b[0m")
            return final_answer

        # NOTE(review): `i` is incremented before this point, so `i == 0` is
        # never true and the scratchpad is always empty in this branch.
        # Behavior kept as-is; confirm the intended condition.
        scratchpad = json_result['Scratchpad'] if i == 0 else ""
        thought = json_result['Thought']
        action = json_result['Action']
        action_input = json_result['Action_Input']
        print(f"\x1b[34m Scratchpad: {scratchpad} \x1b[0m")
        print(f"\x1b[34m Thought: {thought} \x1b[0m")
        print(f"\x1b[36m -- running {action}: {str(action_input)}\x1b[0m")

        action_key = action + ": " + str(action_input)
        if action_key in previous_actions:
            observation = "You already run that action. **TRY A DIFFERENT ACTION INPUT.**"
        elif action == "calculate":
            try:
                # WARNING: `eval` executes arbitrary Python produced by the
                # model. Acceptable for a local demo only; do not expose this
                # to untrusted input.
                observation = eval(str(action_input))
            except Exception as e:
                observation = f"{e}"
        elif action == "wikipedia":
            try:
                observation = wikipedia(str(action_input))
            except Exception as e:
                observation = f"{e}"
        else:
            # Keep `observation` bound even if an unexpected action slips through.
            observation = f"Unknown action: {action}"

        print()
        print(f"\x1b[33m Observation: {observation} \x1b[0m")
        print()
        previous_actions.append(action_key)
        next_prompt += (
            "\nScratchpad: " + scratchpad +
            "\nThought: " + thought +
            "\nAction: " + action +
            "\nAction Input: " + str(action_input) +
            "\nObservation: " + str(observation)
        )

    print("\nFinal Answer: I am sorry, but I am unable to answer your question. Please provide more information or a different question.")
    return "No answer found"
```
We can now test our ReAct agent:
```python
print(query("What's 2 to the power of 10?"))
# Scratchpad:
# Thought: I need to perform a mathematical calculation to find the result of 2 to the power of 10.
# -- running calculate: 2**10
#
# Observation: 1024
#
# Scratchpad: 2 to the power of 10 is 1024.
# Final Answer: 2 to the power of 10 is 1024.
# 2 to the power of 10 is 1024.
```
```python
print(query("What does England share borders with?"))
# Scratchpad:
# Thought: To answer this question, I will use the 'wikipedia' action to gather information about England's geographical location and its borders.
# -- running wikipedia: England borders
#
# Observation: Anglo-Scottish <span class="searchmatch">border</span> (Scottish Gaelic: Crìochan Anglo-Albannach) is an internal <span class="searchmatch">border</span> of the United Kingdom separating Scotland and <span class="searchmatch">England</span> which runs for
#
# Scratchpad: Anglo-Scottish border (Scottish Gaelic: Crìochan Anglo-Albannach) is an internal border of the United Kingdom separating Scotland and England which runs for
# Final Answer: England shares a border with Scotland.
# England shares a border with Scotland.
```
As mentioned in Simon's blog post, this is not a very robust implementation at all and there's a ton of room for improvement. But it is lovely how simple it is with a few lines of Python to make these extra capabilities available to the LLM. And now you can run it locally with an open weights LLM.
This example was originally contributed by [Alonso Silva](https://github.com/alonsosilvaallende).
================================================
FILE: docs/examples/read-pdfs.md
================================================
# PDF to structured output with vision language models
A common task with language models is to ask them questions about a PDF file.
Typically, the output is unstructured text, i.e. "talking" to your PDF.
In some cases, you may wish to extract structured information from the PDF, like tables, lists, citations, etc.
PDFs are difficult to machine read. However, you can simply convert the PDF to images, and then use a vision language model to extract structured information from the images.
This cookbook demonstrates how to
1. Convert a PDF to a list of images
2. Use a vision language model to extract structured information from the images
## Dependencies
You'll need to install these dependencies:
```shell
pip install outlines pillow transformers torch==2.4.0 pdf2image
# Optional, but makes the output look nicer
pip install rich
```
## Import the necessary libraries
```python
from PIL import Image
import outlines
import torch
from transformers import AutoProcessor
from pydantic import BaseModel
from typing import List, Optional
from pdf2image import convert_from_path
import os
from rich import print
import requests
```
## Choose a model
We've tested this example with [Pixtral 12b](https://huggingface.co/mistral-community/pixtral-12b) and [Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct).
To use Pixtral:
```python
from transformers import LlavaForConditionalGeneration, LlavaProcessor
model_name="mistral-community/pixtral-12b"
model_class=LlavaForConditionalGeneration
processor_class = LlavaProcessor
```
To use Qwen-2-VL:
```python
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
model_name = "Qwen/Qwen2-VL-7B-Instruct"
model_class = Qwen2VLForConditionalGeneration
processor_class = AutoProcessor
```
You can load your model into memory with:
```python
# This loads the model into memory. On your first run,
# it will have to download the model, which might take a while.
model_kwargs={"device_map": "auto", "torch_dtype": torch.bfloat16}
processor_kwargs={"device_map": "cpu"}
tf_model = model_class.from_pretrained(model_name, **model_kwargs)
tf_processor = processor_class.from_pretrained(model_name, **processor_kwargs)
model = outlines.from_transformers(tf_model, tf_processor)
```
## Convert the PDF to images
We'll use the `pdf2image` library to convert each page of the PDF to an image.
`convert_pdf_to_images` is a convenience function that converts each page of the PDF to an image, and optionally saves the images to disk when `output_dir` is provided.
Note: the `dpi` argument is important. It controls the resolution of the images. High DPI images are higher quality and may yield better results,
but they are also larger, slower to process, and require more memory.
```python
from pdf2image import convert_from_path
from PIL import Image
import os
from typing import List, Optional
def convert_pdf_to_images(
    pdf_path: str,
    output_dir: Optional[str] = None,
    dpi: int = 120,
    fmt: str = 'PNG'
) -> List[Image.Image]:
    """
    Render every page of a PDF as a PIL image.

    Args:
        pdf_path: Location of the PDF to render.
        output_dir: If given, each page is also written to this directory
            (created when missing) as `page_<n>.<fmt>`, numbered from 1.
        dpi: Rendering resolution. Higher values give sharper pages but are
            slower and use more memory.
        fmt: Image format passed to the converter (PNG recommended for quality).

    Returns:
        One PIL Image per page, in page order.
    """
    pages = convert_from_path(pdf_path, dpi=dpi, fmt=fmt)

    # Nothing to write to disk: hand the in-memory pages straight back.
    if not output_dir:
        return pages

    os.makedirs(output_dir, exist_ok=True)
    for page_no, page in enumerate(pages, start=1):
        page.save(os.path.join(output_dir, f'page_{page_no}.{fmt.lower()}'))
    return pages
```
We're going to use the [Louf & Willard paper](https://arxiv.org/pdf/2307.09702) that described the method that Outlines uses for structured generation.
To download the PDF, run:
```python
# Download the PDF file
pdf_url = "https://arxiv.org/pdf/2307.09702"
response = requests.get(pdf_url)
# Save the PDF locally
with open("louf-willard.pdf", "wb") as f:
f.write(response.content)
```
Now, we can convert the PDF to a list of images:
```python
# Load the pdf
images = convert_pdf_to_images(
"louf-willard.pdf",
dpi=120,
output_dir="output_images"
)
```
## Extract structured information from the images
The structured output you can extract is exactly the same as everywhere else in Outlines -- you can use regular expressions, JSON schemas, selecting from a list of options, etc.
### Extracting data into JSON
Suppose you wished to go through each page of the PDF, and extract the page description, key takeaways, and page number.
You can do this by defining a JSON schema, and then using `outlines.Generator` to extract the data.
First, define the structure you want to extract:
```python
class PageSummary(BaseModel):
    # Structured summary extracted from a single page image.
    description: str            # prose description of the page
    key_takeaways: List[str]    # main points, one string each
    page_number: int            # page number as reported by the model
```
Second, we need to set up the prompt. Adding special tokens can be tricky, so we use the transformers processor to apply the special tokens for us. To do so, we specify a list of messages, where each message is a dictionary with a `role` and `content` key.
Images are denoted with `type: "image"`, and text is denoted with `type: "text"`.
```python
messages = [
{
"role": "user",
"content": [
# The text you're passing to the model --
# this is where you do your standard prompting.
{"type": "text", "text": f"""
Describe the page in a way that is easy for a PhD student to understand.
Return the information in the following JSON schema:
{PageSummary.model_json_schema()}
Here is the page:
"""
},
# This a placeholder, the actual image is passed in when
# we call the generator function down below.
{"type": "image", "image": ""},
],
}
]
# Convert the messages to the final prompt
prompt = tf_processor.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
```
Now we iterate through each image, and extract the structured information:
```python
# Page summarizer function
page_summary_generator = outlines.Generator(model, PageSummary)
for image in images:
result = page_summary_generator({"text": prompt, "images": image})
print(result)
```
### Regular expressions to extract the arxiv paper identifier
The [arXiv paper identifier](https://info.arxiv.org/help/arxiv_identifier.html) is a unique identifier for each paper. These identifiers have the format `arXiv:YYMM.NNNNN` (five end digits) or `arXiv:YYMM.NNNN` (four end digits). arXiv identifiers are typically watermarked on papers uploaded to arXiv.
arXiv identifiers are optionally followed by a version number, i.e. `arXiv:YYMM.NNNNNvX`.
We can use a regular expression to define this pattern:
```python
from outlines.types import Regex
# `v\d{1,2}` lets the optional version suffix have two digits (e.g. v12),
# while still bounding how many digits the model can generate.
paper_regex = Regex(r'arXiv:\d{2}[01]\d\.\d{4,5}(v\d{1,2})?')
```
We can build an extractor function from the regex:
```python
id_extractor = outlines.Generator(model, paper_regex)
```
Now, we can extract the arxiv paper identifier from the first image:
```python
arxiv_instruction = tf_processor.apply_chat_template(
[
{
"role": "user",
"content": [
{"type": "text", "text": f"""
Extract the arxiv paper identifier from the page.
Here is the page:
"""},
{"type": "image", "image": ""},
],
}
],
tokenize=False,
add_generation_prompt=True
)
# Extract the arxiv paper identifier
paper_id = id_extractor({"text": arxiv_instruction, "images": images[0]})
```
As of the time of this writing, the arxiv paper identifier is
```
arXiv:2307.09702v4
```
Your version number may be different, but the part before `vX` should match.
### Categorize the paper into one of several categories
`outlines.Generator` also allows the model to select one of several options by providing a Literal type hint with the categories.
Suppose we wanted to categorize the paper into being about "language models", "cell biology", or "other". We would then define the output type as `Literal["llms", "cell biology", "other"]`.
Let's define a few categories we might be interested in:
```python
categories = [
"llms",
"cell biology",
"other"
]
```
Now we can construct the prompt:
```python
categorization_instruction = tf_processor.apply_chat_template(
[
{
"role": "user",
"content": [
{"type": "text", "text": f"""
Please choose one of the following categories
that best describes the paper.
{categories}
Here is the paper:
"""},
{"type": "image", "image": ""},
],
}
],
tokenize=False,
add_generation_prompt=True
)
```
Now we can show the model the first page and extract the category:
```python
from typing import Literal
# Build the choice extractor
categorizer = outlines.Generator(model, Literal["llms", "cell biology", "other"])
# Categorize the paper
category = categorizer({"text": categorization_instruction, "images": images[0]})
print(category)
```
Which should return:
```
llms
```
## Additional notes
You can provide multiple images to the model by
1. Adding additional image messages
2. Providing a list of images to the generator
For example, to have two images, you can do:
```python
two_image_prompt = tf_processor.apply_chat_template(
[
{
"role": "user",
"content": [
{"type": "text", "text": "are both of these images of hot dogs?"},
# Tell the model there are two images
{"type": "image", "image": ""},
{"type": "image", "image": ""},
],
}
],
tokenize=False,
add_generation_prompt=True
)
# Pass two images to the model
generator = outlines.Generator(model, Literal["hot dog", "not hot dog"])
result = generator({"text": two_image_prompt, "images": [images[0], images[1]]})
print(result)
```
Using the first two pages of the paper (they are not images of hot dogs), we should get:
```
not hot dog
```
================================================
FILE: docs/examples/receipt-digitization.md
================================================
# Receipt Data Extraction with VLMs
## Setup
You'll need to install the dependencies:
```shell
pip install outlines torch==2.4.0 transformers accelerate pillow rich
```
## Import libraries
Load all the necessary libraries:
```python
# LLM stuff
import outlines
import torch
from transformers import AutoProcessor
from pydantic import BaseModel, Field
from typing import Literal, Optional, List
# Image stuff
from PIL import Image
import requests
# Rich for pretty printing
from rich import print
```
## Choose a model
This example has been tested with `mistral-community/pixtral-12b` ([HF link](https://huggingface.co/mistral-community/pixtral-12b)) and `Qwen/Qwen2-VL-7B-Instruct` ([HF link](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)).
We recommend Qwen-2-VL as we have found it to be more accurate than Pixtral.
If you want to use Qwen-2-VL, you can do the following:
```python
# To use Qwen-2-VL:
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
model_name = "Qwen/Qwen2-VL-7B-Instruct"
model_class = Qwen2VLForConditionalGeneration
processor_class = AutoProcessor
```
If you want to use Pixtral, you can do the following:
```python
# To use Pixtral:
from transformers import LlavaForConditionalGeneration, LlavaProcessor
model_name="mistral-community/pixtral-12b"
model_class=LlavaForConditionalGeneration
processor_class = LlavaProcessor
```
## Load the model
Load the model into memory:
```python
model_kwargs={"device_map": "auto", "torch_dtype": torch.bfloat16}
processor_kwargs={"device_map": "cuda"}
tf_model = model_class.from_pretrained(model_name, **model_kwargs)
tf_processor = processor_class.from_pretrained(model_name, **processor_kwargs)
model = outlines.from_transformers(tf_model, tf_processor)
```
## Image processing
Images can be quite large. In GPU-poor environments, you may need to resize the image to a smaller size.
Here's a helper function to do that:
```python
def load_and_resize_image(image_path, max_size=1024):
    """
    Open an image and shrink it, if needed, so neither side exceeds `max_size`.

    The aspect ratio is preserved, and images that already fit are returned
    untouched (they are never enlarged).

    Args:
        image_path: Path to the image file
        max_size: Maximum dimension (width or height) of the output image

    Returns:
        PIL Image: the original image, or a downscaled copy of it
    """
    img = Image.open(image_path)
    w, h = img.size

    # The tighter of the two per-axis ratios keeps both sides within bounds.
    ratio = min(max_size / w, max_size / h)
    if ratio >= 1:
        return img

    target = (int(w * ratio), int(h * ratio))
    return img.resize(target, Image.Resampling.LANCZOS)
```
You can change the resolution of the image by changing the `max_size` argument. Small max sizes will make the image more blurry, but processing will be faster and require less memory.
## Load an image
Load an image and resize it. We've provided a sample image of a Trader Joe's receipt, but you can use any image you'd like.
Here's what the image looks like:

```python
# Path to the image
image_path = "https://raw.githubusercontent.com/dottxt-ai/outlines/refs/heads/main/docs/cookbook/images/trader-joes-receipt.jpg"
# Download the image
response = requests.get(image_path)
with open("receipt.png", "wb") as f:
f.write(response.content)
# Load + resize the image
image = load_and_resize_image("receipt.png")
```
## Define the output structure
We'll define a Pydantic model to describe the data we want to extract from the image.
In our case, we want to extract the following information:
- The store name
- The store address
- The store number
- A list of items, including the name, quantity, price per unit, and total price
- The tax
- The total
- The date
- The payment method
Most fields are optional, as not all receipts contain all information.
```python
class Item(BaseModel):
    # A single line item on the receipt.
    name: str
    # The numeric fields may be null when the receipt does not show them.
    quantity: Optional[int]
    price_per_unit: Optional[float]
    total_price: Optional[float]
class ReceiptSummary(BaseModel):
    # Everything we want to pull out of one receipt image.

    # Store details
    store_name: str
    store_address: str
    store_number: Optional[int]

    # Purchased items and totals
    items: List[Item]
    tax: Optional[float]
    total: Optional[float]

    # Date is in the format YYYY-MM-DD. The regex pattern constrains
    # generation so the date is always formatted that way.
    date: Optional[str] = Field(
        pattern=r'\d{4}-\d{2}-\d{2}',
        description="Date in the format YYYY-MM-DD",
    )

    # Restricted to a fixed set of choices.
    payment_method: Literal["cash", "credit", "debit", "check", "other"]
```
## Prepare the prompt
We'll use the `tf_processor` to convert the image and the text prompt into a format that the model can understand. Practically,
this is the code that adds user, system, assistant, and image tokens to the prompt.
```python
# Set up the content you want to send to the model
messages = [
{
"role": "user",
"content": [
{
# The image is provided as a PIL Image object
"type": "image",
"image": image,
},
{
"type": "text",
"text": f"""You are an expert at extracting information from receipts.
Please extract the information from the receipt. Be as detailed as possible --
missing or misreporting information is a crime.
Return the information in the following JSON schema:
{ReceiptSummary.model_json_schema()}
"""},
],
}
]
# Convert the messages to the final prompt
prompt = tf_processor.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
```
If you are curious, the final prompt that is sent to the model looks (roughly) like this:
```
<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
<|vision_start|><|image_pad|><|vision_end|>
You are an expert at extracting information from receipts.
Please extract the information from the receipt. Be as detailed as
possible -- missing or misreporting information is a crime.
Return the information in the following JSON schema:
<JSON SCHEMA OMITTED>
<|im_end|>
<|im_start|>assistant
```
## Run the model
```python
# Prepare a function to process receipts
receipt_summary_generator = outlines.Generator(model, ReceiptSummary)
# Generate the receipt summary
result = receipt_summary_generator(
{"text": prompt, "images": image},
max_new_tokens=1024
)
print(result)
```
## Output
The output should look like this:
```
{
"store_name": "Trader Joe's",
"store_address": "401 Bay Street, San Francisco, CA 94133",
"store_number": 0,
"items": [
{"name": "BANANA EACH", "quantity": 7, "price_per_unit": 0.23, "total_price": 1.61},
{"name": "BAREBELLS CHOCOLATE DOUG", "quantity": 1, "price_per_unit": 2.29, "total_price": 2.29},
{"name": "BAREBELLS CREAMY CRISP", "quantity": 1, "price_per_unit": 2.29, "total_price": 2.29},
{"name": "BAREBELLS CHOCOLATE DOUG", "quantity": 1, "price_per_unit": 2.29, "total_price": 2.29},
{"name": "BAREBELLS CARAMEL CASHEW", "quantity": 2, "price_per_unit": 2.29, "total_price": 4.58},
{"name": "BAREBELLS CREAMY CRISP", "quantity": 1, "price_per_unit": 2.29, "total_price": 2.29},
{"name": "SPINDRIFT ORANGE MANGO 8", "quantity": 1, "price_per_unit": 7.49, "total_price": 7.49},
{"name": "Bottle Deposit", "quantity": 8, "price_per_unit": 0.05, "total_price": 0.4},
{"name": "MILK ORGANIC GALLON WHOL", "quantity": 1,"price_per_unit": 6.79,"total_price": 6.79},
{"name": "CLASSIC GREEK SALAD", "quantity": 1, "price_per_unit": 3.49, "total_price": 3.49},
{"name": "COBB SALAD", "quantity": 1, "price_per_unit": 5.99, "total_price": 5.99},
{"name": "PEPPER BELL RED XL EACH", "quantity": 1, "price_per_unit": 1.29, "total_price": 1.29},
{"name": "BAG FEE.", "quantity": 1, "price_per_unit": 0.25, "total_price": 0.25},
{"name": "BAG FEE.", "quantity": 1, "price_per_unit": 0.25, "total_price": 0.25},
],
"tax": 0.68,
"total": 41.98,
"date": "2023-11-04",
"payment_method": "debit"
}
```
Voila! You've successfully extracted information from a receipt using an LLM.
## Bonus: roasting the user for their receipt
You can roast the user for their receipt by adding a `roast` field to the end of the `ReceiptSummary` model.
```python
class ReceiptSummary(BaseModel):
...
roast: str
```
which gives you a result like
```
{
...
"roast": "You must be a fan of Trader Joe's because you bought enough
items to fill a small grocery bag and still had to pay for a bag fee.
Maybe you should start using reusable bags to save some money and the
environment."
}
```
Qwen is not particularly funny, but worth a shot.
================================================
FILE: docs/examples/simtom.md
================================================
# Build perspective-taking agents with SimToM
Prompting strategies like Chain-of-Thought (CoT) can improve LLMs' reasoning capabilities. However, they underwhelm in tasks that require keeping track of inconsistent world states. [SimToM](https://arxiv.org/abs/2311.10227) proposes a simple, two-stage prompting framework for LLMs inspired by Simulation Theory. The authors showed that this approach outperforms zero-shot prompting and CoT on ToMI and BigToM, two benchmarks with Theory of Mind questions.
In this example, we will implement SimToM with a few lines of code using Outlines' prompt templating and structured generation capabilities.
## How SimToM works
SimToM calls an LLM with two consecutive prompts:
1. **Perspective-taking**: The first prompt receives a `story` and a `character`. The goal is to understand the situation based on the character's point of view and filter out the rest of the story.
2. **Question-Answering**: The second prompt receives the character's point of view from the previous step and tasks the LLM to answer a question using that context.

## Outlines implementation
To implement SimToM with Outlines, we will need to:
1. Write the prompts with [prompt templates](https://dottxt-ai.github.io/outlines/latest/reference/prompting/).
2. Define the JSON object each prompt will return using Pydantic.
3. Generate responses with a Mistral model using the [transformers integration](https://dottxt-ai.github.io/outlines/latest/reference/models/transformers/).
Let's dive into it!
### Using Prompt Templates
The authors have shared their code, prompts and data in [this GitHub repository](https://github.com/shawnsihyunlee/simulatedtom). Below, we define in Outlines the prompts they used for the ToMI dataset:
```python
from outlines import Template
perspective_taking = Template.from_file("prompt_templates/simtom_prospective_taking.txt")
simulation = Template.from_file("prompt_templates/simtom_simulation.txt")
```
### JSON Structured Generation
Outlines guarantees that the LLM will return a valid JSON object, which we can specify as a Pydantic model.
We will need two Pydantic models for SimToM, one for each prompt:
```python
from pydantic import BaseModel, Field
from typing import List
class PerspectiveTaking(BaseModel):
    """This is for the first prompt."""

    # Whose point of view the story is filtered through.
    character: str = Field(
        description="The character we extract the events for.",
    )
    # The subset of story events visible to that character.
    events: List[str] = Field(
        description="All events that the character knows about.",
    )
class Simulation(BaseModel):
    """This is for the second prompt."""

    # The model's answer to the question, given only the character's events.
    answer: str
```
### Calling an LLM
Let's try SimToM with an example from the ToMI dataset:
```python
story = """
1 Aria entered the front_yard.
2 Aiden entered the front_yard.
3 The grapefruit is in the green_bucket.
4 Aria moved the grapefruit to the blue_container.
5 Aiden exited the front_yard.
6 Noah entered the playroom.
"""
question = "7 Where was the grapefruit at the beginning?"
character = "Aria"
```
We load `Mistral-7B-Instruct-v0.3`, create the prompt using the template we defined earlier, and generate a structured response. As a reminder, the goal of the first call is to get all the events a character, `Aria`, knows about.
```python
import transformers
import outlines
# Load an LLM from Hugging Face
MODEL_NAME = "mistral-community/Mistral-7B-Instruct-v0.3"
model = outlines.from_transformers(
transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME),
transformers.AutoTokenizer.from_pretrained(MODEL_NAME),
)
perspective_prompt = perspective_taking(story=story, character=character)
# Call Mistral 7B with the first prompt
generator = outlines.Generator(model, PerspectiveTaking)
perspective = generator(perspective_prompt, max_new_tokens=1024)
print(perspective)
# {'character': 'Aria', 'events': ['1 Aria entered the front_yard.', '3 The grapefruit is in the green_bucket.', '4 Aria moved the grapefruit to the blue_container.']}
```
Not bad! We will now generate the second prompt with those events.
```python
import json
sim_prompt = simulation(events=json.loads(perspective)["events"], name=character, question=question)
# Call Mistral 7B with the second prompt
generator = outlines.Generator(model, Simulation)
result = generator(sim_prompt, max_new_tokens=1024)
print(result)
# {'answer': 'green_bucket'}
```
And this is it! SimToM could be useful in agentic workflows, where agents must act based on what they know, not all available information. One caveat of SimToM is that the perspective-taking step may remove important information, leading to wrong results. As the authors note in their paper, it can feature as a simple and effective baseline for evaluating LLMs on Theory of Mind reasoning tasks.
================================================
FILE: docs/examples/structured_generation_workflow.md
================================================
# Structured Generation Workflow: Generating Synthetic Phone Numbers
This is a condensed version of [Coding for Structured Generation with LLMs](https://blog.dottxt.co/coding-for-structured-generation.html).
For this example we're going to be building an LLM program to generate **synthetic data** in the form of realistic looking phone numbers for Washington State. Using an LLM for this task *is a bit overkill* since we could just as easily accomplish this with a tool like [Faker](https://fakerjs.dev/), but this example still serves as a useful way to demonstrate a workflow for using structured generation.
## Unstructured approach
Before diving into how to use structured generation for this task, let's start with an unstructured example. We begin by loading our model:
```python
import outlines
import transformers
model_name = 'microsoft/Phi-3-mini-4k-instruct'
model = outlines.from_transformers(
transformers.AutoModelForCausalLM.from_pretrained(model_name),
transformers.AutoTokenizer.from_pretrained(model_name)
)
```
Next we need a prompt for this model. Since we're focusing on structured generation, we won't be engaging in any form of "prompt hacking" and will be leaving this prompt untouched for the rest of this example.
```python
prompt_phone = """
Please generate a realistic phone number for Washington State in the following format
(555) 555-5555
"""
```
With our prompt ready we can now generate 10 example phone numbers:
```python
phone_generator_unstruct = outlines.Generator(model)
# Draw 10 samples so the run matches the 10 outputs shown below.
for _ in range(10):
    print(phone_generator_unstruct(prompt_phone, max_new_tokens=12))
```
> I'd be happy to help you generate a realistic phone\
I cannot generate a real phone number as I'm just\
I'm an AI and don't have the ability\
Sure! Here is a randomly generated phone number in the format\
Here's a phone number that fits the format for a\
In Washington State, phone numbers typically have a three-dig\
Here are a few examples of phone numbers that could be considered\
I'd be happy to help generate a realistic phone number\
I'd be happy to help you generate a random phone\
Based on the format you provided, a realistic phone number for\
As we can see, none of these outputs are even phone numbers!
Let's see if we can improve this using structured generation.
## The Structured Generation Workflow
In order to solve this problem we're going to introduce a *Structured Generation Workflow* outlined in this image:

Let's step through this:
### Real example
We start with a real example phone number, in this case for the Seattle Public Library, that we can use to verify the structure we are creating.
```python
phone_number = "(206) 386-4636"
```
For a simple example like this, we'll just be using a single phone number; for more complex cases it can be helpful to have more examples.
### Draft Structure
The next step in the process is for us to define a simple regex that we feel correctly models our real data.
```python
from outlines.types import Regex
phone_regex_1 = Regex(r'\([0-9]{3}\) [0-9]{3}-[0-9]{4}')
```
Next we need to validate this regex against our real data.
### Validate by matching examples
Whenever writing non-trivial code with structured generation it is *essential* that you first validate the code against your real data example(s).
We'll start with a simple method of validation: just checking that our regex matches the data.
```python
import re
re.match(phone_regex_1.pattern, phone_number)
# <re.Match object; span=(0, 14), match='(206) 386-4636'>
```
Now that we have a match, we can move on to generating structured output!
### Generate Structure
We're ready to see if structured generation can make an improvement over our initial unstructured approach:
```python
phone_generator_v1 = outlines.Generator(model, phone_regex_1)
for _ in range(3):
print(phone_generator_v1(prompt_phone))
```
> (206) 555-1234\
(206) 555-1234\
(206) 555-1234\
(206) 555-1234\
(206) 555-1234\
(206) 555-1234\
(206) 123-4567\
(206) 555-1234\
(206) 555-1234\
(206) 555-1234
At least we have phone numbers! But I think we can do better!
### Inspect output
In this case the model *did* create phone numbers and, impressively, got the area code correct. So using structured generation did improve things. However, these numbers are pretty boring. Let's improve that structure!
## Iteration
We've walked through the loop once, so we can now move through each subsequent iteration more quickly.
We start by improving our structure:
```python
phone_regex_2 = Regex(r'\([0-9]{3}\) [2-46-9]{3}-[02-9]{4}')
```
Before rushing to another round of generation, let's validate this new regex. We'll add just a bit more sophistication over our last check:
```python
re.match(phone_regex_2.pattern, phone_number)[0] == phone_number
# True
```
Now that we've validated, let's generate with this new regex!
```python
phone_generator_v2 = outlines.Generator(model, phone_regex_2)
for _ in range(3):
print(phone_generator_v2(prompt_phone))
```
> (206) 867-5309\
(206) 666-7777\
(206) 444-3333\
(206) 444-3333\
(206) 943-2222\
(206) 323-6789\
(206) 444-3333\
(206) 867-5309\
(206) 466-2255\
(206) 222-3333
Better, but I don't like those repeated sequences. Like good software developers, let's iterate again!
## Reiteration - with debugging
Here's a fancier regex that should give us more interesting results:
```python
phone_regex_3_error = r'\([0-9]{3}\) [2-4][7-9][4-6]-[3-6][2-8][1-4]'
```
Thi
gitextract_sobc03i9/
├── .devcontainer/
│ └── devcontainer.json
├── .editorconfig
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yml
│ │ └── config.yml
│ ├── PULL_REQUEST_TEMPLATE/
│ │ └── pull_request_template.md
│ ├── scripts/
│ │ └── build_sdist_and_wheel.sh
│ └── workflows/
│ ├── build_documentation.yml
│ ├── publish_documentation.yml
│ ├── release_pypi.yaml
│ ├── tests.yml
│ └── tests_api_models.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .pydocstyle
├── .readthedocs.yaml
├── .vscode/
│ └── settings.json
├── LICENSE
├── README.md
├── docs/
│ ├── api_reference/
│ │ └── index.md
│ ├── blog/
│ │ └── index.md
│ ├── community/
│ │ ├── contribute.md
│ │ ├── examples.md
│ │ ├── feedback.md
│ │ ├── index.md
│ │ └── versioning.md
│ ├── core_concepts.md
│ ├── examples/
│ │ ├── chain_of_density.md
│ │ ├── chain_of_thought.md
│ │ ├── classification.md
│ │ ├── dating_profiles.md
│ │ ├── deploy-using-bentoml.md
│ │ ├── deploy-using-cerebrium.md
│ │ ├── deploy-using-modal.md
│ │ ├── earnings-reports.md
│ │ ├── extract_event_details.md
│ │ ├── extract_event_details.py
│ │ ├── extraction.md
│ │ ├── index.md
│ │ ├── knowledge_graph_extraction.md
│ │ ├── models_playing_chess.md
│ │ ├── prompt_templates/
│ │ │ ├── chain_of_density.txt
│ │ │ ├── classification.txt
│ │ │ ├── react_agent.txt
│ │ │ ├── simtom_prospective_taking.txt
│ │ │ └── simtom_simulation.txt
│ │ ├── qa-with-citations.md
│ │ ├── react_agent.md
│ │ ├── read-pdfs.md
│ │ ├── receipt-digitization.md
│ │ ├── simtom.md
│ │ └── structured_generation_workflow.md
│ ├── features/
│ │ ├── advanced/
│ │ │ ├── backends.md
│ │ │ └── logits_processors.md
│ │ ├── core/
│ │ │ ├── generator.md
│ │ │ ├── inputs.md
│ │ │ └── output_types.md
│ │ ├── index.md
│ │ ├── models/
│ │ │ ├── anthropic.md
│ │ │ ├── dottxt.md
│ │ │ ├── gemini.md
│ │ │ ├── index.md
│ │ │ ├── llamacpp.md
│ │ │ ├── mistral.md
│ │ │ ├── mlxlm.md
│ │ │ ├── ollama.md
│ │ │ ├── openai.md
│ │ │ ├── openai_compatible.md
│ │ │ ├── openrouter.md
│ │ │ ├── sglang.md
│ │ │ ├── tgi.md
│ │ │ ├── transformers.md
│ │ │ ├── transformers_multimodal.md
│ │ │ ├── vllm.md
│ │ │ └── vllm_offline.md
│ │ └── utility/
│ │ ├── application.md
│ │ ├── regex_dsl.md
│ │ └── template.md
│ ├── guide/
│ │ ├── architecture.md
│ │ ├── chat_templating.md
│ │ ├── core_concepts.md
│ │ ├── fastapi_vllm_deployment.md
│ │ ├── getting_started.md
│ │ ├── installation.md
│ │ ├── migration.md
│ │ ├── selecting_an_inference_backend.md
│ │ └── vlm.md
│ ├── index.md
│ ├── overrides/
│ │ ├── home.html
│ │ └── main.html
│ └── stylesheets/
│ └── extra.css
├── environment.yml
├── examples/
│ ├── babyagi.py
│ ├── beam-cloud/
│ │ ├── README.md
│ │ └── app.py
│ ├── bentoml/
│ │ ├── .bentoignore
│ │ ├── bentofile.yaml
│ │ ├── import_model.py
│ │ ├── requirements.txt
│ │ └── service.py
│ ├── cerebrium/
│ │ ├── cerebrium.toml
│ │ └── main.py
│ ├── dating_profile.py
│ ├── llamacpp_example.py
│ ├── llamacpp_processor.py
│ ├── math_generate_code.py
│ ├── meta_prompting.py
│ ├── modal_example.py
│ ├── pick_odd_one_out.py
│ ├── prompts/
│ │ ├── babyagi_create_task.txt
│ │ ├── babyagi_perform_task.txt
│ │ ├── babyagi_prioritize_task.txt
│ │ ├── dating_profile.txt
│ │ ├── pick_odd_one_out.txt
│ │ └── self_consistency.txt
│ ├── react.py
│ ├── sampling.ipynb
│ ├── self_consistency.py
│ ├── simulation_based_inference.ipynb
│ └── vllm_offline_integration.py
├── flake.nix
├── llm.txt
├── mkdocs.yml
├── outlines/
│ ├── __init__.py
│ ├── applications.py
│ ├── backends/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── llguidance.py
│ │ ├── outlines_core.py
│ │ └── xgrammar.py
│ ├── caching.py
│ ├── generator.py
│ ├── grammars/
│ │ ├── arithmetic.lark
│ │ ├── common.lark
│ │ └── json.lark
│ ├── grammars.py
│ ├── inputs.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── anthropic.py
│ │ ├── base.py
│ │ ├── dottxt.py
│ │ ├── gemini.py
│ │ ├── llamacpp.py
│ │ ├── lmstudio.py
│ │ ├── mistral.py
│ │ ├── mlxlm.py
│ │ ├── ollama.py
│ │ ├── openai.py
│ │ ├── sglang.py
│ │ ├── tgi.py
│ │ ├── tokenizer.py
│ │ ├── transformers.py
│ │ ├── utils.py
│ │ ├── vllm.py
│ │ └── vllm_offline.py
│ ├── processors/
│ │ ├── __init__.py
│ │ ├── base_logits_processor.py
│ │ └── tensor_adapters/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── mlx.py
│ │ ├── numpy.py
│ │ └── torch.py
│ ├── py.typed
│ ├── release_note.md
│ ├── templates.py
│ └── types/
│ ├── __init__.py
│ ├── airports.py
│ ├── countries.py
│ ├── dsl.py
│ ├── json_schema_utils.py
│ ├── locale/
│ │ ├── __init__.py
│ │ └── us.py
│ └── utils.py
├── pyproject.toml
├── requirements-doc.txt
├── scripts/
│ └── gen_ref_pages.py
├── setup.cfg
├── shell.nix
└── tests/
├── __init__.py
├── backends/
│ ├── test_backends.py
│ ├── test_backends_utils.py
│ ├── test_llguidance.py
│ ├── test_outlines_core.py
│ └── test_xgrammar.py
├── cfg_samples/
│ ├── arithmetic/
│ │ ├── lots_of_ops.arithmetic.test
│ │ └── simple_math.arithmetic.test
│ └── json/
│ ├── outlines.generate.samplers.mypy.json.test
│ ├── simple_fruit.json.test
│ └── simple_fruit_no_indent.json.test
├── conftest.py
├── models/
│ ├── test_anthopic_type_adapter.py
│ ├── test_anthropic.py
│ ├── test_dottxt.py
│ ├── test_dottxt_type_adapter.py
│ ├── test_gemini.py
│ ├── test_gemini_type_adapter.py
│ ├── test_llamacpp.py
│ ├── test_llamacpp_tokenizer.py
│ ├── test_llamacpp_type_adapter.py
│ ├── test_lmstudio.py
│ ├── test_lmstudio_type_adapter.py
│ ├── test_mistral.py
│ ├── test_mistral_type_adapter.py
│ ├── test_mlxlm.py
│ ├── test_mlxlm_type_adapter.py
│ ├── test_ollama.py
│ ├── test_ollama_type_adapter.py
│ ├── test_openai.py
│ ├── test_openai_type_adapter.py
│ ├── test_sglang.py
│ ├── test_sglang_type_adapter.py
│ ├── test_tgi.py
│ ├── test_tgi_model_adapter.py
│ ├── test_tokenizer.py
│ ├── test_transformers.py
│ ├── test_transformers_multimodal.py
│ ├── test_transformers_multimodal_type_adapter.py
│ ├── test_transformers_tokenizer.py
│ ├── test_transformers_type_adapter.py
│ ├── test_utils.py
│ ├── test_vllm.py
│ ├── test_vllm_offline.py
│ ├── test_vllm_offline_type_adapter.py
│ └── test_vllm_type_adapter.py
├── processors/
│ ├── test_base_processor.py
│ └── test_tensor_adapters.py
├── test_applications.py
├── test_cache.py
├── test_generator.py
├── test_inputs.py
├── test_templates.py
├── test_utils/
│ ├── mock_lmstudio_client.py
│ ├── mock_openai_client.py
│ ├── mock_tgi_client.py
│ └── utils.py
└── types/
├── test_custom_types.py
├── test_dsl.py
├── test_json_schema_utils.py
├── test_to_regex.py
└── test_types_utils.py
SYMBOL INDEX (1469 symbols across 108 files)
FILE: docs/examples/extract_event_details.py
class Event (line 15) | class Event(BaseModel):
FILE: examples/babyagi.py
function create_tasks_fmt (line 26) | def create_tasks_fmt(result: str) -> List[str]:
function prioritize_tasks_fmt (line 38) | def prioritize_tasks_fmt(result: str):
function one_cycle (line 61) | def one_cycle(objective: str, task_list, next_task_id: int):
FILE: examples/beam-cloud/app.py
function load_models (line 11) | def load_models():
function predict (line 32) | def predict(context, **inputs):
FILE: examples/bentoml/import_model.py
function import_model (line 7) | def import_model(model_id, bento_model_tag):
FILE: examples/bentoml/service.py
class Outlines (line 53) | class Outlines:
method __init__ (line 56) | def __init__(self) -> None:
method generate (line 72) | async def generate(
FILE: examples/cerebrium/main.py
function generate (line 40) | def generate(
FILE: examples/dating_profile.py
class QuestionChoice (line 12) | class QuestionChoice(str, Enum):
class QuestionAnswer (line 22) | class QuestionAnswer:
class DatingProfile (line 27) | class DatingProfile(BaseModel):
class Example (line 39) | class Example:
FILE: examples/llamacpp_example.py
class Weapon (line 9) | class Weapon(str, Enum):
class Armor (line 18) | class Armor(str, Enum):
class Character (line 24) | class Character(BaseModel):
FILE: examples/llamacpp_processor.py
class Weapon (line 10) | class Weapon(str, Enum):
class Armor (line 19) | class Armor(str, Enum):
class Character (line 25) | class Character(BaseModel):
FILE: examples/math_generate_code.py
function execute_code (line 35) | def execute_code(code):
FILE: examples/meta_prompting.py
function split_into_steps (line 24) | def split_into_steps(question, model_name: str):
function fill_in_the_blanks (line 45) | def fill_in_the_blanks(question, model_name: str):
function ask_an_expert (line 66) | def ask_an_expert(question, model_name: str):
function ask_an_expert_simple (line 105) | def ask_an_expert_simple(question, model_name: str):
function run_example (line 132) | def run_example(model_fn, question, model_name):
FILE: examples/modal_example.py
function import_model (line 14) | def import_model():
function generate (line 64) | def generate(
function main (line 85) | def main(
FILE: examples/react.py
function search_wikipedia (line 45) | def search_wikipedia(query: str):
FILE: examples/vllm_offline_integration.py
class Person (line 11) | class Person(BaseModel):
FILE: outlines/applications.py
class Application (line 14) | class Application:
method __init__ (line 55) | def __init__(
method __call__ (line 76) | def __call__(
FILE: outlines/backends/__init__.py
function _get_backend (line 32) | def _get_backend(backend_name: str, model: SteerableModel) -> BaseBackend:
function get_json_schema_logits_processor (line 58) | def get_json_schema_logits_processor(
function get_regex_logits_processor (line 87) | def get_regex_logits_processor(
function get_cfg_logits_processor (line 116) | def get_cfg_logits_processor(
FILE: outlines/backends/base.py
class BaseBackend (line 10) | class BaseBackend(ABC):
method get_json_schema_logits_processor (line 19) | def get_json_schema_logits_processor(
method get_regex_logits_processor (line 38) | def get_regex_logits_processor(self, regex: str) -> LogitsProcessorType:
method get_cfg_logits_processor (line 55) | def get_cfg_logits_processor(self, grammar: str) -> LogitsProcessorType:
FILE: outlines/backends/llguidance.py
class LLGuidanceLogitsProcessor (line 20) | class LLGuidanceLogitsProcessor(OutlinesLogitsProcessor):
method __init__ (line 23) | def __init__(
method reset (line 46) | def reset(self):
method _setup (line 50) | def _setup(self, batch_size: int) -> None:
method _bias_logits_mlx (line 90) | def _bias_logits_mlx( # pragma: no cover
method _bias_logits_torch (line 107) | def _bias_logits_torch(
method _bias_logits_numpy (line 130) | def _bias_logits_numpy(
method process_logits (line 144) | def process_logits(
class LLGuidanceBackend (line 180) | class LLGuidanceBackend(BaseBackend):
method __init__ (line 183) | def __init__(self, model: SteerableModel):
method _create_llg_tokenizer (line 197) | def _create_llg_tokenizer(self, model: SteerableModel) -> "LLGTokenizer":
method get_json_schema_logits_processor (line 237) | def get_json_schema_logits_processor(
method get_regex_logits_processor (line 258) | def get_regex_logits_processor(
method get_cfg_logits_processor (line 279) | def get_cfg_logits_processor(
FILE: outlines/backends/outlines_core.py
class OutlinesCoreLogitsProcessor (line 19) | class OutlinesCoreLogitsProcessor(OutlinesLogitsProcessor):
method __init__ (line 22) | def __init__(self, index: Index, tensor_library_name: str):
method reset (line 38) | def reset(self) -> None:
method _setup (line 42) | def _setup(self, batch_size: int, vocab_size: int) -> None:
method _bias_logits_mlx (line 84) | def _bias_logits_mlx( # pragma: no cover
method _bias_logits_torch (line 104) | def _bias_logits_torch(self, batch_size: int, logits: TensorType) -> T...
method _bias_logits_numpy (line 124) | def _bias_logits_numpy(self, batch_size: int, logits: TensorType) -> T...
method process_logits (line 140) | def process_logits(self, input_ids: TensorType, logits: TensorType) ->...
class OutlinesCoreBackend (line 176) | class OutlinesCoreBackend(BaseBackend):
method __init__ (line 179) | def __init__(self, model: SteerableModel):
method get_json_schema_logits_processor (line 214) | def get_json_schema_logits_processor(self, json_schema: str):
method get_regex_logits_processor (line 231) | def get_regex_logits_processor(self, regex: str):
method get_cfg_logits_processor (line 248) | def get_cfg_logits_processor(self, grammar):
method create_outlines_core_vocabulary (line 254) | def create_outlines_core_vocabulary(
FILE: outlines/backends/xgrammar.py
class XGrammarLogitsProcessor (line 13) | class XGrammarLogitsProcessor(OutlinesLogitsProcessor):
method __init__ (line 16) | def __init__(self, compiled_grammar: str, tensor_library_name: str,):
method reset (line 34) | def reset(self):
method _setup (line 38) | def _setup(self, batch_size: int, vocab_size: int) -> None:
method _bias_logits_torch (line 55) | def _bias_logits_torch(
method _bias_logits_mlx (line 75) | def _bias_logits_mlx( # pragma: no cover
method process_logits (line 92) | def process_logits(
class XGrammarBackend (line 113) | class XGrammarBackend(BaseBackend):
method __init__ (line 116) | def __init__(self, model: SteerableModel):
method get_json_schema_logits_processor (line 143) | def get_json_schema_logits_processor(
method get_regex_logits_processor (line 167) | def get_regex_logits_processor(
method get_cfg_logits_processor (line 189) | def get_cfg_logits_processor(
FILE: outlines/caching.py
class CloudpickleDisk (line 17) | class CloudpickleDisk(Disk): # pragma: no cover
method __init__ (line 18) | def __init__(self, directory, compress_level=1, **kwargs):
method put (line 22) | def put(self, key):
method get (line 26) | def get(self, key, raw):
method store (line 30) | def store(self, value, read, key=UNKNOWN):
method fetch (line 35) | def fetch(self, mode, filename, value, read):
function get_cache (line 43) | def get_cache():
function cache (line 86) | def cache(expire: Optional[float] = None, typed=False, ignore=()):
function disable_cache (line 156) | def disable_cache():
function clear_cache (line 180) | def clear_cache():
function cache_disabled (line 187) | def cache_disabled():
FILE: outlines/generator.py
class BlackBoxGenerator (line 28) | class BlackBoxGenerator:
method __init__ (line 38) | def __init__(self, model: BlackBoxModel, output_type: Optional[Any]):
method __call__ (line 51) | def __call__(self, prompt: Any, **inference_kwargs) -> Any:
method batch (line 71) | def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]:
method stream (line 91) | def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:
class AsyncBlackBoxGenerator (line 112) | class AsyncBlackBoxGenerator:
method __init__ (line 122) | def __init__(self, model: AsyncBlackBoxModel, output_type: Optional[An...
method __call__ (line 135) | async def __call__(self, prompt: Any, **inference_kwargs) -> Any:
method batch (line 155) | async def batch(self, prompts: List[Any], **inference_kwargs) -> List[...
method stream (line 175) | async def stream(self, prompt: Any, **inference_kwargs) -> AsyncIterat...
class SteerableGenerator (line 197) | class SteerableGenerator:
method __init__ (line 216) | def __init__(
method from_processor (line 260) | def from_processor(
method __call__ (line 279) | def __call__(self, prompt: Any, **inference_kwargs) -> Any:
method batch (line 301) | def batch(self, prompts: List[Any], **inference_kwargs) -> List[Any]:
method stream (line 323) | def stream(self, prompt: Any, **inference_kwargs) -> Iterator[Any]:
function Generator (line 346) | def Generator(
FILE: outlines/grammars.py
function read_grammar (line 8) | def read_grammar(
FILE: outlines/inputs.py
class Image (line 12) | class Image:
method __post_init__ (line 26) | def __post_init__(self):
class Video (line 41) | class Video:
class Audio (line 58) | class Audio:
class Chat (line 75) | class Chat:
method __post_init__ (line 112) | def __post_init__(self):
method append (line 116) | def append(self, message: Dict[str, Any]):
method extend (line 127) | def extend(self, messages: List[Dict[str, Any]]):
method pop (line 138) | def pop(self) -> Dict[str, Any]:
method add_system_message (line 149) | def add_system_message(self, content: str | List[Any]):
method add_user_message (line 160) | def add_user_message(self, content: str | List[Any]):
method add_assistant_message (line 171) | def add_assistant_message(self, content: str | List[Any]):
method __str__ (line 182) | def __str__(self):
method __repr__ (line 185) | def __repr__(self):
FILE: outlines/models/anthropic.py
class AnthropicTypeAdapter (line 15) | class AnthropicTypeAdapter(ModelTypeAdapter):
method format_input (line 27) | def format_input(self, model_input):
method format_str_model_input (line 48) | def format_str_model_input(self, model_input: str) -> dict:
method format_list_model_input (line 54) | def format_list_model_input(self, model_input: list) -> dict:
method format_chat_model_input (line 62) | def format_chat_model_input(self, model_input: Chat) -> dict:
method _create_message (line 74) | def _create_message(self, role: str, content: str | list) -> dict:
method format_output_type (line 117) | def format_output_type(self, output_type):
class Anthropic (line 128) | class Anthropic(Model):
method __init__ (line 135) | def __init__(
method generate (line 151) | def generate(
method generate_batch (line 195) | def generate_batch(
method generate_stream (line 205) | def generate_stream(
function from_anthropic (line 257) | def from_anthropic(
FILE: outlines/models/base.py
class ModelTypeAdapter (line 7) | class ModelTypeAdapter(ABC):
method format_input (line 19) | def format_input(self, model_input: Any) -> Any:
method format_output_type (line 42) | def format_output_type(self, output_type: Optional[Any] = None) -> Any:
class Model (line 62) | class Model(ABC):
method __call__ (line 80) | def __call__(
method batch (line 124) | def batch(
method stream (line 170) | def stream(
method generate (line 219) | def generate(
method generate_batch (line 249) | def generate_batch(
method generate_stream (line 278) | def generate_stream(
class AsyncModel (line 307) | class AsyncModel(ABC):
method __call__ (line 325) | async def __call__(
method batch (line 370) | async def batch(
method stream (line 416) | async def stream(
method generate (line 467) | async def generate(
method generate_batch (line 497) | async def generate_batch(
method generate_stream (line 527) | async def generate_stream(
FILE: outlines/models/dottxt.py
class DottxtTypeAdapter (line 14) | class DottxtTypeAdapter(ModelTypeAdapter):
method format_input (line 17) | def format_input(self, model_input: str) -> str:
method format_output_type (line 38) | def format_output_type(self, output_type: Optional[Any] = None) -> str:
class Dottxt (line 78) | class Dottxt(Model):
method __init__ (line 86) | def __init__(
method generate (line 108) | def generate(
method generate_batch (line 155) | def generate_batch(
method generate_stream (line 165) | def generate_stream(
function from_dottxt (line 178) | def from_dottxt(
FILE: outlines/models/gemini.py
class GeminiTypeAdapter (line 31) | class GeminiTypeAdapter(ModelTypeAdapter):
method format_input (line 41) | def format_input(self, model_input):
method format_str_model_input (line 62) | def format_str_model_input(self, model_input: str) -> dict:
method format_list_model_input (line 66) | def format_list_model_input(self, model_input: list) -> dict:
method format_chat_model_input (line 74) | def format_chat_model_input(self, model_input: Chat) -> dict:
method _create_message (line 86) | def _create_message(self, role: str, content: str | list) -> dict:
method _create_text_part (line 129) | def _create_text_part(self, text: str) -> dict:
method _create_img_part (line 135) | def _create_img_part(self, image: Image) -> dict:
method format_output_type (line 144) | def format_output_type(self, output_type: Optional[Any] = None) -> dict:
method format_enum_output_type (line 205) | def format_enum_output_type(self, output_type: Optional[Any]) -> dict:
method format_json_output_type (line 211) | def format_json_output_type(self, output_type: Optional[Any]) -> dict:
method format_list_output_type (line 217) | def format_list_output_type(self, output_type: Optional[Any]) -> dict:
class Gemini (line 246) | class Gemini(Model):
method __init__ (line 255) | def __init__(self, client: "Client", model_name: Optional[str] = None):
method generate (line 269) | def generate(
method generate_batch (line 305) | def generate_batch(
method generate_stream (line 315) | def generate_stream(
function from_gemini (line 354) | def from_gemini(client: "Client", model_name: Optional[str] = None) -> G...
FILE: outlines/models/llamacpp.py
class LlamaCppTokenizer (line 28) | class LlamaCppTokenizer(Tokenizer):
method __init__ (line 29) | def __init__(self, model: "Llama"):
method decode (line 89) | def decode(self, token_ids: List[int]) -> List[str]:
method encode (line 93) | def encode(
method convert_token_to_string (line 115) | def convert_token_to_string(self, token: str) -> str:
method __eq__ (line 129) | def __eq__(self, other):
method __hash__ (line 134) | def __hash__(self):
method __getstate__ (line 146) | def __getstate__(self):
method __setstate__ (line 156) | def __setstate__(self, state):
class LlamaCppTypeAdapter (line 160) | class LlamaCppTypeAdapter(ModelTypeAdapter):
method __init__ (line 168) | def __init__(self, has_chat_template: bool = False):
method format_input (line 178) | def format_input(self, model_input):
method format_str_input (line 198) | def format_str_input(self, model_input: str) -> str | list:
method format_chat_input (line 204) | def format_chat_input(self, model_input: Chat) -> list:
method format_output_type (line 222) | def format_output_type(
class LlamaCpp (line 245) | class LlamaCpp(Model):
method __init__ (line 254) | def __init__(self, model: "Llama", chat_mode: bool = True):
method generate (line 275) | def generate(
method generate_batch (line 323) | def generate_batch(
method generate_stream (line 331) | def generate_stream(
function from_llamacpp (line 380) | def from_llamacpp(model: "Llama", chat_mode: bool = True) -> LlamaCpp:
FILE: outlines/models/lmstudio.py
class LMStudioTypeAdapter (line 24) | class LMStudioTypeAdapter(ModelTypeAdapter):
method _prepare_lmstudio_image (line 27) | def _prepare_lmstudio_image(self, image: Image):
method format_input (line 41) | def format_input(self, model_input):
method format_str_model_input (line 61) | def format_str_model_input(self, model_input: str) -> str:
method format_list_model_input (line 66) | def format_list_model_input(self, model_input: list) -> "LMStudioChat":
method format_chat_model_input (line 82) | def format_chat_model_input(self, model_input: Chat) -> "LMStudioChat":
method format_output_type (line 122) | def format_output_type(
class LMStudio (line 160) | class LMStudio(Model):
method __init__ (line 168) | def __init__(self, client: "Client", model_name: Optional[str] = None):
method generate (line 184) | def generate(
method generate_batch (line 224) | def generate_batch(
method generate_stream (line 234) | def generate_stream(
class AsyncLMStudio (line 276) | class AsyncLMStudio(AsyncModel):
method __init__ (line 284) | def __init__(
method close (line 302) | async def close(self) -> None:
method generate (line 308) | async def generate(
method generate_batch (line 352) | async def generate_batch(
method generate_stream (line 362) | async def generate_stream( # type: ignore
function from_lmstudio (line 408) | def from_lmstudio(
FILE: outlines/models/mistral.py
class MistralTypeAdapter (line 35) | class MistralTypeAdapter(ModelTypeAdapter):
method format_input (line 44) | def format_input(self, model_input):
method format_str_model_input (line 64) | def format_str_model_input(self, model_input: str) -> list:
method format_list_model_input (line 83) | def format_list_model_input(self, model_input: list) -> list:
method format_chat_model_input (line 103) | def format_chat_model_input(self, model_input: Chat) -> list:
method _create_message_content (line 137) | def _create_message_content(
method format_output_type (line 189) | def format_output_type(self, output_type: Optional[Any] = None) -> dict:
method format_json_schema_type (line 242) | def format_json_schema_type(
class Mistral (line 273) | class Mistral(Model):
method __init__ (line 281) | def __init__(
method generate (line 297) | def generate(
method generate_batch (line 348) | def generate_batch(
method generate_stream (line 358) | def generate_stream(
class AsyncMistral (line 411) | class AsyncMistral(AsyncModel):
method __init__ (line 419) | def __init__(
method generate (line 435) | async def generate(
method generate_batch (line 487) | async def generate_batch(
method generate_stream (line 497) | async def generate_stream(
function from_mistral (line 552) | def from_mistral(
FILE: outlines/models/mlxlm.py
class MLXLMTypeAdapter (line 19) | class MLXLMTypeAdapter(ModelTypeAdapter):
method __init__ (line 22) | def __init__(self, tokenizer: "PreTrainedTokenizer", has_chat_template...
method format_input (line 27) | def format_input(self, model_input):
method format_str_input (line 47) | def format_str_input(self, model_input: str) -> str:
method format_chat_input (line 53) | def format_chat_input(self, model_input: Chat) -> str:
method format_output_type (line 69) | def format_output_type(
class MLXLM (line 90) | class MLXLM(Model):
method __init__ (line 100) | def __init__(
method generate (line 125) | def generate(
method generate_batch (line 159) | def generate_batch(
method generate_stream (line 218) | def generate_stream(
function from_mlxlm (line 254) | def from_mlxlm(model: "nn.Module", tokenizer: "PreTrainedTokenizer") -> ...
FILE: outlines/models/ollama.py
class OllamaTypeAdapter (line 25) | class OllamaTypeAdapter(ModelTypeAdapter):
method format_input (line 29) | def format_input(self, model_input):
method format_str_model_input (line 50) | def format_str_model_input(self, model_input: str) -> list:
method format_list_model_input (line 60) | def format_list_model_input(self, model_input: list) -> list:
method format_chat_model_input (line 70) | def format_chat_model_input(self, model_input: Chat) -> list:
method _create_message (line 80) | def _create_message(self, role: str, content: str | list) -> dict:
method format_output_type (line 109) | def format_output_type(
class Ollama (line 147) | class Ollama(Model):
method __init__ (line 155) | def __init__(self, client: "Client", model_name: Optional[str] = None):
method generate (line 169) | def generate(self,
method generate_batch (line 205) | def generate_batch(
method generate_stream (line 215) | def generate_stream(
class AsyncOllama (line 253) | class AsyncOllama(AsyncModel):
method __init__ (line 261) | def __init__(
method generate (line 277) | async def generate(self,
method generate_batch (line 311) | async def generate_batch(
method generate_stream (line 321) | async def generate_stream( # type: ignore
function from_ollama (line 359) | def from_ollama(
FILE: outlines/models/openai.py
class OpenAITypeAdapter (line 33) | class OpenAITypeAdapter(ModelTypeAdapter):
method format_input (line 43) | def format_input(self, model_input):
method format_str_model_input (line 63) | def format_str_model_input(self, model_input: str) -> list:
method format_list_model_input (line 73) | def format_list_model_input(self, model_input: list) -> list:
method format_chat_model_input (line 83) | def format_chat_model_input(self, model_input: Chat) -> list:
method _create_message (line 93) | def _create_message(self, role: str, content: str | list) -> dict:
method _create_img_content (line 129) | def _create_img_content(self, image: Image) -> dict:
method format_output_type (line 138) | def format_output_type(self, output_type: Optional[Any] = None) -> dict:
method format_json_output_type (line 181) | def format_json_output_type(self, schema: dict) -> dict:
method format_json_mode_type (line 200) | def format_json_mode_type(self) -> dict:
class OpenAI (line 209) | class OpenAI(Model):
method __init__ (line 217) | def __init__(
method generate (line 235) | def generate(
method generate_batch (line 295) | def generate_batch(
method generate_stream (line 305) | def generate_stream(
class AsyncOpenAI (line 359) | class AsyncOpenAI(AsyncModel):
method __init__ (line 367) | def __init__(
method generate (line 385) | async def generate(
method generate_batch (line 445) | async def generate_batch(
method generate_stream (line 455) | async def generate_stream( # type: ignore
function from_openai (line 509) | def from_openai(
FILE: outlines/models/sglang.py
class SGLangTypeAdapter (line 25) | class SGLangTypeAdapter(ModelTypeAdapter):
method format_input (line 28) | def format_input(self, model_input: Union[Chat, list, str]) -> list:
method format_output_type (line 47) | def format_output_type(self, output_type: Optional[Any] = None) -> dict:
class SGLang (line 82) | class SGLang(Model):
method __init__ (line 92) | def __init__(self, client, model_name: Optional[str] = None):
method generate (line 106) | def generate(
method generate_batch (line 152) | def generate_batch(
method generate_stream (line 162) | def generate_stream(
method _build_client_args (line 199) | def _build_client_args(
class AsyncSGLang (line 221) | class AsyncSGLang(AsyncModel):
method __init__ (line 231) | def __init__(self, client, model_name: Optional[str] = None):
method generate (line 250) | async def generate(
method generate_batch (line 294) | async def generate_batch(
method generate_stream (line 304) | async def generate_stream( # type: ignore
method _build_client_args (line 342) | def _build_client_args(
function from_sglang (line 364) | def from_sglang(
FILE: outlines/models/tgi.py
class TGITypeAdapter (line 23) | class TGITypeAdapter(ModelTypeAdapter):
method format_input (line 27) | def format_input(self, model_input):
method format_str_input (line 47) | def format_str_input(self, model_input: str) -> str:
method format_output_type (line 50) | def format_output_type(self, output_type: Optional[Any] = None) -> dict:
class TGI (line 88) | class TGI(Model):
method __init__ (line 98) | def __init__(self, client):
method generate (line 109) | def generate(
method generate_batch (line 142) | def generate_batch(
method generate_stream (line 150) | def generate_stream(
method _build_client_args (line 186) | def _build_client_args(
class AsyncTGI (line 205) | class AsyncTGI(AsyncModel):
method __init__ (line 215) | def __init__(self, client):
method generate (line 226) | async def generate(
method generate_batch (line 259) | async def generate_batch(
method generate_stream (line 267) | async def generate_stream( # type: ignore
method _build_client_args (line 303) | def _build_client_args(
function from_tgi (line 322) | def from_tgi(
FILE: outlines/models/tokenizer.py
class Tokenizer (line 10) | class Tokenizer(Hashable, Protocol):
method encode (line 17) | def encode(
method decode (line 23) | def decode(self, token_ids: "NDArray[np.int64]") -> List[str]:
method convert_token_to_string (line 27) | def convert_token_to_string(self, token: str) -> str:
function _check_hf_chat_template (line 37) | def _check_hf_chat_template(tokenizer: "PreTrainedTokenizer | PreTrained...
FILE: outlines/models/transformers.py
function get_llama_tokenizer_types (line 26) | def get_llama_tokenizer_types():
class TransformerTokenizer (line 68) | class TransformerTokenizer(Tokenizer):
method __init__ (line 71) | def __init__(self, tokenizer: "PreTrainedTokenizer", **kwargs):
method encode (line 89) | def encode(
method decode (line 97) | def decode(self, token_ids: "torch.LongTensor") -> List[str]:
method convert_token_to_string (line 101) | def convert_token_to_string(self, token: str) -> str:
method __eq__ (line 111) | def __eq__(self, other):
method __hash__ (line 121) | def __hash__(self):
method __getstate__ (line 126) | def __getstate__(self):
method __setstate__ (line 130) | def __setstate__(self, state):
class TransformersTypeAdapter (line 134) | class TransformersTypeAdapter(ModelTypeAdapter):
method __init__ (line 137) | def __init__(self, tokenizer: "PreTrainedTokenizer", has_chat_template...
method format_input (line 142) | def format_input(self, model_input):
method format_str_input (line 162) | def format_str_input(self, model_input: str) -> str:
method format_chat_input (line 168) | def format_chat_input(self, model_input: Chat) -> str:
method format_output_type (line 175) | def format_output_type(
class Transformers (line 199) | class Transformers(Model):
method __init__ (line 209) | def __init__(
method _prepare_model_inputs (line 283) | def _prepare_model_inputs(
method generate (line 309) | def generate(
method generate_batch (line 355) | def generate_batch(
method generate_stream (line 376) | def generate_stream(self, model_input, output_type, **inference_kwargs):
method _generate_output_seq (line 386) | def _generate_output_seq(self, prompts, inputs, **inference_kwargs):
method _decode_generation (line 402) | def _decode_generation(self, generated_ids: "torch.Tensor"):
class TransformersMultiModalTypeAdapter (line 419) | class TransformersMultiModalTypeAdapter(ModelTypeAdapter):
method __init__ (line 422) | def __init__(self, **kwargs):
method format_input (line 426) | def format_input(self, model_input):
method format_chat_input (line 448) | def format_chat_input(self, model_input: Chat) -> dict:
method _prepare_message (line 469) | def _prepare_message(self, role: str, content: str | list) -> tuple[di...
method _extract_assets_from_content (line 493) | def _extract_assets_from_content(self, content: list) -> list:
method _format_asset_for_template (line 535) | def _format_asset_for_template(self, asset: Image | Video | Audio) -> ...
method format_list_input (line 550) | def format_list_input(self, model_input: list) -> dict:
method format_output_type (line 583) | def format_output_type(
class TransformersMultiModal (line 607) | class TransformersMultiModal(Transformers):
method __init__ (line 617) | def __init__(
method _prepare_model_inputs (line 654) | def _prepare_model_inputs(
function from_transformers (line 690) | def from_transformers(
FILE: outlines/models/utils.py
function set_additional_properties_false_json_schema (line 4) | def set_additional_properties_false_json_schema(schema: dict) -> dict:
FILE: outlines/models/vllm.py
class VLLMTypeAdapter (line 17) | class VLLMTypeAdapter(ModelTypeAdapter):
method format_input (line 20) | def format_input(self, model_input: Union[Chat, str, list]) -> list:
method format_output_type (line 39) | def format_output_type(self, output_type: Optional[Any] = None) -> dict:
class VLLM (line 68) | class VLLM(Model):
method __init__ (line 77) | def __init__(
method generate (line 93) | def generate(
method generate_batch (line 139) | def generate_batch(
method generate_stream (line 147) | def generate_stream(
method _build_client_args (line 184) | def _build_client_args(
class AsyncVLLM (line 209) | class AsyncVLLM(AsyncModel):
method __init__ (line 218) | def __init__(
method generate (line 234) | async def generate(
method generate_batch (line 278) | async def generate_batch(
method generate_stream (line 286) | async def generate_stream( # type: ignore
method _build_client_args (line 323) | def _build_client_args(
function from_vllm (line 348) | def from_vllm(
FILE: outlines/models/vllm_offline.py
class VLLMOfflineTypeAdapter (line 19) | class VLLMOfflineTypeAdapter(ModelTypeAdapter):
method __init__ (line 22) | def __init__(self, has_chat_template: bool = False):
method format_input (line 26) | def format_input(self, model_input):
method format_input_str (line 42) | def format_input_str(self, model_input: str) -> str | list:
method format_input_chat (line 51) | def format_input_chat(self, model_input: Chat) -> list:
method format_output_type (line 64) | def format_output_type(self, output_type: Optional[Any] = None) -> dict:
class VLLMOffline (line 99) | class VLLMOffline(Model):
method __init__ (line 107) | def __init__(self, model: "LLM"):
method _build_generation_args (line 120) | def _build_generation_args(
method generate (line 142) | def generate(
method generate_batch (line 193) | def generate_batch(
method generate_stream (line 240) | def generate_stream(self, model_input, output_type, **inference_kwargs):
method _check_chat_template (line 250) | def _check_chat_template(self) -> bool:
function from_vllm_offline (line 273) | def from_vllm_offline(model: "LLM") -> VLLMOffline:
FILE: outlines/processors/base_logits_processor.py
class OutlinesLogitsProcessor (line 14) | class OutlinesLogitsProcessor:
method __init__ (line 25) | def __init__(self, tensor_library_name: str):
method reset (line 48) | def reset(self):
method process_logits (line 58) | def process_logits(
method __call__ (line 85) | def __call__(
FILE: outlines/processors/tensor_adapters/base.py
class TensorAdapter (line 12) | class TensorAdapter(ABC):
method shape (line 29) | def shape(self, tensor: TensorType) -> list[int]:
method unsqueeze (line 47) | def unsqueeze(self, tensor: TensorType) -> TensorType:
method squeeze (line 64) | def squeeze(self, tensor: TensorType) -> TensorType:
method to_list (line 81) | def to_list(self, tensor: TensorType) -> list:
method to_scalar (line 98) | def to_scalar(self, tensor: TensorType) -> Any:
method full_like (line 115) | def full_like(self, tensor: "torch.Tensor", fill_value: Any) -> Tensor...
method concatenate (line 139) | def concatenate(
method get_device (line 161) | def get_device(self, tensor: TensorType) -> str:
method to_device (line 178) | def to_device(self, tensor: TensorType, device: str) -> TensorType:
method boolean_ones_like (line 197) | def boolean_ones_like(self, tensor: TensorType) -> TensorType:
method apply_mask (line 215) | def apply_mask(
method argsort_descending (line 239) | def argsort_descending(
FILE: outlines/processors/tensor_adapters/mlx.py
class MLXTensorAdapter (line 6) | class MLXTensorAdapter(TensorAdapter):
method __init__ (line 9) | def __init__(self):
method shape (line 14) | def shape(self, tensor):
method unsqueeze (line 17) | def unsqueeze(self, tensor):
method squeeze (line 20) | def squeeze(self, tensor):
method to_list (line 25) | def to_list(self, tensor):
method to_scalar (line 28) | def to_scalar(self, tensor):
method full_like (line 31) | def full_like(self, tensor, fill_value):
method concatenate (line 35) | def concatenate(self, tensors):
method get_device (line 45) | def get_device(self, tensor):
method to_device (line 48) | def to_device(self, tensor, device):
method boolean_ones_like (line 51) | def boolean_ones_like(self, tensor):
method apply_mask (line 54) | def apply_mask(self, tensor, mask, value):
method argsort_descending (line 59) | def argsort_descending(self, tensor):
FILE: outlines/processors/tensor_adapters/numpy.py
class NumpyTensorAdapter (line 6) | class NumpyTensorAdapter(TensorAdapter):
method __init__ (line 9) | def __init__(self):
method shape (line 14) | def shape(self, tensor):
method unsqueeze (line 17) | def unsqueeze(self, tensor):
method squeeze (line 20) | def squeeze(self, tensor):
method to_list (line 23) | def to_list(self, tensor):
method to_scalar (line 26) | def to_scalar(self, tensor):
method full_like (line 29) | def full_like(self, tensor, fill_value):
method concatenate (line 32) | def concatenate(self, tensors):
method get_device (line 35) | def get_device(self, tensor):
method to_device (line 38) | def to_device(self, tensor, device):
method boolean_ones_like (line 41) | def boolean_ones_like(self, tensor):
method apply_mask (line 44) | def apply_mask(self, tensor, mask, value):
method argsort_descending (line 49) | def argsort_descending(self, tensor):
FILE: outlines/processors/tensor_adapters/torch.py
class TorchTensorAdapter (line 6) | class TorchTensorAdapter(TensorAdapter):
method __init__ (line 9) | def __init__(self):
method shape (line 14) | def shape(self, tensor):
method unsqueeze (line 17) | def unsqueeze(self, tensor):
method squeeze (line 20) | def squeeze(self, tensor):
method to_list (line 23) | def to_list(self, tensor):
method to_scalar (line 26) | def to_scalar(self, tensor):
method full_like (line 29) | def full_like(self, tensor, fill_value):
method concatenate (line 32) | def concatenate(self, tensors):
method get_device (line 35) | def get_device(self, tensor):
method to_device (line 38) | def to_device(self, tensor, device):
method boolean_ones_like (line 41) | def boolean_ones_like(self, tensor):
method apply_mask (line 44) | def apply_mask(self, tensor, mask, value):
method argsort_descending (line 47) | def argsort_descending(self, tensor):
FILE: outlines/templates.py
function Vision (line 21) | def Vision(prompt: str, image: PILImage.Image) -> list:
class Template (line 59) | class Template:
method __call__ (line 68) | def __call__(self, *args, **kwargs) -> str:
method from_string (line 80) | def from_string(cls, content: str, filters: Dict[str, Callable] = {}):
method from_file (line 98) | def from_file(cls, path: Path, filters: Dict[str, Callable] = {}):
function build_template_from_string (line 125) | def build_template_from_string(
function build_template_from_file (line 147) | def build_template_from_file(
function create_jinja_env (line 156) | def create_jinja_env(
function get_fn_name (line 203) | def get_fn_name(fn: Callable):
function get_fn_args (line 216) | def get_fn_args(fn: Callable):
function get_fn_description (line 228) | def get_fn_description(fn: Callable):
function get_fn_source (line 242) | def get_fn_source(fn: Callable):
function get_fn_signature (line 257) | def get_fn_signature(fn: Callable):
function get_schema (line 273) | def get_schema(model: Any):
function get_schema_dict (line 280) | def get_schema_dict(model: Dict):
function get_schema_pydantic (line 286) | def get_schema_pydantic(model: Type[BaseModel]):
function parse_pydantic_schema (line 301) | def parse_pydantic_schema(raw_schema, definitions):
FILE: outlines/types/__init__.py
class AirportImportError (line 26) | class AirportImportError:
method __getattr__ (line 28) | def __getattr__(self, name):
class CountryImportError (line 39) | class CountryImportError:
method __getattr__ (line 41) | def __getattr__(self, name):
FILE: outlines/types/countries.py
function get_country_flags (line 8) | def get_country_flags():
FILE: outlines/types/dsl.py
class Term (line 82) | class Term:
method __add__ (line 104) | def __add__(self: "Term", other: "Term") -> "Sequence":
method __radd__ (line 110) | def __radd__(self: "Term", other: "Term") -> "Sequence":
method __or__ (line 116) | def __or__(self: "Term", other: "Term") -> "Alternatives":
method __ror__ (line 122) | def __ror__(self: "Term", other: "Term") -> "Alternatives":
method __get_validator__ (line 128) | def __get_validator__(self, _core_schema):
method __get_pydantic_core_schema__ (line 134) | def __get_pydantic_core_schema__(
method __get_pydantic_json_schema__ (line 139) | def __get_pydantic_json_schema__(
method validate (line 144) | def validate(self, value: str) -> str:
method matches (line 153) | def matches(self, value: str) -> bool:
method display_ascii_tree (line 166) | def display_ascii_tree(self, indent="", is_last=True) -> str:
method _display_node (line 178) | def _display_node(self):
method _display_children (line 181) | def _display_children(self, indent: str) -> str:
method __str__ (line 185) | def __str__(self):
method optional (line 188) | def optional(self) -> "Optional":
method exactly (line 191) | def exactly(self, count: int) -> "QuantifyExact":
method at_least (line 194) | def at_least(self, count: int) -> "QuantifyMinimum":
method at_most (line 197) | def at_most(self, count: int) -> "QuantifyMaximum":
method between (line 200) | def between(self, min_count: int, max_count: int) -> "QuantifyBetween":
method one_or_more (line 203) | def one_or_more(self) -> "KleenePlus":
method zero_or_more (line 206) | def zero_or_more(self) -> "KleeneStar":
class String (line 211) | class String(Term):
method _display_node (line 214) | def _display_node(self) -> str:
method __repr__ (line 217) | def __repr__(self):
class Regex (line 222) | class Regex(Term):
method _display_node (line 233) | def _display_node(self) -> str:
method __repr__ (line 236) | def __repr__(self):
class CFG (line 241) | class CFG(Term):
method _display_node (line 252) | def _display_node(self) -> str:
method __repr__ (line 255) | def __repr__(self):
method __eq__ (line 258) | def __eq__(self, other):
method from_file (line 264) | def from_file(cls, path: str) -> "CFG":
class JsonSchema (line 282) | class JsonSchema(Term):
method __init__ (line 293) | def __init__(
method is_json_schema (line 339) | def is_json_schema(cls, obj: Any) -> bool:
method convert_to (line 362) | def convert_to(
method _display_node (line 441) | def _display_node(self) -> str:
method __repr__ (line 444) | def __repr__(self):
method __eq__ (line 447) | def __eq__(self, other):
method from_file (line 458) | def from_file(cls, path: str) -> "JsonSchema":
class Choice (line 478) | class Choice(Term):
method _display_node (line 489) | def _display_node(self) -> str:
method __repr__ (line 492) | def __repr__(self):
class KleeneStar (line 497) | class KleeneStar(Term):
method _display_node (line 500) | def _display_node(self) -> str:
method _display_children (line 503) | def _display_children(self, indent: str) -> str:
method __repr__ (line 506) | def __repr__(self):
class KleenePlus (line 511) | class KleenePlus(Term):
method _display_node (line 514) | def _display_node(self) -> str:
method _display_children (line 517) | def _display_children(self, indent: str) -> str:
method __repr__ (line 520) | def __repr__(self):
class Optional (line 525) | class Optional(Term):
method _display_node (line 528) | def _display_node(self) -> str:
method _display_children (line 531) | def _display_children(self, indent: str) -> str:
method __repr__ (line 534) | def __repr__(self):
class Alternatives (line 539) | class Alternatives(Term):
method _display_node (line 542) | def _display_node(self) -> str:
method _display_children (line 545) | def _display_children(self, indent: str) -> str:
method __repr__ (line 551) | def __repr__(self):
class Sequence (line 556) | class Sequence(Term):
method _display_node (line 559) | def _display_node(self) -> str:
method _display_children (line 562) | def _display_children(self, indent: str) -> str:
method __repr__ (line 568) | def __repr__(self):
class QuantifyExact (line 573) | class QuantifyExact(Term):
method _display_node (line 577) | def _display_node(self) -> str:
method _display_children (line 580) | def _display_children(self, indent: str) -> str:
method __repr__ (line 583) | def __repr__(self):
class QuantifyMinimum (line 588) | class QuantifyMinimum(Term):
method _display_node (line 592) | def _display_node(self) -> str:
method _display_children (line 595) | def _display_children(self, indent: str) -> str:
method __repr__ (line 598) | def __repr__(self):
class QuantifyMaximum (line 605) | class QuantifyMaximum(Term):
method _display_node (line 609) | def _display_node(self) -> str:
method _display_children (line 612) | def _display_children(self, indent: str) -> str:
method __repr__ (line 615) | def __repr__(self):
class QuantifyBetween (line 622) | class QuantifyBetween(Term):
method __post_init__ (line 627) | def __post_init__(self):
method _display_node (line 633) | def _display_node(self) -> str:
method _display_children (line 636) | def _display_children(self, indent: str) -> str:
method __repr__ (line 639) | def __repr__(self):
function regex (line 643) | def regex(pattern: str):
function cfg (line 647) | def cfg(definition: str):
function json_schema (line 651) | def json_schema(schema: Union[str, dict, type[BaseModel]]):
function either (line 655) | def either(*terms: Union[str, Term]):
function optional (line 666) | def optional(term: Union[Term, str]) -> Optional:
function exactly (line 671) | def exactly(count: int, term: Union[Term, str]) -> QuantifyExact:
function at_least (line 677) | def at_least(count: int, term: Union[Term, str]) -> QuantifyMinimum:
function at_most (line 683) | def at_most(count: int, term: Union[Term, str]) -> QuantifyMaximum:
function between (line 689) | def between(min_count: int, max_count: int, term: Union[Term, str]) -> Q...
function zero_or_more (line 694) | def zero_or_more(term: Union[Term, str]) -> KleeneStar:
function one_or_more (line 699) | def one_or_more(term: Union[Term, str]) -> KleenePlus:
function python_types_to_terms (line 704) | def python_types_to_terms(ptype: Any, recursion_depth: int = 0) -> Term:
function _get_enum_members (line 798) | def _get_enum_members(ptype: EnumMeta) -> List[Any]:
function _handle_literal (line 811) | def _handle_literal(args: tuple) -> Alternatives:
function _ensure_json_quoted (line 815) | def _ensure_json_quoted(term: Term) -> Term:
function _handle_union (line 831) | def _handle_union(args: tuple, recursion_depth: int) -> Alternatives:
function _handle_list (line 846) | def _handle_list(args: tuple, recursion_depth: int) -> Sequence:
function _handle_tuple (line 863) | def _handle_tuple(args: tuple, recursion_depth: int) -> Union[Sequence, ...
function _handle_dict (line 887) | def _handle_dict(args: tuple, recursion_depth: int) -> Sequence:
function to_regex (line 913) | def to_regex(term: Term) -> str:
FILE: outlines/types/json_schema_utils.py
function schema_type_to_python (line 15) | def schema_type_to_python(
function json_schema_dict_to_typeddict (line 67) | def json_schema_dict_to_typeddict(
function json_schema_dict_to_pydantic (line 100) | def json_schema_dict_to_pydantic(
function json_schema_dict_to_dataclass (line 134) | def json_schema_dict_to_dataclass(
FILE: outlines/types/utils.py
function is_int (line 35) | def is_int(value: Any) -> bool:
function is_int_instance (line 44) | def is_int_instance(value: Any) -> bool:
function is_float (line 48) | def is_float(value: Any) -> bool:
function is_float_instance (line 57) | def is_float_instance(value: Any) -> bool:
function is_str (line 61) | def is_str(value: Any) -> bool:
function is_str_instance (line 70) | def is_str_instance(value: Any) -> bool:
function is_bool (line 74) | def is_bool(value: Any) -> bool:
function is_dict_instance (line 83) | def is_dict_instance(value: Any) -> bool:
function is_datetime (line 87) | def is_datetime(value: Any) -> bool:
function is_date (line 91) | def is_date(value: Any) -> bool:
function is_time (line 95) | def is_time(value: Any) -> bool:
function is_native_dict (line 99) | def is_native_dict(value: Any) -> bool:
function is_typing_dict (line 103) | def is_typing_dict(value: Any) -> bool:
function is_typing_list (line 107) | def is_typing_list(value: Any) -> bool:
function is_typing_tuple (line 111) | def is_typing_tuple(value: Any) -> bool:
function is_union (line 115) | def is_union(value: Any) -> bool:
function is_literal (line 119) | def is_literal(value: Any) -> bool:
function is_dataclass (line 123) | def is_dataclass(value: Any) -> bool:
function is_typed_dict (line 127) | def is_typed_dict(value: Any) -> bool:
function is_pydantic_model (line 131) | def is_pydantic_model(value):
function is_genson_schema_builder (line 139) | def is_genson_schema_builder(value: Any) -> bool:
function is_enum (line 143) | def is_enum(value: Any) -> bool:
function is_callable (line 147) | def is_callable(value: Any) -> bool:
function get_enum_from_literal (line 154) | def get_enum_from_literal(value) -> Enum:
function get_enum_from_choice (line 161) | def get_enum_from_choice(value) -> Enum:
function get_schema_from_signature (line 168) | def get_schema_from_signature(fn: Callable) -> dict:
function get_schema_from_enum (line 196) | def get_schema_from_enum(myenum: type[Enum]) -> dict:
FILE: tests/backends/test_backends.py
function model (line 23) | def model():
function json_schema (line 30) | def json_schema():
function regex (line 38) | def regex():
function cfg_lark (line 42) | def cfg_lark():
function cfg_ebnf (line 65) | def cfg_ebnf():
function test_get_backend (line 72) | def test_get_backend(model):
function test_get_json_schema_logits_processor (line 86) | def test_get_json_schema_logits_processor(model, json_schema):
function test_get_regex_logits_processor (line 97) | def test_get_regex_logits_processor(model, regex):
function test_get_cfg_logits_processor (line 108) | def test_get_cfg_logits_processor(model, cfg_lark, cfg_ebnf):
FILE: tests/backends/test_backends_utils.py
function simulate_model_calling_processor (line 5) | def simulate_model_calling_processor(processor, tensor_library_name, voc...
class TorchTensorAdapter (line 29) | class TorchTensorAdapter():
method randn (line 30) | def randn(self, shape):
method randint (line 33) | def randint(self, low, high, size):
method add_token_inputs_ids (line 36) | def add_token_inputs_ids(self, input_ids, logits):
class NumpyTensorAdapter (line 42) | class NumpyTensorAdapter():
method randn (line 43) | def randn(self, shape):
method randint (line 46) | def randint(self, low, high, size):
method add_token_inputs_ids (line 49) | def add_token_inputs_ids(self, input_ids, logits):
class MLXTensorAdapter (line 56) | class MLXTensorAdapter():
method __init__ (line 57) | def __init__(self):
method randn (line 61) | def randn(self, shape):
method randint (line 64) | def randint(self, low, high, size):
method add_token_inputs_ids (line 67) | def add_token_inputs_ids(self, input_ids, logits):
FILE: tests/backends/test_llguidance.py
function model_transformers (line 23) | def model_transformers():
function model_llamacpp (line 29) | def model_llamacpp():
function model_mlxlm (line 38) | def model_mlxlm():
function json_schema (line 44) | def json_schema():
function regex (line 52) | def regex():
function cfg_lark (line 56) | def cfg_lark():
function cfg_ebnf (line 79) | def cfg_ebnf():
function test_llguidance_processor_torch (line 86) | def test_llguidance_processor_torch(regex):
function test_llguidance_processor_numpy (line 105) | def test_llguidance_processor_numpy(regex):
function test_llguidance_processor_mlx (line 125) | def test_llguidance_processor_mlx(regex):
function test_llguidance_backend (line 151) | def test_llguidance_backend(model, tensor_library_name, json_schema, reg...
FILE: tests/backends/test_outlines_core.py
function model_transformers (line 23) | def model_transformers():
function model_llamacpp (line 30) | def model_llamacpp():
function model_mlxlm (line 40) | def model_mlxlm():
function json_schema (line 45) | def json_schema():
function regex (line 54) | def regex():
function cfg (line 59) | def cfg():
function test_outlines_core_processor_torch (line 82) | def test_outlines_core_processor_torch(regex):
function test_outlines_core_processor_numpy (line 97) | def test_outlines_core_processor_numpy(regex):
function test_outlines_core_processor_mlx (line 112) | def test_outlines_core_processor_mlx():
function test_create_vocabulary_preserves_duplicate_token_ids (line 126) | def test_create_vocabulary_preserves_duplicate_token_ids():
function test_outlines_core_backend (line 159) | def test_outlines_core_backend(model, tensor_library_name, json_schema, ...
FILE: tests/backends/test_xgrammar.py
function model_transformers (line 19) | def model_transformers():
function model_llamacpp (line 25) | def model_llamacpp():
function model_mlxlm (line 34) | def model_mlxlm():
function tokenizer_info (line 40) | def tokenizer_info():
function json_schema (line 49) | def json_schema():
function regex (line 57) | def regex():
function cfg (line 61) | def cfg():
function test_xgr_processor_torch (line 68) | def test_xgr_processor_torch(regex):
function test_xgr_processor_mlx (line 92) | def test_xgr_processor_mlx(tokenizer_info):
function test_xgrammar_backend (line 119) | def test_xgrammar_backend(model, tensor_library_name, json_schema, regex...
function test_xgrammar_backend_invalid_model (line 163) | def test_xgrammar_backend_invalid_model():
FILE: tests/conftest.py
function pytest_collection_modifyitems (line 6) | def pytest_collection_modifyitems(config, items):
FILE: tests/models/test_anthopic_type_adapter.py
function image (line 11) | def image():
function adapter (line 26) | def adapter():
function test_anthropic_type_adapter_input_text (line 30) | def test_anthropic_type_adapter_input_text(adapter):
function test_anthropic_type_adapter_input_vision (line 36) | def test_anthropic_type_adapter_input_vision(adapter, image):
function test_anthropic_type_adapter_input_chat (line 60) | def test_anthropic_type_adapter_input_chat(adapter, image):
function test_anthropic_type_adapter_input_invalid (line 90) | def test_anthropic_type_adapter_input_invalid(adapter):
function test_anthropic_type_adapter_output (line 113) | def test_anthropic_type_adapter_output(adapter):
FILE: tests/models/test_anthropic.py
function model (line 17) | def model():
function model_no_model_name (line 22) | def model_no_model_name():
function image (line 27) | def image():
function test_init_from_client (line 41) | def test_init_from_client():
function test_anthropic_wrong_inference_parameters (line 57) | def test_anthropic_wrong_inference_parameters():
function test_anthropic_wrong_input_type (line 63) | def test_anthropic_wrong_input_type(image):
function test_anthropic_wrong_output_type (line 76) | def test_anthropic_wrong_output_type():
function test_anthropic_simple_call (line 87) | def test_anthropic_simple_call(model):
function test_anthropic_direct_call (line 94) | def test_anthropic_direct_call(model_no_model_name):
function test_anthropic_simple_vision (line 104) | def test_anthropic_simple_vision(model, image):
function test_anthropic_chat (line 116) | def test_anthropic_chat(model, image):
function test_anthopic_streaming (line 128) | def test_anthopic_streaming(model):
function test_anthropic_batch (line 134) | def test_anthropic_batch(model):
FILE: tests/models/test_dottxt.py
class User (line 17) | class User(BaseModel):
function api_key (line 24) | def api_key():
function model_name_and_revision (line 39) | def model_name_and_revision(api_key):
function model (line 46) | def model(api_key, model_name_and_revision):
function model_no_model_name (line 56) | def model_no_model_name(api_key):
function test_dottxt_init_from_client (line 62) | def test_dottxt_init_from_client(api_key, model_name_and_revision):
function test_dottxt_wrong_output_type (line 83) | def test_dottxt_wrong_output_type(model_no_model_name):
function test_dottxt_wrong_input_type (line 88) | def test_dottxt_wrong_input_type(model_no_model_name):
function test_dottxt_wrong_inference_parameters (line 94) | def test_dottxt_wrong_inference_parameters(model_no_model_name):
function test_dottxt_direct_pydantic_call (line 100) | def test_dottxt_direct_pydantic_call(model_no_model_name):
function test_dottxt_direct_jsonschema_call (line 106) | def test_dottxt_direct_jsonschema_call(
function test_dottxt_generator_pydantic_call (line 119) | def test_dottxt_generator_pydantic_call(model):
function test_dottxt_streaming (line 126) | def test_dottxt_streaming(model):
function test_dottxt_batch (line 135) | def test_dottxt_batch(model):
FILE: tests/models/test_dottxt_type_adapter.py
function schema (line 22) | def schema():
function image (line 35) | def image():
function adapter (line 50) | def adapter():
function test_dottxt_type_adapter_input_text (line 54) | def test_dottxt_type_adapter_input_text(adapter):
function test_dottxt_type_adapter_input_invalid (line 60) | def test_dottxt_type_adapter_input_invalid(adapter, image):
function test_dottxt_type_adapter_output_invalid (line 66) | def test_dottxt_type_adapter_output_invalid(adapter):
function test_dottxt_type_adapter_output_dataclass (line 83) | def test_dottxt_type_adapter_output_dataclass(adapter, schema):
function test_dottxt_type_adapter_output_typed_dict (line 93) | def test_dottxt_type_adapter_output_typed_dict(adapter, schema):
function test_dottxt_type_adapter_output_pydantic (line 102) | def test_dottxt_type_adapter_output_pydantic(adapter, schema):
function test_dottxt_type_adapter_output_genson_schema_builder (line 111) | def test_dottxt_type_adapter_output_genson_schema_builder(adapter, schema):
function test_dottxt_type_adapter_json_schema_str (line 129) | def test_dottxt_type_adapter_json_schema_str(adapter, schema):
function test_dottxt_type_adapter_json_schema_dict (line 135) | def test_dottxt_type_adapter_json_schema_dict(adapter, schema):
FILE: tests/models/test_gemini.py
function model (line 27) | def model():
function model_no_model_name (line 32) | def model_no_model_name():
function image (line 37) | def image():
function test_gemini_init_from_client (line 52) | def test_gemini_init_from_client():
function test_gemini_wrong_inference_parameters (line 69) | def test_gemini_wrong_inference_parameters(model):
function test_gemini_wrong_input_type (line 75) | def test_gemini_wrong_input_type(model, image):
function test_gemini_simple_call (line 81) | def test_gemini_simple_call(model):
function test_gemini_direct_call (line 87) | def test_gemini_direct_call(model_no_model_name):
function test_gemini_simple_vision (line 96) | def test_gemini_simple_vision(model, image):
function test_gemini_chat (line 102) | def test_gemini_chat(model, image):
function test_gemini_simple_pydantic (line 114) | def test_gemini_simple_pydantic(model):
function test_gemini_simple_vision_pydantic (line 124) | def test_gemini_simple_vision_pydantic(model, image):
function test_gemini_nested_pydantic (line 134) | def test_gemini_nested_pydantic(model):
function test_gemini_simple_json_schema_string (line 153) | def test_gemini_simple_json_schema_string(model):
function test_gemini_simple_json_schema_dict (line 164) | def test_gemini_simple_json_schema_dict(model):
function test_gemini_simple_typed_dict (line 176) | def test_gemini_simple_typed_dict(model):
function test_gemini_simple_dataclass (line 186) | def test_gemini_simple_dataclass(model):
function test_gemini_simple_choice_enum (line 197) | def test_gemini_simple_choice_enum(model):
function test_gemini_simple_choice_choice (line 208) | def test_gemini_simple_choice_choice(model):
function test_gemini_sample_choice_literal (line 215) | def test_gemini_sample_choice_literal(model):
function test_gemini_simple_choice_list (line 225) | def test_gemini_simple_choice_list(model):
function test_gemini_simple_list_pydantic (line 233) | def test_gemini_simple_list_pydantic(model):
function test_gemini_streaming (line 244) | def test_gemini_streaming(model):
function test_gemini_batch (line 251) | def test_gemini_batch(model):
FILE: tests/models/test_gemini_type_adapter.py
function schema (line 24) | def schema():
function image (line 37) | def image():
function adapter (line 52) | def adapter():
function test_gemini_type_adapter_input_text (line 56) | def test_gemini_type_adapter_input_text(adapter):
function test_gemini_type_adapter_input_vision (line 62) | def test_gemini_type_adapter_input_vision(adapter, image):
function test_gemini_type_adapter_input_chat (line 84) | def test_gemini_type_adapter_input_chat(adapter, image):
function test_gemini_type_adapter_input_invalid (line 113) | def test_gemini_type_adapter_input_invalid(adapter):
function test_gemini_type_adapter_output_invalid (line 125) | def test_gemini_type_adapter_output_invalid(adapter):
function test_gemini_type_adapter_output_none (line 139) | def test_gemini_type_adapter_output_none(adapter):
function test_gemini_type_adapter_output_json_schema (line 144) | def test_gemini_type_adapter_output_json_schema(adapter, schema):
function test_gemini_type_adapter_output_list_json_schema (line 151) | def test_gemini_type_adapter_output_list_json_schema(adapter, schema):
function test_gemini_type_adapter_output_dataclass (line 160) | def test_gemini_type_adapter_output_dataclass(adapter):
function test_gemini_type_adapter_output_list_dataclass (line 173) | def test_gemini_type_adapter_output_list_dataclass(adapter):
function test_gemini_type_adapter_output_typed_dict (line 185) | def test_gemini_type_adapter_output_typed_dict(adapter):
function test_gemini_type_adapter_output_list_typed_dict (line 197) | def test_gemini_type_adapter_output_list_typed_dict(adapter):
function test_gemini_type_adapter_output_pydantic (line 209) | def test_gemini_type_adapter_output_pydantic(adapter):
function test_gemini_type_adapter_output_list_pydantic (line 221) | def test_gemini_type_adapter_output_list_pydantic(adapter):
function test_gemini_type_adapter_output_genson_schema_builder (line 233) | def test_gemini_type_adapter_output_genson_schema_builder(adapter):
function test_gemini_type_adapter_output_list_genson_schema_builder (line 242) | def test_gemini_type_adapter_output_list_genson_schema_builder(adapter):
function test_gemini_type_adapter_output_enum (line 253) | def test_gemini_type_adapter_output_enum(adapter):
function test_gemini_type_adapter_output_literal (line 265) | def test_gemini_type_adapter_output_literal(adapter):
FILE: tests/models/test_llamacpp.py
function test_load_model (line 18) | def test_load_model():
function model (line 35) | def model(tmp_path_factory):
function model_no_chat (line 45) | def model_no_chat(tmp_path_factory):
function lark_grammar (line 55) | def lark_grammar():
function ebnf_grammar (line 78) | def ebnf_grammar():
function test_llamacpp_simple (line 85) | def test_llamacpp_simple(model):
function test_llamacpp_chat (line 90) | def test_llamacpp_chat(model):
function test_llamacpp_regex (line 103) | def test_llamacpp_regex(model):
function test_llamacpp_json (line 110) | def test_llamacpp_json(model):
function test_llamacpp_choice (line 119) | def test_llamacpp_choice(model):
function test_llamacpp_cfg (line 128) | def test_llamacpp_cfg(model, ebnf_grammar):
function test_llamacpp_cfg_outlines_core (line 133) | def test_llamacpp_cfg_outlines_core(model, lark_grammar):
function test_llamacpp_text_stop (line 145) | def test_llamacpp_text_stop(model):
function test_llamacpp_stream_simple (line 150) | def test_llamacpp_stream_simple(model):
function test_llamacpp_stream_chat (line 157) | def test_llamacpp_stream_chat(model):
function test_llamacpp_stream_regex (line 171) | def test_llamacpp_stream_regex(model):
function test_llamacpp_stream_json (line 178) | def test_llamacpp_stream_json(model):
function test_llamacpp_stream_cfg (line 190) | def test_llamacpp_stream_cfg(model, ebnf_grammar):
function test_llamacpp_stream_cfg_outlines_core (line 199) | def test_llamacpp_stream_cfg_outlines_core(model, lark_grammar):
function test_llamacpp_stream_choice (line 212) | def test_llamacpp_stream_choice(model):
function test_llamacpp_stream_text_stop (line 223) | def test_llamacpp_stream_text_stop(model):
function test_llamacpp_batch (line 231) | def test_llamacpp_batch(model):
function test_llamacpp_no_chat (line 237) | def test_llamacpp_no_chat(model_no_chat):
FILE: tests/models/test_llamacpp_tokenizer.py
function model (line 14) | def model():
function model_no_hf_tokenizer (line 29) | def model_no_hf_tokenizer():
function different_model (line 40) | def different_model():
function tokenizer (line 49) | def tokenizer(model):
function another_tokenizer (line 54) | def another_tokenizer(model):
function tokenizer_no_hf_tokenizer (line 59) | def tokenizer_no_hf_tokenizer(model_no_hf_tokenizer):
function different_tokenizer (line 64) | def different_tokenizer(different_model):
function test_llama_cpp_tokenizer_init (line 68) | def test_llama_cpp_tokenizer_init(tokenizer, tokenizer_no_hf_tokenizer):
function test_llama_cpp_tokenizer_encode (line 80) | def test_llama_cpp_tokenizer_encode(tokenizer):
function test_llama_cpp_tokenizer_decode (line 92) | def test_llama_cpp_tokenizer_decode(tokenizer):
function test_llama_cpp_tokenizer_convert_token_to_string (line 99) | def test_llama_cpp_tokenizer_convert_token_to_string(
function test_llama_cpp_tokenizer_eq (line 112) | def test_llama_cpp_tokenizer_eq(tokenizer, another_tokenizer, different_...
function test_llama_cpp_tokenizer_hash (line 118) | def test_llama_cpp_tokenizer_hash(tokenizer, another_tokenizer, differen...
function test_llama_cpp_tokenizer_getstate (line 124) | def test_llama_cpp_tokenizer_getstate(tokenizer):
function test_llama_cpp_tokenizer_setstate (line 135) | def test_llama_cpp_tokenizer_setstate(tokenizer):
function _make_mock_model (line 140) | def _make_mock_model(n_vocab, eos_id, pieces):
function test_vocab_truncation_retry_path (line 161) | def test_vocab_truncation_retry_path():
function test_attention_mask_all_ones_even_with_eos (line 207) | def test_attention_mask_all_ones_even_with_eos():
function test_negative_n_skips_invalid_token (line 243) | def test_negative_n_skips_invalid_token():
FILE: tests/models/test_llamacpp_type_adapter.py
function adapter (line 14) | def adapter():
function logits_processor (line 19) | def logits_processor():
function image (line 26) | def image():
function test_llamacpp_type_adapter_format_input (line 40) | def test_llamacpp_type_adapter_format_input(adapter, image):
function test_llamacpp_type_adapter_format_input_with_chat_template (line 65) | def test_llamacpp_type_adapter_format_input_with_chat_template():
function test_llamacpp_type_adapter_format_input_without_chat_template (line 73) | def test_llamacpp_type_adapter_format_input_without_chat_template():
function test_llamacpp_type_adapter_format_output_type (line 81) | def test_llamacpp_type_adapter_format_output_type(adapter, logits_proces...
FILE: tests/models/test_lmstudio.py
class Foo (line 51) | class Foo(BaseModel):
function model (line 109) | def model():
function model_no_model_name (line 114) | def model_no_model_name():
function async_model (line 119) | def async_model():
function async_model_no_model_name (line 128) | def async_model_no_model_name():
function test_lmstudio_init_from_client (line 136) | def test_lmstudio_init_from_client():
function test_lmstudio_simple (line 169) | def test_lmstudio_simple(model):
function test_lmstudio_direct (line 174) | def test_lmstudio_direct(model_no_model_name):
function test_lmstudio_simple_vision (line 184) | def test_lmstudio_simple_vision(model):
function test_lmstudio_chat (line 193) | def test_lmstudio_chat(model):
function test_lmstudio_json (line 209) | def test_lmstudio_json(model):
function test_lmstudio_wrong_output_type (line 215) | def test_lmstudio_wrong_output_type(model):
function test_lmstudio_wrong_input_type (line 224) | def test_lmstudio_wrong_input_type(model):
function test_lmstudio_stream (line 232) | def test_lmstudio_stream(model):
function test_lmstudio_stream_json (line 238) | def test_lmstudio_stream_json(model_no_model_name):
function test_lmstudio_batch (line 246) | def test_lmstudio_batch(model):
function test_lmstudio_async_init_from_client (line 251) | def test_lmstudio_async_init_from_client():
function test_lmstudio_async_simple (line 281) | async def test_lmstudio_async_simple(async_model):
function test_lmstudio_async_direct (line 287) | async def test_lmstudio_async_direct(async_model_no_model_name):
function test_lmstudio_async_simple_vision (line 298) | async def test_lmstudio_async_simple_vision(async_model):
function test_lmstudio_async_chat (line 308) | async def test_lmstudio_async_chat(async_model):
function test_lmstudio_async_json (line 325) | async def test_lmstudio_async_json(async_model):
function test_lmstudio_async_wrong_output_type (line 332) | async def test_lmstudio_async_wrong_output_type(async_model):
function test_lmstudio_async_wrong_input_type (line 342) | async def test_lmstudio_async_wrong_input_type(async_model):
function test_lmstudio_async_stream (line 351) | async def test_lmstudio_async_stream(async_model):
function test_lmstudio_async_stream_json (line 358) | async def test_lmstudio_async_stream_json(async_model_no_model_name):
function test_lmstudio_async_batch (line 367) | async def test_lmstudio_async_batch(async_model):
FILE: tests/models/test_lmstudio_type_adapter.py
function schema (line 33) | def schema():
function adapter (line 46) | def adapter():
function image (line 51) | def image():
function test_lmstudio_type_adapter_input_text (line 65) | def test_lmstudio_type_adapter_input_text(adapter):
function test_lmstudio_type_adapter_input_vision (line 73) | def test_lmstudio_type_adapter_input_vision(adapter, image):
function test_lmstudio_type_adapter_input_chat (line 82) | def test_lmstudio_type_adapter_input_chat(adapter):
function test_lmstudio_type_adapter_input_chat_no_system (line 96) | def test_lmstudio_type_adapter_input_chat_no_system(adapter):
function test_lmstudio_type_adapter_input_chat_with_image (line 108) | def test_lmstudio_type_adapter_input_chat_with_image(adapter, image):
function test_lmstudio_type_adapter_input_invalid (line 124) | def test_lmstudio_type_adapter_input_invalid(adapter):
function test_lmstudio_type_adapter_input_chat_invalid_content (line 130) | def test_lmstudio_type_adapter_input_chat_invalid_content(adapter):
function test_lmstudio_type_adapter_input_chat_invalid_role (line 138) | def test_lmstudio_type_adapter_input_chat_invalid_role(adapter):
function test_lmstudio_type_adapter_output_none (line 146) | def test_lmstudio_type_adapter_output_none(adapter):
function test_lmstudio_type_adapter_output_invalid (line 151) | def test_lmstudio_type_adapter_output_invalid(adapter):
function test_lmstudio_type_adapter_output_dataclass (line 165) | def test_lmstudio_type_adapter_output_dataclass(adapter, schema):
function test_lmstudio_type_adapter_output_typed_dict (line 175) | def test_lmstudio_type_adapter_output_typed_dict(adapter, schema):
function test_lmstudio_type_adapter_output_pydantic (line 184) | def test_lmstudio_type_adapter_output_pydantic(adapter, schema):
function test_lmstudio_type_adapter_output_genson_schema_builder (line 193) | def test_lmstudio_type_adapter_output_genson_schema_builder(adapter):
function test_lmstudio_type_adapter_json_schema_str (line 208) | def test_lmstudio_type_adapter_json_schema_str(adapter, schema):
function test_lmstudio_type_adapter_json_schema_dict (line 214) | def test_lmstudio_type_adapter_json_schema_dict(adapter, schema):
FILE: tests/models/test_mistral.py
function api_key (line 22) | def api_key():
function image (line 36) | def image():
function model (line 50) | def model(api_key):
function vision_model (line 55) | def vision_model(api_key):
function async_model (line 60) | def async_model(api_key):
function async_vision_model (line 65) | def async_vision_model(api_key):
function model_no_model_name (line 70) | def model_no_model_name(api_key):
function async_model_no_model_name (line 75) | def async_model_no_model_name(api_key):
function test_mistral_init_from_client (line 79) | def test_mistral_init_from_client(api_key):
function test_mistral_wrong_inference_parameters (line 95) | def test_mistral_wrong_inference_parameters(model):
function test_mistral_wrong_input_type (line 100) | def test_mistral_wrong_input_type(model):
function test_mistral_wrong_output_type (line 105) | def test_mistral_wrong_output_type(model):
function test_mistral_call (line 114) | def test_mistral_call(model):
function test_mistral_call_model_name (line 120) | def test_mistral_call_model_name(model_no_model_name):
function test_mistral_multiple_samples (line 129) | def test_mistral_multiple_samples(model):
function test_mistral_vision (line 138) | def test_mistral_vision(image, vision_model):
function test_mistral_chat (line 144) | def test_mistral_chat(image, vision_model):
function test_mistral_pydantic (line 156) | def test_mistral_pydantic(model):
function test_mistral_pydantic_refusal (line 166) | def test_mistral_pydantic_refusal(model):
function test_mistral_vision_pydantic (line 175) | def test_mistral_vision_pydantic(vision_model, image):
function test_mistral_json_schema (line 185) | def test_mistral_json_schema(model):
function test_mistral_streaming (line 197) | def test_mistral_streaming(model):
function test_mistral_batch (line 203) | def test_mistral_batch(model):
function test_mistral_async_init_from_client (line 210) | def test_mistral_async_init_from_client(api_key):
function test_mistral_async_wrong_inference_parameters (line 227) | async def test_mistral_async_wrong_inference_parameters(async_model):
function test_mistral_async_wrong_input_type (line 233) | async def test_mistral_async_wrong_input_type(async_model):
function test_mistral_async_wrong_output_type (line 239) | async def test_mistral_async_wrong_output_type(async_model):
function test_mistral_async_call (line 249) | async def test_mistral_async_call(async_model):
function test_mistral_async_call_model_name (line 256) | async def test_mistral_async_call_model_name(async_model_no_model_name):
function test_mistral_async_multiple_samples (line 266) | async def test_mistral_async_multiple_samples(async_model):
function test_mistral_async_vision (line 276) | async def test_mistral_async_vision(async_vision_model, image):
function test_mistral_async_chat (line 283) | async def test_mistral_async_chat(async_vision_model, image):
function test_mistral_async_pydantic (line 296) | async def test_mistral_async_pydantic(async_model):
function test_mistral_async_pydantic_refusal (line 307) | async def test_mistral_async_pydantic_refusal(async_model):
function test_mistral_async_vision_pydantic (line 317) | async def test_mistral_async_vision_pydantic(async_vision_model, image):
function test_mistral_async_json_schema (line 328) | async def test_mistral_async_json_schema(async_model):
function test_mistral_async_streaming (line 341) | async def test_mistral_async_streaming(async_model):
function test_mistral_async_batch (line 350) | async def test_mistral_async_batch(async_model):
FILE: tests/models/test_mistral_type_adapter.py
function schema (line 28) | def schema():
function image (line 41) | def image():
function adapter (line 55) | def adapter():
function test_mistral_type_adapter_input_text (line 59) | def test_mistral_type_adapter_input_text(adapter):
function test_mistral_type_adapter_input_list (line 67) | def test_mistral_type_adapter_input_list(adapter, image):
function test_mistral_type_adapter_input_chat (line 79) | def test_mistral_type_adapter_input_chat(adapter, image):
function test_mistral_type_adapter_input_invalid (line 98) | def test_mistral_type_adapter_input_invalid(adapter, image):
function test_mistral_type_adapter_output_none (line 131) | def test_mistral_type_adapter_output_none(adapter):
function test_mistral_type_adapter_output_json_mode (line 136) | def test_mistral_type_adapter_output_json_mode(adapter):
function test_mistral_type_adapter_dataclass (line 141) | def test_mistral_type_adapter_dataclass(adapter, schema):
function test_mistral_type_adapter_typed_dict (line 153) | def test_mistral_type_adapter_typed_dict(adapter, schema):
function test_mistral_type_adapter_pydantic (line 164) | def test_mistral_type_adapter_pydantic(adapter, schema):
function test_mistral_type_adapter_genson_schema_builder (line 175) | def test_mistral_type_adapter_genson_schema_builder(adapter, schema):
function test_mistral_type_adapter_json_schema_str (line 194) | def test_mistral_type_adapter_json_schema_str(adapter, schema):
function test_mistral_type_adapter_output_unsupported (line 202) | def test_mistral_type_adapter_output_unsupported(adapter):
FILE: tests/models/test_mlxlm.py
function test_mlxlm_model_initialization (line 29) | def test_mlxlm_model_initialization():
function model (line 42) | def model(tmp_path_factory):
function test_mlxlm_tokenizer (line 48) | def test_mlxlm_tokenizer(model):
function test_mlxlm_simple (line 57) | def test_mlxlm_simple(model):
function test_mlxlm_call (line 63) | def test_mlxlm_call(model):
function test_mlxlm_invalid_input_type (line 69) | def test_mlxlm_invalid_input_type(model):
function test_mlxlm_invalid_inference_kwargs (line 75) | def test_mlxlm_invalid_inference_kwargs(model):
function test_mlxlm_inference_kwargs (line 81) | def test_mlxlm_inference_kwargs(model):
function test_mlxlm_regex (line 88) | def test_mlxlm_regex(model):
function test_mlxlm_json_schema (line 95) | def test_mlxlm_json_schema(model):
function test_mlxlm_choice (line 104) | def test_mlxlm_choice(model):
function test_mlxlm_stream_text_stop (line 114) | def test_mlxlm_stream_text_stop(model):
function test_mlxlm_batch (line 123) | def test_mlxlm_batch(model):
function test_mlxlm_batch_output_type (line 134) | def test_mlxlm_batch_output_type(model):
FILE: tests/models/test_mlxlm_type_adapter.py
function adapter (line 25) | def adapter():
function logits_processor (line 31) | def logits_processor():
function image (line 38) | def image():
function test_mlxlm_type_adapter_format_input_with_template (line 50) | def test_mlxlm_type_adapter_format_input_with_template():
function test_mlxlm_type_adapter_format_input_without_template (line 67) | def test_mlxlm_type_adapter_format_input_without_template():
function test_mlxlm_type_adapter_format_input (line 79) | def test_mlxlm_type_adapter_format_input(adapter, image):
function test_mlxlm_type_adapter_format_output_type (line 109) | def test_mlxlm_type_adapter_format_output_type(adapter, logits_processor):
FILE: tests/models/test_ollama.py
function model (line 20) | def model():
function model_no_model_name (line 25) | def model_no_model_name():
function async_model (line 30) | def async_model():
function async_model_no_model_name (line 35) | def async_model_no_model_name():
function image (line 40) | def image():
function test_ollama_init_from_client (line 54) | def test_ollama_init_from_client():
function test_ollama_wrong_inference_parameters (line 74) | def test_ollama_wrong_inference_parameters(model):
function test_ollama_simple (line 81) | def test_ollama_simple(model):
function test_ollama_direct (line 88) | def test_ollama_direct(model_no_model_name):
function test_ollama_simple_vision (line 97) | def test_ollama_simple_vision(image, model):
function test_ollama_chat (line 107) | def test_ollama_chat(image, model):
function test_ollama_json (line 123) | def test_ollama_json(model):
function test_ollama_wrong_output_type (line 132) | def test_ollama_wrong_output_type(model):
function test_ollama_wrong_input_type (line 141) | def test_ollama_wrong_input_type(model, image):
function test_ollama_stream (line 149) | def test_ollama_stream(model):
function test_ollama_stream_json (line 154) | def test_ollama_stream_json(model_no_model_name):
function test_ollama_batch (line 165) | def test_ollama_batch(model):
function test_ollama_async_init_from_client (line 172) | def test_ollama_async_init_from_client():
function test_ollama_async_wrong_inference_parameters (line 189) | async def test_ollama_async_wrong_inference_parameters(async_model):
function test_ollama_async_simple (line 197) | async def test_ollama_async_simple(async_model):
function test_ollama_async_direct (line 205) | async def test_ollama_async_direct(async_model_no_model_name):
function test_ollama_async_simple_vision (line 215) | async def test_ollama_async_simple_vision(image, async_model):
function test_ollama_async_chat (line 226) | async def test_ollama_async_chat(image, async_model):
function test_ollama_async_json (line 243) | async def test_ollama_async_json(async_model):
function test_ollama_async_wrong_output_type (line 253) | async def test_ollama_async_wrong_output_type(async_model):
function test_ollama_async_wrong_input_type (line 263) | async def test_ollama_async_wrong_input_type(async_model):
function test_ollama_async_stream (line 269) | async def test_ollama_async_stream(async_model):
function test_ollama_async_stream_json (line 275) | async def test_ollama_async_stream_json(async_model_no_model_name):
function test_ollama_async_batch (line 287) | async def test_ollama_async_batch(async_model):
FILE: tests/models/test_ollama_type_adapter.py
function schema (line 22) | def schema():
function image (line 35) | def image():
function adapter (line 50) | def adapter():
function test_ollama_type_adapter_input_text (line 54) | def test_ollama_type_adapter_input_text(adapter):
function test_ollama_type_adapter_input_vision (line 62) | def test_ollama_type_adapter_input_vision(adapter, image):
function test_ollama_type_adapter_input_chat (line 75) | def test_ollama_type_adapter_input_chat(adapter, image):
function test_ollama_type_adapter_input_invalid (line 93) | def test_ollama_type_adapter_input_invalid(adapter):
function test_ollama_type_adapter_output_invalid (line 105) | def test_ollama_type_adapter_output_invalid(adapter):
function test_ollama_type_adapter_output_dataclass (line 119) | def test_ollama_type_adapter_output_dataclass(adapter, schema):
function test_ollama_type_adapter_output_typed_dict (line 129) | def test_ollama_type_adapter_output_typed_dict(adapter, schema):
function test_ollama_type_adapter_output_pydantic (line 138) | def test_ollama_type_adapter_output_pydantic(adapter, schema):
function test_ollama_type_adapter_output_genson_schema_builder (line 147) | def test_ollama_type_adapter_output_genson_schema_builder(adapter):
function test_ollama_type_adapter_json_schema_str (line 162) | def test_ollama_type_adapter_json_schema_str(adapter, schema):
function test_ollama_type_adapter_json_schema_dict (line 168) | def test_ollama_type_adapter_json_schema_dict(adapter, schema):
FILE: tests/models/test_openai.py
function api_key (line 20) | def api_key():
function image (line 34) | def image():
function model (line 49) | def model(api_key):
function async_model (line 54) | def async_model(api_key):
function model_no_model_name (line 59) | def model_no_model_name(api_key):
function async_model_no_model_name (line 64) | def async_model_no_model_name(api_key):
function test_openai_init_from_client (line 68) | def test_openai_init_from_client(api_key):
function test_openai_wrong_inference_parameters (line 84) | def test_openai_wrong_inference_parameters(model):
function test_openai_wrong_input_type (line 89) | def test_openai_wrong_input_type(model, image):
function test_openai_wrong_output_type (line 101) | def test_openai_wrong_output_type(model):
function test_openai_simple_call (line 111) | def test_openai_simple_call(model):
function test_openai_simple_call_multiple_samples (line 117) | def test_openai_simple_call_multiple_samples(model):
function test_openai_direct_call (line 126) | def test_openai_direct_call(model_no_model_name):
function test_openai_simple_vision (line 135) | def test_openai_simple_vision(image, model):
function test_openai_chat (line 141) | def test_openai_chat(image, model):
function test_openai_simple_pydantic (line 153) | def test_openai_simple_pydantic(model):
function test_openai_simple_pydantic_refusal (line 163) | def test_openai_simple_pydantic_refusal(model):
function test_openai_simple_vision_pydantic (line 172) | def test_openai_simple_vision_pydantic(image, model):
function test_openai_simple_json_schema (line 182) | def test_openai_simple_json_schema(model):
function test_openai_streaming (line 194) | def test_openai_streaming(model):
function test_openai_batch (line 200) | def test_openai_batch(model):
function test_openai_async_init_from_client (line 207) | def test_openai_async_init_from_client(api_key):
function test_openai_async_wrong_inference_parameters (line 224) | async def test_openai_async_wrong_inference_parameters(async_model):
function test_openai_async_wrong_input_type (line 230) | async def test_openai_async_wrong_input_type(async_model, image):
function test_openai_async_wrong_output_type (line 243) | async def test_openai_async_wrong_output_type(async_model):
function test_openai_async_simple_call (line 254) | async def test_openai_async_simple_call(async_model):
function test_openai_async_simple_call_multiple_samples (line 261) | async def test_openai_async_simple_call_multiple_samples(async_model):
function test_openai_async_direct_call (line 271) | async def test_openai_async_direct_call(async_model_no_model_name):
function test_openai_async_simple_vision (line 281) | async def test_openai_async_simple_vision(image, async_model):
function test_openai_async_chat (line 288) | async def test_openai_async_chat(image, async_model):
function test_openai_async_simple_pydantic (line 301) | async def test_openai_async_simple_pydantic(async_model):
function test_openai_async_simple_pydantic_refusal (line 312) | async def test_openai_async_simple_pydantic_refusal(async_model):
function test_openai_async_simple_vision_pydantic (line 322) | async def test_openai_async_simple_vision_pydantic(image, async_model):
function test_openai_async_simple_json_schema (line 333) | async def test_openai_async_simple_json_schema(async_model):
function test_openai_async_streaming (line 346) | async def test_openai_async_streaming(async_model):
function test_openai_async_batch (line 355) | async def test_openai_async_batch(async_model):
FILE: tests/models/test_openai_type_adapter.py
function schema (line 23) | def schema():
function image (line 37) | def image():
function adapter (line 52) | def adapter():
function test_openai_type_adapter_input_text (line 56) | def test_openai_type_adapter_input_text(adapter):
function test_openai_type_adapter_input_vision (line 62) | def test_openai_type_adapter_input_vision(adapter, image):
function test_openai_type_adapter_input_chat (line 82) | def test_openai_type_adapter_input_chat(adapter, image):
function test_openai_type_adapter_input_invalid (line 111) | def test_openai_type_adapter_input_invalid(adapter):
function test_openai_type_adapter_output_invalid (line 134) | def test_openai_type_adapter_output_invalid(adapter):
function test_openai_type_adapter_output_none (line 157) | def test_openai_type_adapter_output_none(adapter):
function test_openai_type_adapter_json_mode (line 162) | def test_openai_type_adapter_json_mode(adapter):
function test_openai_type_adapter_dataclass (line 167) | def test_openai_type_adapter_dataclass(adapter, schema):
function test_openai_type_adapter_typed_dict (line 179) | def test_openai_type_adapter_typed_dict(adapter, schema):
function test_openai_type_adapter_pydantic (line 190) | def test_openai_type_adapter_pydantic(adapter, schema):
function test_openai_type_adapter_genson_schema_builder (line 201) | def test_openai_type_adapter_genson_schema_builder(adapter, schema):
function test_openai_type_adapter_json_schema_str (line 220) | def test_openai_type_adapter_json_schema_str(adapter, schema):
function test_openai_type_adapter_json_schema_dict (line 228) | def test_openai_type_adapter_json_schema_dict(adapter, schema):
FILE: tests/models/test_sglang.py
function sync_model (line 179) | def sync_model():
function sync_model_no_model_name (line 184) | def sync_model_no_model_name():
function async_model (line 189) | def async_model():
function async_model_no_model_name (line 194) | def async_model_no_model_name():
function test_sglang_init (line 198) | def test_sglang_init():
function test_sglang_sync_simple_call (line 232) | def test_sglang_sync_simple_call(sync_model):
function test_sglang_sync_streaming (line 237) | def test_sglang_sync_streaming(sync_model_no_model_name):
function test_sglang_sync_batch (line 246) | def test_sglang_sync_batch(sync_model):
function test_sglang_sync_vision (line 253) | def test_sglang_sync_vision(sync_model):
function test_sglang_sync_vision_chat (line 258) | def test_sglang_sync_vision_chat(sync_model):
function test_sglang_sync_multiple_samples (line 273) | def test_sglang_sync_multiple_samples(sync_model):
function test_sglang_sync_json (line 281) | def test_sglang_sync_json(sync_model):
function test_sglang_sync_regex (line 291) | def test_sglang_sync_regex(sync_model):
function test_sglang_sync_cfg (line 297) | def test_sglang_sync_cfg(sync_model):
function test_sglang_async_simple_call (line 308) | async def test_sglang_async_simple_call(async_model):
function test_sglang_async_streaming (line 314) | async def test_sglang_async_streaming(async_model_no_model_name):
function test_sglang_async_batch (line 326) | async def test_sglang_async_batch(async_model):
function test_sglang_async_vision (line 334) | async def test_sglang_async_vision(async_model):
function test_sglang_async_vision_chat (line 340) | async def test_sglang_async_vision_chat(async_model):
function test_sglang_async_multiple_samples (line 356) | async def test_sglang_async_multiple_samples(async_model):
function test_sglang_async_json (line 365) | async def test_sglang_async_json(async_model):
function test_sglang_async_regex (line 376) | async def test_sglang_async_regex(async_model):
function test_sglang_async_cfg (line 383) | async def test_sglang_async_cfg(async_model):
FILE: tests/models/test_sglang_type_adapter.py
function type_adapter (line 29) | def type_adapter():
function cfg_instance (line 33) | def cfg_instance():
function json_schema_instance (line 37) | def json_schema_instance():
function json_schema_whitespace_instance (line 41) | def json_schema_whitespace_instance():
function image (line 45) | def image():
function test_sglang_type_adapter_input_text (line 59) | def test_sglang_type_adapter_input_text(type_adapter):
function test_sglang_type_adapter_input_vision (line 65) | def test_sglang_type_adapter_input_vision(type_adapter, image):
function test_sglang_type_adapter_input_chat (line 84) | def test_sglang_type_adapter_input_chat(type_adapter, image):
function test_sglang_type_adapter_input_invalid (line 113) | def test_sglang_type_adapter_input_invalid(type_adapter):
function test_sglang_type_adapter_output_type (line 125) | def test_sglang_type_adapter_output_type(
FILE: tests/models/test_tgi.py
function sync_model (line 85) | def sync_model():
function async_model (line 90) | def async_model():
function test_tgi_init (line 94) | def test_tgi_init():
function test_tgi_sync_simple_call (line 109) | def test_tgi_sync_simple_call(sync_model):
function test_tgi_sync_streaming (line 114) | def test_tgi_sync_streaming(sync_model):
function test_tgi_sync_batch (line 123) | def test_tgi_sync_batch(sync_model):
function test_tgi_sync_json (line 130) | def test_tgi_sync_json(sync_model):
function test_tgi_sync_regex (line 137) | def test_tgi_sync_regex(sync_model):
function test_tgi_sync_cfg (line 143) | def test_tgi_sync_cfg(sync_model):
function test_tgi_async_simple_call (line 152) | async def test_tgi_async_simple_call(async_model):
function test_tgi_async_streaming (line 158) | async def test_tgi_async_streaming(async_model):
function test_tgi_async_batch (line 167) | async def test_tgi_async_batch(async_model):
function test_tgi_async_json (line 175) | async def test_tgi_async_json(async_model):
function test_tgi_async_regex (line 183) | async def test_tgi_async_regex(async_model):
function test_tgi_async_cfg (line 190) | async def test_tgi_async_cfg(async_model):
FILE: tests/models/test_tgi_model_adapter.py
function type_adapter (line 24) | def type_adapter():
function cfg_instance (line 28) | def cfg_instance():
function json_schema_instance (line 32) | def json_schema_instance():
function json_schema_whitespace_instance (line 36) | def json_schema_whitespace_instance():
function test_tgi_type_adapter_input_text (line 40) | def test_tgi_type_adapter_input_text(type_adapter):
function test_tgi_type_adapter_input_invalid (line 45) | def test_tgi_type_adapter_input_invalid(type_adapter):
function test_tgi_type_adapter_output_type (line 53) | def test_tgi_type_adapter_output_type(
function test_tgi_type_adapter_output_type_invalid (line 80) | def test_tgi_type_adapter_output_type_invalid(
FILE: tests/models/test_tokenizer.py
function test_tokenizer (line 6) | def test_tokenizer():
function test_check_hf_chat_template (line 10) | def test_check_hf_chat_template():
FILE: tests/models/test_transformers.py
function test_transformers_instantiate_invalid (line 24) | def test_transformers_instantiate_invalid():
function test_transformers_instantiate_simple (line 32) | def test_transformers_instantiate_simple():
function test_transformers_instantiate_mamba (line 43) | def test_transformers_instantiate_mamba():
function test_transformers_instantiate_tokenizer_kwargs_dtype (line 51) | def test_transformers_instantiate_tokenizer_kwargs_dtype():
function model (line 65) | def model():
function model_bart (line 77) | def model_bart():
function test_transformers_simple (line 85) | def test_transformers_simple(model):
function test_transformers_call (line 90) | def test_transformers_call(model, model_bart):
function test_transformers_chat (line 102) | def test_transformers_chat(model):
function test_transformers_inference_kwargs (line 112) | def test_transformers_inference_kwargs(model):
function test_transformers_invalid_inference_kwargs (line 117) | def test_transformers_invalid_inference_kwargs(model):
function test_transformers_regex (line 122) | def test_transformers_regex(model):
function test_transformers_json (line 128) | def test_transformers_json(model):
function test_transformers_choice (line 136) | def test_transformers_choice(model):
function test_transformers_multiple_samples (line 145) | def test_transformers_multiple_samples(model):
function test_transformers_batch (line 155) | def test_transformers_batch(model):
function test_transformers_multiple_samples_constrained (line 189) | def test_transformers_multiple_samples_constrained(model):
function test_transformers_batch_constrained (line 201) | def test_transformers_batch_constrained(model):
function test_transformers_streaming (line 230) | def test_transformers_streaming(model):
function test_transformers_parametrized_smoke (line 242) | def test_transformers_parametrized_smoke(model_name):
FILE: tests/models/test_transformers_multimodal.py
function image (line 29) | def image():
function model (line 42) | def model():
function test_transformers_multimodal_instantiate (line 51) | def test_transformers_multimodal_instantiate():
function test_transformers_multimodal_simple (line 64) | def test_transformers_multimodal_simple(model, image):
function test_transformers_multimodal_call (line 73) | def test_transformers_multimodal_call(model, image):
function test_transformers_multimodal_wrong_number_image (line 88) | def test_transformers_multimodal_wrong_number_image(model, image):
function test_transformers_multimodal_wrong_input_type (line 99) | def test_transformers_multimodal_wrong_input_type(model):
function test_transformers_multimodal_chat (line 104) | def test_transformers_multimodal_chat(model, image):
function test_transformers_inference_kwargs (line 136) | def test_transformers_inference_kwargs(model, image):
function test_transformers_invalid_inference_kwargs (line 144) | def test_transformers_invalid_inference_kwargs(model, image):
function test_transformers_several_image (line 155) | def test_transformers_several_image(model, image):
function test_transformers_multimodal_json (line 167) | def test_transformers_multimodal_json(model, image):
function test_transformers_multimodal_regex (line 179) | def test_transformers_multimodal_regex(model, image):
function test_transformers_multimodal_choice (line 189) | def test_transformers_multimodal_choice(model, image):
function test_transformers_multimodal_multiple_samples (line 203) | def test_transformers_multimodal_multiple_samples(model, image):
function test_transformers_multimodal_batch (line 214) | def test_transformers_multimodal_batch(model, image):
FILE: tests/models/test_transformers_multimodal_type_adapter.py
function adapter (line 19) | def adapter():
function logits_processor (line 28) | def logits_processor():
function image (line 35) | def image():
function video (line 45) | def video():
function audio (line 51) | def audio():
function test_transformers_multimodal_type_adapter_format_input (line 56) | def test_transformers_multimodal_type_adapter_format_input(adapter, image):
function test_transformers_multimodal_type_adapter_format_input_empty_assets (line 103) | def test_transformers_multimodal_type_adapter_format_input_empty_assets(...
function test_transformers_multimodal_type_adapter_format_input_chat_invalid_asset_type (line 108) | def test_transformers_multimodal_type_adapter_format_input_chat_invalid_...
function test_transformers_multimodal_type_adapter_format_input_chat_unsupported_content_type (line 123) | def test_transformers_multimodal_type_adapter_format_input_chat_unsuppor...
function test_transformers_multimodal_type_adapter_format_output_type (line 135) | def test_transformers_multimodal_type_adapter_format_output_type(
function test_transformers_multimodal_type_adapter_format_input_chat_missing_asset_key (line 148) | def test_transformers_multimodal_type_adapter_format_input_chat_missing_...
function test_transformers_multimodal_type_adapter_format_input_chat_missing_type_key (line 175) | def test_transformers_multimodal_type_adapter_format_input_chat_missing_...
function test_transformers_multimodal_type_adapter_format_input_invalid_content_type (line 189) | def test_transformers_multimodal_type_adapter_format_input_invalid_conte...
function test_transformers_multimodal_type_adapter_format_asset_for_template (line 206) | def test_transformers_multimodal_type_adapter_format_asset_for_template(...
function test_transformers_multimodal_type_adapter_format_asset_for_template_invalid_type (line 223) | def test_transformers_multimodal_type_adapter_format_asset_for_template_...
function test_transformers_multimodal_type_adapter_multiple_assets_in_single_item (line 235) | def test_transformers_multimodal_type_adapter_multiple_assets_in_single_...
function test_transformers_multimodal_type_adapter_correct_multiple_assets_usage (line 251) | def test_transformers_multimodal_type_adapter_correct_multiple_assets_us...
FILE: tests/models/test_transformers_tokenizer.py
function tokenizer (line 16) | def tokenizer():
function tokenizer_no_pad_token_id (line 21) | def tokenizer_no_pad_token_id(tokenizer):
function tokenizer_seq2seq (line 27) | def tokenizer_seq2seq():
function transformer_tokenizer (line 32) | def transformer_tokenizer(tokenizer):
function another_transformer_tokenizer (line 37) | def another_transformer_tokenizer(tokenizer):
function transformer_tokenizer_seq2seq (line 42) | def transformer_tokenizer_seq2seq(tokenizer_seq2seq):
function test_get_llama_tokenizer_types (line 46) | def test_get_llama_tokenizer_types():
function test_transformer_tokenizer_init (line 54) | def test_transformer_tokenizer_init(
function test_transformer_tokenizer_encode (line 75) | def test_transformer_tokenizer_encode(transformer_tokenizer):
function test_transformer_tokenizer_decode (line 82) | def test_transformer_tokenizer_decode(transformer_tokenizer):
function test_transformer_tokenizer_convert_token_to_string (line 89) | def test_transformer_tokenizer_convert_token_to_string(transformer_token...
function test_transformer_tokenizer_eq (line 104) | def test_transformer_tokenizer_eq(
function test_transformer_tokenizer_hash (line 124) | def test_transformer_tokenizer_hash(
function test_transformer_tokenizer_getstate_setstate (line 134) | def test_transformer_tokenizer_getstate_setstate(
FILE: tests/models/test_transformers_type_adapter.py
function adapter (line 18) | def adapter():
function logits_processor (line 27) | def logits_processor():
function image (line 33) | def image():
function test_transformers_type_adapter_format_input (line 48) | def test_transformers_type_adapter_format_input(adapter, image):
function test_transformers_type_adapter_format_output_type (line 72) | def test_transformers_type_adapter_format_output_type(
FILE: tests/models/test_utils.py
function test_set_additional_properties_false_json_schema (line 4) | def test_set_additional_properties_false_json_schema():
FILE: tests/models/test_vllm.py
function sync_model (line 173) | def sync_model():
function sync_model_no_model_name (line 178) | def sync_model_no_model_name():
function async_model (line 183) | def async_model():
function async_model_no_model_name (line 188) | def async_model_no_model_name():
function test_vllm_init (line 192) | def test_vllm_init():
function test_vllm_sync_simple_call (line 226) | def test_vllm_sync_simple_call(sync_model):
function test_vllm_sync_streaming (line 231) | def test_vllm_sync_streaming(sync_model_no_model_name):
function test_vllm_sync_batch (line 240) | def test_vllm_sync_batch(sync_model):
function test_vllm_sync_vision (line 247) | def test_vllm_sync_vision(sync_model):
function test_vllm_sync_vision_chat (line 252) | def test_vllm_sync_vision_chat(sync_model):
function test_vllm_sync_multiple_samples (line 267) | def test_vllm_sync_multiple_samples(sync_model):
function test_vllm_sync_json (line 275) | def test_vllm_sync_json(sync_model):
function test_vllm_sync_regex (line 282) | def test_vllm_sync_regex(sync_model):
function test_vllm_sync_cfg (line 288) | def test_vllm_sync_cfg(sync_model):
function test_vllm_async_simple_call (line 295) | async def test_vllm_async_simple_call(async_model):
function test_vllm_async_streaming (line 301) | async def test_vllm_async_streaming(async_model_no_model_name):
function test_vllm_async_batch (line 313) | async def test_vllm_async_batch(async_model):
function test_vllm_async_vision (line 321) | async def test_vllm_async_vision(async_model):
function test_vllm_async_vision_chat (line 327) | async def test_vllm_async_vision_chat(async_model):
function test_vllm_async_multiple_samples (line 343) | async def test_vllm_async_multiple_samples(async_model):
function test_vllm_async_json (line 352) | async def test_vllm_async_json(async_model):
function test_vllm_async_regex (line 360) | async def test_vllm_async_regex(async_model):
function test_vllm_async_cfg (line 367) | async def test_vllm_async_cfg(async_model):
FILE: tests/models/test_vllm_offline.py
function image (line 33) | def image():
function test_vllm_model_initialization (line 47) | def test_vllm_model_initialization():
function model (line 55) | def model(tmp_path_factory):
function test_vllm_simple (line 60) | def test_vllm_simple(model):
function test_vllm_call (line 65) | def test_vllm_call(model):
function test_vllm_inference_kwargs (line 70) | def test_vllm_inference_kwargs(model):
function test_vllm_chat (line 80) | def test_vllm_chat(model):
function test_vllm_invalid_inference_kwargs (line 92) | def test_vllm_invalid_inference_kwargs(model):
function test_vllm_regex (line 97) | def test_vllm_regex(model):
function test_vllm_json (line 103) | def test_vllm_json(model):
function test_vllm_choice (line 111) | def test_vllm_choice(model):
function test_vllm_multiple_samples (line 120) | def test_vllm_multiple_samples(model):
function test_vllm_batch (line 129) | def test_vllm_batch(model):
function test_vllm_streaming (line 155) | def test_vllm_streaming(model):
FILE: tests/models/test_vllm_offline_type_adapter.py
function type_adapter (line 28) | def type_adapter():
function cfg_instance (line 32) | def cfg_instance():
function json_schema_instance (line 36) | def json_schema_instance():
function json_schema_whitespace_instance (line 40) | def json_schema_whitespace_instance():
function regex_instance (line 44) | def regex_instance():
function image (line 48) | def image():
function test_vllm_offline_type_adapter_input_text (line 62) | def test_vllm_offline_type_adapter_input_text(type_adapter):
function test_vllm_offline_type_adapter_input_text_with_template (line 68) | def test_vllm_offline_type_adapter_input_text_with_template():
function test_vllm_offline_type_adapter_input_text_without_template (line 76) | def test_vllm_offline_type_adapter_input_text_without_template():
function test_vllm_offline_type_adapter_input_chat (line 84) | def test_vllm_offline_type_adapter_input_chat(type_adapter):
function test_vllm_offline_type_adapter_input_invalid (line 98) | def test_vllm_offline_type_adapter_input_invalid(type_adapter, image):
function test_vllm_offline_type_adapter_output_type (line 111) | def test_vllm_offline_type_adapter_output_type(
FILE: tests/models/test_vllm_type_adapter.py
function type_adapter (line 30) | def type_adapter():
function cfg_instance (line 34) | def cfg_instance():
function json_schema_instance (line 38) | def json_schema_instance():
function json_schema_whitespace_instance (line 42) | def json_schema_whitespace_instance():
function image (line 46) | def image():
function test_vllm_type_adapter_input_text (line 60) | def test_vllm_type_adapter_input_text(type_adapter):
function test_vllm_type_adapter_input_vision (line 66) | def test_vllm_type_adapter_input_vision(type_adapter, image):
function test_vllm_type_adapter_input_chat (line 85) | def test_vllm_type_adapter_input_chat(type_adapter, image):
function test_vllm_type_adapter_input_invalid (line 114) | def test_vllm_type_adapter_input_invalid(type_adapter):
function test_vllm_type_adapter_output_type (line 126) | def test_vllm_type_adapter_output_type(
FILE: tests/processors/test_base_processor.py
class MockLogitsProcessor (line 56) | class MockLogitsProcessor(OutlinesLogitsProcessor):
method process_logits (line 57) | def process_logits(self, input_ids, logits):
function test_base_logits_processor_init (line 65) | def test_base_logits_processor_init(library):
function test_base_logits_processor_call (line 74) | def test_base_logits_processor_call(library):
function test_base_logits_processor_init_library_name (line 90) | def test_base_logits_processor_init_library_name(library):
FILE: tests/processors/test_tensor_adapters.py
function create_tensor (line 31) | def create_tensor(framework, shape, dtype=None):
function compare_tensors (line 41) | def compare_tensors(framework, tensor1, tensor2):
function test_tensor_adapter_shape (line 53) | def test_tensor_adapter_shape(framework):
function test_tensor_adapter_unsqueeze (line 77) | def test_tensor_adapter_unsqueeze(framework):
function test_tensor_adapter_squeeze (line 90) | def test_tensor_adapter_squeeze(framework):
function test_tensor_adapter_to_list (line 109) | def test_tensor_adapter_to_list(framework):
function test_tensor_adapter_to_scalar (line 138) | def test_tensor_adapter_to_scalar(framework):
function test_tensor_adapter_full_like (line 155) | def test_tensor_adapter_full_like(framework):
function test_tensor_adapter_concatenate (line 165) | def test_tensor_adapter_concatenate(framework):
function test_tensor_adapter_get_to_device (line 199) | def test_tensor_adapter_get_to_device(framework):
function test_tensor_adapter_boolean_ones_like (line 212) | def test_tensor_adapter_boolean_ones_like(framework):
function test_tensor_adapter_apply_mask (line 223) | def test_tensor_adapter_apply_mask(framework):
function test_tensor_adapter_argsort_descending (line 247) | def test_tensor_adapter_argsort_descending(framework):
FILE: tests/test_applications.py
function model (line 13) | def model():
function another_model (line 21) | def another_model():
function test_application_initialization (line 28) | def test_application_initialization():
function test_application_generator_no_model (line 39) | def test_application_generator_no_model():
function test_application_template_call (line 48) | def test_application_template_call(model):
function test_application_callable_call (line 57) | def test_application_callable_call(model):
function test_application_template_error (line 68) | def test_application_template_error(model):
function test_application_generator_reuse (line 77) | def test_application_generator_reuse(model, another_model):
FILE: tests/test_cache.py
function temp_dir (line 13) | def temp_dir():
function refresh_environment (line 20) | def refresh_environment():
function test_cache (line 41) | def test_cache(refresh_environment):
function test_get_cache (line 55) | def test_get_cache(test_cache):
function test_disable_cache (line 81) | def test_disable_cache(test_cache):
function test_clear_cache (line 103) | def test_clear_cache(test_cache):
function test_version_upgrade_cache_invalidate (line 128) | def test_version_upgrade_cache_invalidate(test_cache, mocker):
function test_cache_disabled_decorator (line 173) | def test_cache_disabled_decorator(test_cache):
function temp_cache_dir (line 204) | def temp_cache_dir():
FILE: tests/test_generator.py
function steerable_model (line 62) | def steerable_model():
function sample_processor (line 71) | def sample_processor():
function black_box_sync_model (line 78) | def black_box_sync_model():
function black_box_async_model (line 83) | def black_box_async_model():
function test_steerable_generator_init_valid_processor (line 90) | def test_steerable_generator_init_valid_processor(steerable_model, sampl...
function test_steerable_generator_init_cfg_output_type (line 96) | def test_steerable_generator_init_cfg_output_type(steerable_model):
function test_steerable_generator_init_other_output_type (line 102) | def test_steerable_generator_init_other_output_type(steerable_model):
function test_steerable_generator_init_invalid_output_type (line 108) | def test_steerable_generator_init_invalid_output_type(steerable_model, s...
function test_steerable_generator_call (line 113) | def test_steerable_generator_call(steerable_model):
function test_steerable_generator_stream (line 119) | def test_steerable_generator_stream(steerable_model):
function test_black_box_generator_init (line 130) | def test_black_box_generator_init(black_box_sync_model):
function test_black_box_generator_call (line 135) | def test_black_box_generator_call(black_box_sync_model):
function test_black_box_generator_stream (line 141) | def test_black_box_generator_stream(black_box_sync_model):
function test_async_black_box_generator_init (line 151) | def test_async_black_box_generator_init(black_box_async_model):
function test_async_black_box_generator_call (line 158) | async def test_async_black_box_generator_call(black_box_async_model):
function test_async_black_box_generator_stream (line 165) | async def test_async_black_box_generator_stream(black_box_async_model):
function test_generator_init_no_model (line 177) | def test_generator_init_no_model():
function test_generator_init_multiple_output_type (line 182) | def test_generator_init_multiple_output_type(steerable_model, sample_pro...
function test_generator_steerable_output_type (line 187) | def test_generator_steerable_output_type(steerable_model):
function test_generator_steerable_processor (line 194) | def test_generator_steerable_processor(steerable_model, sample_processor):
function test_generator_black_box_sync_output_type (line 201) | def test_generator_black_box_sync_output_type(black_box_sync_model):
function test_generator_black_box_sync_processor (line 208) | def test_generator_black_box_sync_processor(black_box_sync_model, sample...
function test_generator_black_box_async_output_type (line 213) | def test_generator_black_box_async_output_type(black_box_async_model):
function test_generator_black_box_async_processor (line 220) | def test_generator_black_box_async_processor(black_box_async_model, samp...
FILE: tests/test_inputs.py
function image_input (line 15) | def image_input():
function test_image_initialization (line 23) | def test_image_initialization():
function test_image_initialization_invalid (line 47) | def test_image_initialization_invalid():
function test_video_initialization (line 61) | def test_video_initialization():
function test_audio_initialization (line 67) | def test_audio_initialization():
function test_chat_initialization (line 73) | def test_chat_initialization():
function test_chat_append (line 94) | def test_chat_append():
function test_chat_extend (line 101) | def test_chat_extend():
function test_chat_pop (line 111) | def test_chat_pop():
function test_chat_add_system_message (line 129) | def test_chat_add_system_message(image_input):
function test_add_user_message_string (line 152) | def test_add_user_message_string(image_input):
function test_add_assistant_message_string (line 175) | def test_add_assistant_message_string(image_input):
FILE: tests/test_templates.py
function sample_function (line 25) | def sample_function(x, y=2):
function function_with_annotations (line 29) | def function_with_annotations(x: int, y: str) -> str:
function function_with_no_docstring (line 33) | def function_with_no_docstring(x, y):
class CallableClass (line 36) | class CallableClass:
method __call__ (line 37) | def __call__(self):
class PydanticClass (line 40) | class PydanticClass(BaseModel):
function test_vision_initialization (line 44) | def test_vision_initialization():
function test_vision_invalid_image_format (line 69) | def test_vision_invalid_image_format():
function render (line 79) | def render(content: str, filters: Optional[dict] = None, **kwargs):
function test_render (line 84) | def test_render():
function test_render_escaped_linebreak (line 119) | def test_render_escaped_linebreak():
function test_render_jinja (line 141) | def test_render_jinja():
function test_render_filters (line 182) | def test_render_filters():
function temp_prompt_file (line 237) | def temp_prompt_file():
function test_prompt_from_file (line 277) | def test_prompt_from_file(temp_prompt_file):
function test_prompt_from_str (line 300) | def test_prompt_from_str():
function test_template_from_str_with_extra_linebreaks (line 308) | def test_template_from_str_with_extra_linebreaks():
function test_get_fn_name (line 318) | def test_get_fn_name():
function test_get_fn_args (line 328) | def test_get_fn_args():
function test_get_fn_description (line 335) | def test_get_fn_description():
function test_get_fn_source (line 343) | def test_get_fn_source():
function test_get_fn_signature (line 354) | def test_get_fn_signature():
function test_get_schema (line 363) | def test_get_schema():
FILE: tests/test_utils/mock_lmstudio_client.py
function normalize_for_hash (line 7) | def normalize_for_hash(obj):
function normalize_lmstudio_messages (line 30) | def normalize_lmstudio_messages(messages):
function normalize_lmstudio_content (line 42) | def normalize_lmstudio_content(content):
function hash_lmstudio_request (line 63) | def hash_lmstudio_request(data: dict) -> str:
class MockLMStudioResponse (line 69) | class MockLMStudioResponse:
method __init__ (line 72) | def __init__(self, content: str):
class MockLMStudioModel (line 76) | class MockLMStudioModel:
method __init__ (line 79) | def __init__(self, mock_responses: Dict[str, Any]):
method respond (line 82) | def respond(self, messages, **kwargs):
method respond_stream (line 89) | def respond_stream(self, messages, **kwargs):
class MockLMStudioLLM (line 98) | class MockLMStudioLLM:
method __init__ (line 101) | def __init__(self, mock_responses: Dict[str, Any]):
method model (line 104) | def model(self, model_key=None):
class MockLMStudioClient (line 108) | class MockLMStudioClient:
method __init__ (line 111) | def __init__(self):
method add_mock_responses (line 115) | def add_mock_responses(self, mocks: List[Tuple[dict, Any]]):
class MockAsyncLMStudioModel (line 122) | class MockAsyncLMStudioModel:
method __init__ (line 125) | def __init__(self, mock_responses: Dict[str, Any]):
method respond (line 128) | async def respond(self, messages, **kwargs):
method respond_stream (line 135) | async def respond_stream(self, messages, **kwargs):
class MockAsyncLMStudioLLM (line 149) | class MockAsyncLMStudioLLM:
method __init__ (line 152) | def __init__(self, mock_responses: Dict[str, Any]):
method model (line 155) | async def model(self, model_key=None):
class MockAsyncLMStudioClient (line 159) | class MockAsyncLMStudioClient:
method __init__ (line 162) | def __init__(self):
method add_mock_responses (line 167) | def add_mock_responses(self, mocks: List[Tuple[dict, Any]]):
method __aenter__ (line 173) | async def __aenter__(self):
method __aexit__ (line 177) | async def __aexit__(self, exc_type, exc_val, exc_tb):
FILE: tests/test_utils/mock_openai_client.py
class MockChoice (line 7) | class MockChoice:
method __init__ (line 8) | def __init__(
class MockCompletionResponse (line 22) | class MockCompletionResponse:
method __init__ (line 23) | def __init__(self, choices: List[MockChoice]):
class MockStreamingChunk (line 27) | class MockStreamingChunk:
method __init__ (line 28) | def __init__(self, content: Optional[str] = None):
class MockOpenAIClient (line 38) | class MockOpenAIClient:
method __init__ (line 41) | def __init__(self):
method add_mock_responses (line 61) | def add_mock_responses(self, mocks: list):
method _create_standard_response (line 66) | def _create_standard_response(self, response):
method _create_streaming_response (line 72) | def _create_streaming_response(self, response):
class MockAsyncOpenAIClient (line 77) | class MockAsyncOpenAIClient:
method __init__ (line 80) | def __init__(self):
method add_mock_responses (line 100) | def add_mock_responses(self, mocks: list):
method _create_async_standard_response (line 105) | async def _create_async_standard_response(self, response):
method _create_async_streaming_response (line 112) | async def _create_async_streaming_response(self, response):
FILE: tests/test_utils/mock_tgi_client.py
class MockTGIInferenceClient (line 7) | class MockTGIInferenceClient:
method __init__ (line 10) | def __init__(self):
method add_mock_responses (line 28) | def add_mock_responses(self, mocks: list):
class MockAsyncTGIInferenceClient (line 34) | class MockAsyncTGIInferenceClient:
method __init__ (line 37) | def __init__(self):
method add_mock_responses (line 55) | def add_mock_responses(self, mocks: list):
method _create_async_streaming_response (line 60) | async def _create_async_streaming_response(self, response):
FILE: tests/test_utils/utils.py
function hash_dict (line 6) | def hash_dict(d) -> str:
FILE: tests/types/test_custom_types.py
function test_type_regex (line 98) | def test_type_regex(custom_type, test_string, should_match):
function test_type_enum (line 130) | def test_type_enum(custom_type, test_string, should_match):
FILE: tests/types/test_dsl.py
function test_dsl_init (line 69) | def test_dsl_init():
function test_dsl_term_methods (line 157) | def test_dsl_term_methods():
function test_dsl_sequence (line 195) | def test_dsl_sequence():
function test_dsl_alternatives (line 217) | def test_dsl_alternatives():
function test_dsl_optional (line 237) | def test_dsl_optional():
function test_dsl_exactly (line 251) | def test_dsl_exactly():
function test_dsl_at_least (line 266) | def test_dsl_at_least():
function test_dsl_at_most (line 281) | def test_dsl_at_most():
function test_between (line 296) | def test_between():
function test_dsl_zero_or_more (line 312) | def test_dsl_zero_or_more():
function test_dsl_one_or_more (line 326) | def test_dsl_one_or_more():
function test_dsl_aliases (line 340) | def test_dsl_aliases():
function test_dsl_term_pydantic_simple (line 348) | def test_dsl_term_pydantic_simple():
function test_dsl_term_pydantic_combination (line 363) | def test_dsl_term_pydantic_combination():
function test_dsl_display (line 382) | def test_dsl_display():
function test_cfg (line 395) | def test_cfg():
function test_json_schema (line 409) | def test_json_schema():
function test_dsl_cfg_from_file (line 490) | def test_dsl_cfg_from_file():
function test_dsl_json_schema_from_file (line 505) | def test_dsl_json_schema_from_file():
function test_dsl_python_types_to_terms (line 524) | def test_dsl_python_types_to_terms():
function test_dsl_handle_literal (line 650) | def test_dsl_handle_literal():
function test_dsl_handle_union (line 659) | def test_dsl_handle_union():
function test_dsl_handle_list (line 695) | def test_dsl_handle_list():
function test_dsl_handle_tuple (line 728) | def test_dsl_handle_tuple():
function test_dsl_handle_dict (line 769) | def test_dsl_handle_dict():
function test_ensure_json_quoted_string (line 791) | def test_ensure_json_quoted_string():
function test_ensure_json_quoted_alternatives (line 799) | def test_ensure_json_quoted_alternatives():
function test_ensure_json_quoted_passthrough (line 810) | def test_ensure_json_quoted_passthrough():
function test_list_of_literals_quoted (line 819) | def test_list_of_literals_quoted():
function test_tuple_of_literals_quoted (line 832) | def test_tuple_of_literals_quoted():
function test_dict_literal_key_quoted (line 844) | def test_dict_literal_key_quoted():
function test_list_of_int_unchanged (line 858) | def test_list_of_int_unchanged():
function test_ensure_json_quoted_sequence_passthrough (line 865) | def test_ensure_json_quoted_sequence_passthrough():
function test_ensure_json_quoted_regex_passthrough (line 871) | def test_ensure_json_quoted_regex_passthrough():
function test_list_single_literal (line 878) | def test_list_single_literal():
function test_dict_literal_value_quoted (line 889) | def test_dict_literal_value_quoted():
function test_tuple_ellipsis_literal_quoted (line 902) | def test_tuple_ellipsis_literal_quoted():
function test_list_of_bool_unchanged (line 914) | def test_list_of_bool_unchanged():
function test_dict_int_value_unchanged (line 921) | def test_dict_int_value_unchanged():
function test_ensure_json_quoted_nested_alternatives (line 931) | def test_ensure_json_quoted_nested_alternatives():
function test_literal_with_special_characters (line 947) | def test_literal_with_special_characters():
function test_e2e_list_literal_matches_quoted_json (line 965) | def test_e2e_list_literal_matches_quoted_json():
function test_e2e_standalone_literal_no_quotes (line 975) | def test_e2e_standalone_literal_no_quotes():
function test_e2e_list_literal_empty_string (line 983) | def test_e2e_list_literal_empty_string():
function test_e2e_list_mixed_literal_string_and_int (line 991) | def test_e2e_list_mixed_literal_string_and_int():
function test_e2e_dict_literal_keys_quoted (line 1001) | def test_e2e_dict_literal_keys_quoted():
function test_e2e_dict_literal_values_quoted (line 1009) | def test_e2e_dict_literal_values_quoted():
function test_e2e_tuple_fixed_literal_quoted (line 1016) | def test_e2e_tuple_fixed_literal_quoted():
function test_e2e_tuple_variadic_literal_quoted (line 1023) | def test_e2e_tuple_variadic_literal_quoted():
function test_e2e_list_enum_string_values_quoted (line 1031) | def test_e2e_list_enum_string_values_quoted():
function test_e2e_list_int_not_quoted (line 1044) | def test_e2e_list_int_not_quoted():
function test_e2e_list_literal_special_characters (line 1052) | def test_e2e_list_literal_special_characters():
function test_e2e_dict_literal_key_and_enum_value (line 1060) | def test_e2e_dict_literal_key_and_enum_value():
function test_to_regex (line 1073) | def test_to_regex():
FILE: tests/types/test_json_schema_utils.py
function test_schema_type_to_python_simple_types (line 21) | def test_schema_type_to_python_simple_types():
function test_schema_type_to_python_enum (line 30) | def test_schema_type_to_python_enum():
function test_schema_type_to_python_array (line 36) | def test_schema_type_to_python_array():
function test_schema_type_to_python_object (line 53) | def test_schema_type_to_python_object():
function test_schema_type_to_python_unknown_type (line 88) | def test_schema_type_to_python_unknown_type():
function test_json_schema_dict_to_typeddict_basic (line 100) | def test_json_schema_dict_to_typeddict_basic():
function test_json_schema_dict_to_typeddict_array_enum (line 119) | def test_json_schema_dict_to_typeddict_array_enum():
function test_json_schema_dict_to_typeddict_nested_object (line 143) | def test_json_schema_dict_to_typeddict_nested_object():
function test_json_schema_dict_to_pydantic_basic (line 170) | def test_json_schema_dict_to_pydantic_basic():
function test_json_schema_dict_to_pydantic_array_enum (line 190) | def test_json_schema_dict_to_pydantic_array_enum():
function test_json_schema_dict_to_pydantic_nested_object (line 215) | def test_json_schema_dict_to_pydantic_nested_object():
function test_json_schema_dict_to_dataclass_basic (line 245) | def test_json_schema_dict_to_dataclass_basic():
function test_json_schema_dict_to_dataclass_array_enum (line 266) | def test_json_schema_dict_to_dataclass_array_enum():
function test_json_schema_dict_to_dataclass_nested_object (line 292) | def test_json_schema_dict_to_dataclass_nested_object():
FILE: tests/types/test_to_regex.py
function test_to_regex_simple (line 23) | def test_to_regex_simple():
function test_to_regex_combinations (line 91) | def test_to_regex_combinations():
FILE: tests/types/test_types_utils.py
function member (line 10) | def member(x): # type: ignore[no-redef]
function sample_enum (line 68) | def sample_enum():
function sample_complex_enum (line 76) | def sample_complex_enum():
function sample_empty_enum (line 88) | def sample_empty_enum():
function sample_class (line 99) | def sample_class():
function sample_dataclass (line 106) | def sample_dataclass():
function sample_typed_dict (line 115) | def sample_typed_dict():
function sample_pydantic_model (line 123) | def sample_pydantic_model():
function sample_schema_builder (line 131) | def sample_schema_builder():
function sample_function (line 139) | def sample_function():
function sample_function_missing_type (line 146) | def sample_function_missing_type():
function test_is_int (line 153) | def test_is_int():
function test_is_int_instance (line 165) | def test_is_int_instance():
function test_is_float (line 173) | def test_is_float():
function test_is_float_instance (line 185) | def test_is_float_instance():
function test_is_str (line 192) | def test_is_str():
function test_is_str_instance (line 204) | def test_is_str_instance():
function test_is_bool (line 212) | def test_is_bool():
function test_is_datetime (line 222) | def test_is_datetime():
function test_is_date (line 229) | def test_is_date():
function test_is_time (line 236) | def test_is_time():
function test_is_native_dict (line 243) | def test_is_native_dict():
function test_is_typing_dict (line 251) | def test_is_typing_dict():
function test_is_typing_list (line 258) | def test_is_typing_list():
function test_is_typing_tuple (line 266) | def test_is_typing_tuple():
function test_is_union (line 274) | def test_is_union():
function test_is_literal (line 282) | def test_is_literal():
function test_is_dataclass (line 290) | def test_is_dataclass(
function test_is_typed_dict (line 304) | def test_is_typed_dict(
function test_is_pydantic_model (line 318) | def test_is_pydantic_model(
function test_is_genson_schema_builder (line 332) | def test_is_genson_schema_builder(
function test_is_enum (line 350) | def test_is_enum(sample_enum):
function test_is_callable (line 358) | def test_is_callable(sample_function, sample_class, sample_dataclass, sa...
function test_get_enum_from_choice (line 371) | def test_get_enum_from_choice(sample_enum):
function test_get_enum_from_literal (line 380) | def test_get_enum_from_literal(sample_enum):
function test_get_schema_from_signature (line 395) | def test_get_schema_from_signature(sample_function, sample_function_miss...
function test_get_schema_from_enum (line 408) | def test_get_schema_from_enum(sample_complex_enum, sample_empty_enum):
Condensed preview — 239 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,322K chars).
[
{
"path": ".devcontainer/devcontainer.json",
"chars": 455,
"preview": "{\n \"name\": \"dottxt-ai\",\n \"image\": \"mcr.microsoft.com/devcontainers/python:3.12\",\n \"runArgs\": [\n \"--device=nvidia.c"
},
{
"path": ".editorconfig",
"chars": 255,
"preview": "# EditorConfig is awesome: https://EditorConfig.org\n\n# top-most EditorConfig file\nroot = true\n\n[*]\nindent_style = space\n"
},
{
"path": ".github/ISSUE_TEMPLATE/bug_report.yml",
"chars": 2638,
"preview": "# Issue template inspired by NumPy's excellent template:\n# https://github.com/numpy/numpy/edit/main/.github/ISSUE_TEMPLA"
},
{
"path": ".github/ISSUE_TEMPLATE/config.yml",
"chars": 197,
"preview": "contact_links:\n - name: 🤔 Questions & Help\n url: https://github.com/dottxt-ai/outlines/discussions/new\n about: \"I"
},
{
"path": ".github/PULL_REQUEST_TEMPLATE/pull_request_template.md",
"chars": 1221,
"preview": "# 🚧 Thank you for opening a PR!\n\nA few important guidelines and requirements before we can merge your PR:\n\n- [ ] We shou"
},
{
"path": ".github/scripts/build_sdist_and_wheel.sh",
"chars": 521,
"preview": "#!/bin/bash\n\n# Build sdist and wheel\npython -m pip install -U pip\npython -m pip install build\npython -m build\n\n# Check s"
},
{
"path": ".github/workflows/build_documentation.yml",
"chars": 2687,
"preview": "name: Build the documentation\n\non:\n pull_request:\n types: [opened, synchronize, reopened, closed]\n branches: [mai"
},
{
"path": ".github/workflows/publish_documentation.yml",
"chars": 1402,
"preview": "name: Publish the documentation\n\non:\n workflow_dispatch:\n push:\n branches:\n - main\n release:\n types:\n "
},
{
"path": ".github/workflows/release_pypi.yaml",
"chars": 713,
"preview": "name: Release PyPi\n\non:\n release:\n types:\n - created\njobs:\n release-job:\n name: Build and publish on PyPi\n "
},
{
"path": ".github/workflows/tests.yml",
"chars": 3482,
"preview": "name: Tests\n\non:\n pull_request:\n branches: [main,v1.0]\n push:\n branches: [main]\n\njobs:\n style:\n name: Check "
},
{
"path": ".github/workflows/tests_api_models.yml",
"chars": 2012,
"preview": "name: API Models Tests\n\non:\n workflow_dispatch:\n\njobs:\n tests:\n name: Run API Models Tests\n runs-on: ubuntu-late"
},
{
"path": ".gitignore",
"chars": 180,
"preview": "__pycache__\n.benchmarks\n.cache\n.coverage\n.direnv\n.env\n.idea\n.pytest_cache\n.python-version\n.venv\n*_version.py\n*.egg-info\n"
},
{
"path": ".pre-commit-config.yaml",
"chars": 549,
"preview": "repos:\n- repo: https://github.com/pre-commit/pre-commit-hooks\n rev: v5.0.0\n hooks:\n - id: check-merge-conflict\n "
},
{
"path": ".pydocstyle",
"chars": 32,
"preview": "[pydocstyle]\nconvention = numpy\n"
},
{
"path": ".readthedocs.yaml",
"chars": 257,
"preview": "version: 2\n\npython:\n version: \"3.8\"\n install:\n - method: pip\n path: .\n extra_requirements:\n "
},
{
"path": ".vscode/settings.json",
"chars": 148,
"preview": "{\n \"python.testing.pytestArgs\": [\n \"tests\"\n ],\n \"python.testing.unittestEnabled\": false,\n \"python.tes"
},
{
"path": "LICENSE",
"chars": 11354,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 24139,
"preview": "<div align=\"center\" style=\"margin-bottom: 1em;\">\n\n<img src=\"./docs/assets/images/logo-light-mode.svg#gh-light-mode-only\""
},
{
"path": "docs/api_reference/index.md",
"chars": 16,
"preview": "# API Reference\n"
},
{
"path": "docs/blog/index.md",
"chars": 7,
"preview": "# Blog\n"
},
{
"path": "docs/community/contribute.md",
"chars": 5505,
"preview": "---\ntitle: Contribute\n---\n\n## What contributions?\n\n- **Documentation** contributions are very valuable to us!\n- **Exampl"
},
{
"path": "docs/community/examples.md",
"chars": 1695,
"preview": "# Community projects and articles\n\nPublishing examples and articles about Outlines is a meaningful way to contribute to"
},
{
"path": "docs/community/feedback.md",
"chars": 8239,
"preview": "---\ntitle: Feedback\n---\n\n# Feedback\n\nIf Outlines has been helpful to you, let us know on [Discord][discord] or give us a"
},
{
"path": "docs/community/index.md",
"chars": 521,
"preview": "# Community\n\nOutlines exists for a community of users who believe software doesn't need to be complicated. Who share the"
},
{
"path": "docs/community/versioning.md",
"chars": 1339,
"preview": "---\ntitle: Versioning Guide\n---\n\n# Versioning Guide\n\n\nThe Outlines project follows a structured versioning scheme design"
},
{
"path": "docs/core_concepts.md",
"chars": 522,
"preview": "---\ntitle: Core concepts\n---\n\n# Core concepts\n\nComing soon. This will document various concepts at a high level, so user"
},
{
"path": "docs/examples/chain_of_density.md",
"chars": 7300,
"preview": "# Summarize documents using Chain of Density prompting\n\nA good summary should be informative, concise and clear. While l"
},
{
"path": "docs/examples/chain_of_thought.md",
"chars": 4819,
"preview": "# Chain of thought\n\n\nChain of thought is a prompting technique introduced in the paper [\"Chain-of-Thought Prompting Elic"
},
{
"path": "docs/examples/classification.md",
"chars": 2395,
"preview": "# Classification\n\nClassification is a classic problem in NLP and finds many applications: spam detection, sentiment anal"
},
{
"path": "docs/examples/dating_profiles.md",
"chars": 9189,
"preview": "# Generate a synthetic dating profile from a description\n\nIn this example we will see how we can use Outlines to generat"
},
{
"path": "docs/examples/deploy-using-bentoml.md",
"chars": 7648,
"preview": "# Run Outlines using BentoML\n\n[BentoML](https://github.com/bentoml/BentoML) is an open-source model serving library for "
},
{
"path": "docs/examples/deploy-using-cerebrium.md",
"chars": 3781,
"preview": "# Run Outlines using Cerebrium\n\n[Cerebrium](https://www.cerebrium.ai/) is a serverless AI infrastructure platform that m"
},
{
"path": "docs/examples/deploy-using-modal.md",
"chars": 7767,
"preview": "# Run Outlines using Modal\n\n[Modal](https://modal.com/) is a serverless platform that allows you to easily run code on t"
},
{
"path": "docs/examples/earnings-reports.md",
"chars": 11454,
"preview": "# Extracting financial data from earnings reports\n\nA common task in finance is to extract financial data from earnings r"
},
{
"path": "docs/examples/extract_event_details.md",
"chars": 1061,
"preview": "This recipe demonstrates how to use the `outlines` library to extract structured event details from a text message.\nWe w"
},
{
"path": "docs/examples/extract_event_details.py",
"chars": 1536,
"preview": "from datetime import datetime\n\nfrom mlx_lm import load\nfrom pydantic import BaseModel, Field\n\nimport outlines\nfrom outli"
},
{
"path": "docs/examples/extraction.md",
"chars": 3031,
"preview": "# Named entity extraction\n\nNamed Entity Extraction is a fundamental problem in NLP. It involves identifying and categori"
},
{
"path": "docs/examples/index.md",
"chars": 2098,
"preview": "# Examples\n\nThis part of the documentation provides a few cookbooks that you can browse to get acquainted with the libra"
},
{
"path": "docs/examples/knowledge_graph_extraction.md",
"chars": 5386,
"preview": "# Knowledge Graph Extraction\n\nIn this guide, we use [outlines](https://dottxt-ai.github.io/outlines/) to extract a knowl"
},
{
"path": "docs/examples/models_playing_chess.md",
"chars": 2825,
"preview": "# Large language models playing chess\n\nIn this example we will make a Phi-3 model play chess against itself. On its own "
},
{
"path": "docs/examples/prompt_templates/chain_of_density.txt",
"chars": 1651,
"preview": "Article: {{ article }}\n\nYou will generate increasingly concise, entity-dense summaries of the above Article.\n\nRepeat the"
},
{
"path": "docs/examples/prompt_templates/classification.txt",
"chars": 360,
"preview": "You are an experienced customer success manager.\n\nGiven a request from a client, you need to determine when the\nrequest "
},
{
"path": "docs/examples/prompt_templates/react_agent.txt",
"chars": 1126,
"preview": "<|im_start|>system\nYou are a world class AI model who answers questions in JSON with correct Pydantic schema.\nHere's the"
},
{
"path": "docs/examples/prompt_templates/simtom_prospective_taking.txt",
"chars": 894,
"preview": "<s>[INST] The following is a sequence of events about some characters, that takes place in multiple locations.\nYour job "
},
{
"path": "docs/examples/prompt_templates/simtom_simulation.txt",
"chars": 303,
"preview": "<s>[INST] {% for event in events %}\n{{event}}\n{% endfor %}\nYou are {{name}}.\nBased on the above information, answer the "
},
{
"path": "docs/examples/qa-with-citations.md",
"chars": 9154,
"preview": "# Generate Synthetic Data and Q&A with Citations\n\nThis tutorial is adapted from the [instructor-ollama notebook](https:/"
},
{
"path": "docs/examples/react_agent.md",
"chars": 9546,
"preview": "# ReAct Agent\n\nThis example shows how to use [outlines](https://dottxt-ai.github.io/outlines/) to build your own agent w"
},
{
"path": "docs/examples/read-pdfs.md",
"chars": 10465,
"preview": "# PDF to structured output with vision language models\n\nA common task with language models is to ask language models que"
},
{
"path": "docs/examples/receipt-digitization.md",
"chars": 8726,
"preview": "# Receipt Data Extraction with VLMs\n\n## Setup\n\nYou'll need to install the dependencies:\n\n```shell\npip install outlines t"
},
{
"path": "docs/examples/simtom.md",
"chars": 4778,
"preview": "# Build perspective-taking agents with SimToM\n\nPrompting strategies like Chain-of-Thought (CoT) can improve LLMs' reason"
},
{
"path": "docs/examples/structured_generation_workflow.md",
"chars": 6770,
"preview": "# Structured Generation Workflow: Generating Synthetic Phone Numbers\n\nThis is a condensed version of [Coding for Structu"
},
{
"path": "docs/features/advanced/backends.md",
"chars": 1935,
"preview": "---\ntitle: Structured Generation Backends\n---\n\n# Structured Generation Backends\n\nOutlines relies on a structured generat"
},
{
"path": "docs/features/advanced/logits_processors.md",
"chars": 4316,
"preview": "---\ntitle: Logits Processors\n---\n\n# Logits Processors\n\nLogits processors are objects that control text generation by mod"
},
{
"path": "docs/features/core/generator.md",
"chars": 3703,
"preview": "---\ntitle: Generator API\n---\n\n# Generator\n\n\nThe `Generator` class is the core component of Outlines v1. `Generator` acce"
},
{
"path": "docs/features/core/inputs.md",
"chars": 6064,
"preview": "---\ntitle: Model Inputs\n---\n\n# Model Inputs\n\nOutlines models accept various types of inputs to generate text. The input "
},
{
"path": "docs/features/core/output_types.md",
"chars": 8356,
"preview": "---\ntitle: Output Types\n---\n\n# Output Types\n\nOutlines provides a simple and intuitive way of defining the output structu"
},
{
"path": "docs/features/index.md",
"chars": 445,
"preview": "# Features\n\nThis section presents in detail the different features of Outlines.\n\n## Core Concepts\n\n- [Models](./models/"
},
{
"path": "docs/features/models/anthropic.md",
"chars": 4494,
"preview": "---\ntitle: Anthropic\n---\n\n# Anthropic\n\n!!! Installation\n\n You need to install the `anthropic` library to be able to u"
},
{
"path": "docs/features/models/dottxt.md",
"chars": 2463,
"preview": "---\ntitle: Dottxt\n---\n\n# Dottxt\n\n!!! Installation\n\n You need to install the `dottxt` python sdk to be able to use the"
},
{
"path": "docs/features/models/gemini.md",
"chars": 6461,
"preview": "# Gemini\n\n!!! Installation\n\n You need to install the `google.genai` library to be able to use the Gemini API in Outlin"
},
{
"path": "docs/features/models/index.md",
"chars": 6130,
"preview": "---\ntitle: Models\n---\n\n# Models\n\n## Overview\n\nOutlines models are objects that wrap an inference client or engine. Model"
},
{
"path": "docs/features/models/llamacpp.md",
"chars": 6612,
"preview": "---\ntitle: llama.cpp\n---\n\n# llama.cpp\n\nOutlines provides an integration with [Llama.cpp](https://github.com/ggerganov/ll"
},
{
"path": "docs/features/models/mistral.md",
"chars": 8289,
"preview": "# Mistral\n\n!!! Installation\n\n You need to install the `mistralai` library to be able to use the Mistral API in Outlin"
},
{
"path": "docs/features/models/mlxlm.md",
"chars": 5697,
"preview": "---\ntitle: mlx-lm\n---\n\n# mlx-lm\n\nOutlines provides an integration with [mlx-lm](https://github.com/ml-explore/mlx-exampl"
},
{
"path": "docs/features/models/ollama.md",
"chars": 6476,
"preview": "---\ntitle: Ollama\n---\n\n# Ollama\n\n!!! Installation\n\n To be able to use Ollama in Outlines, you must install both Ollam"
},
{
"path": "docs/features/models/openai.md",
"chars": 7045,
"preview": "# OpenAI\n\n!!! Installation\n\n You need to install the `openai` library to be able to use the OpenAI API in Outlines. I"
},
{
"path": "docs/features/models/openai_compatible.md",
"chars": 2772,
"preview": "# OpenAI-Compatible APIs\n\nMany inference providers offer OpenAI-compatible APIs, allowing you to use the familiar OpenAI"
},
{
"path": "docs/features/models/openrouter.md",
"chars": 1632,
"preview": "# Openrouter\n\n!!! Installation\n\n [OpenRouter](https://openrouter.ai/docs/api-reference/overview) uses the same API as"
},
{
"path": "docs/features/models/sglang.md",
"chars": 8430,
"preview": "---\ntitle: SGLang\n---\n\n# SGLang\n\n## Prerequisites\n\nThe Outlines `SGLang` model is intended to be used along with an SGLa"
},
{
"path": "docs/features/models/tgi.md",
"chars": 7594,
"preview": "---\ntitle: TGI\n---\n\n# TGI\n\n## Prerequisites\n\nThe Outlines `TGI` model is intended to be used along with a HuggingFace `T"
},
{
"path": "docs/features/models/transformers.md",
"chars": 7830,
"preview": "---\ntitle: Transformers\n---\n\n# Transformers\n\n!!! Installation\n\n You need to install the `transformers` library to be "
},
{
"path": "docs/features/models/transformers_multimodal.md",
"chars": 10146,
"preview": "---\ntitle: Transformers MultiModal\n---\n\n# Transformers MultiModal\n\nThe Outlines `TransformersMultiModal` model inherits "
},
{
"path": "docs/features/models/vllm.md",
"chars": 11301,
"preview": "---\ntitle: vLLM\n---\n\n# vLLM\n\n## Prerequisites\n\nThe Outlines `VLLM` model is intended to be used along with a vLLM instan"
},
{
"path": "docs/features/models/vllm_offline.md",
"chars": 7526,
"preview": "---\ntitle: vLLM Offline\n---\n\n# vLLM Offline\n\nOutlines provides an integration with [vLLM](https://docs.vllm.ai/en/latest"
},
{
"path": "docs/features/utility/application.md",
"chars": 2457,
"preview": "---\ntitle: Application\n---\n\n# Application\n\nThe `Application` class enables you to encapsulate a prompt template and an o"
},
{
"path": "docs/features/utility/regex_dsl.md",
"chars": 11048,
"preview": "---\ntitle: Regex DSL\n---\n\n# Regex DSL\n\nThis library provides a Domain-Specific Language (DSL) to construct regular expre"
},
{
"path": "docs/features/utility/template.md",
"chars": 2405,
"preview": "---\ntitle: Template\n---\n\n# Template\n\nOutlines templates provide a way of creating reusable prompt structures with placeh"
},
{
"path": "docs/guide/architecture.md",
"chars": 11565,
"preview": "# Architecture Overview\n\nThis guide explains how Outlines is organized so you can navigate the codebase, debug issues, a"
},
{
"path": "docs/guide/chat_templating.md",
"chars": 803,
"preview": "# Chat templating\n\nInstruction-tuned language models use \"special tokens\" to indicate different parts of text, such as t"
},
{
"path": "docs/guide/core_concepts.md",
"chars": 522,
"preview": "---\ntitle: Core concepts\n---\n\n# Core concepts\n\nComing soon. This will document various concepts at a high level, so user"
},
{
"path": "docs/guide/fastapi_vllm_deployment.md",
"chars": 8691,
"preview": "---\ntitle: Deploying with FastAPI\n---\n\n# Deploying with FastAPI\n\nThis guide demonstrates how to build a FastAPI applicat"
},
{
"path": "docs/guide/getting_started.md",
"chars": 10380,
"preview": "---\ntitle: Getting Started\n---\n\n# Getting Started\n\n## Installation\n\nWe recommend using `uv` to install Outlines. You can"
},
{
"path": "docs/guide/installation.md",
"chars": 2896,
"preview": "---\ntitle: Installation\n---\n\n# Installation\n\n## Dependency Management\n\nWe recommend using modern Python packaging tools "
},
{
"path": "docs/guide/migration.md",
"chars": 8260,
"preview": "# Outlines 1.0 migration guide\n\nOutlines 1.0 introduces some breaking changes that affect the way you use the library. Y"
},
{
"path": "docs/guide/selecting_an_inference_backend.md",
"chars": 157,
"preview": "This guide should provide a general overview of the available models in the [API reference](/api/models/).\n\n## Models\n\n-"
},
{
"path": "docs/guide/vlm.md",
"chars": 9327,
"preview": "# Vision-Language Models with Outlines\n\nThis guide demonstrates how to use Outlines with vision-language models. Vision-"
},
{
"path": "docs/index.md",
"chars": 7297,
"preview": "---\ntitle: Welcome to Outlines!\nhide:\n - navigation\n---\n\n#\n\n<figure markdown>\n and install the Beam SDK\n2. Download the `ap"
},
{
"path": "examples/beam-cloud/app.py",
"chars": 1196,
"preview": "from typing import Literal\n\nfrom beam import Image, endpoint, env\n\n\nif env.is_remote():\n import outlines\n\n\n# Pre-load"
},
{
"path": "examples/bentoml/.bentoignore",
"chars": 59,
"preview": "__pycache__/\n*.py[cod]\n*$py.class\n.ipynb_checkpoints\nvenv/\n"
},
{
"path": "examples/bentoml/bentofile.yaml",
"chars": 162,
"preview": "service: \"service:Outlines\"\nlabels:\n owner: bentoml-team\n stage: demo\ninclude:\n- \"*.py\"\npython:\n requirements_txt: \"."
},
{
"path": "examples/bentoml/import_model.py",
"chars": 686,
"preview": "import bentoml\n\nMODEL_ID = \"mistralai/Mistral-7B-v0.1\"\nBENTO_MODEL_TAG = MODEL_ID.lower().replace(\"/\", \"--\")\n\n\ndef impor"
},
{
"path": "examples/bentoml/requirements.txt",
"chars": 90,
"preview": "bentoml>=1.2.11\noutlines==0.0.37\ntransformers==4.38.2\ndatasets==2.18.0\naccelerate==0.27.2\n"
},
{
"path": "examples/bentoml/service.py",
"chars": 2101,
"preview": "import typing as t\n\nimport bentoml\nfrom import_model import BENTO_MODEL_TAG, MODEL_ID\n\nDEFAULT_SCHEMA = \"\"\"{\n \"title\""
},
{
"path": "examples/cerebrium/cerebrium.toml",
"chars": 473,
"preview": "[cerebrium.deployment]\nname = \"cerebrium\"\npython_version = \"3.11\"\ncuda_version = \"12\"\ninclude = \"[./*, main.py, cerebriu"
},
{
"path": "examples/cerebrium/main.py",
"chars": 1439,
"preview": "from transformers import AutoModelForCausalLM, AutoTokenizer\n\nimport outlines\n\n\nmodel = outlines.from_transformers(\n "
},
{
"path": "examples/dating_profile.py",
"chars": 7109,
"preview": "from dataclasses import dataclass\nfrom enum import Enum\n\nimport torch\nimport transformers\nfrom pydantic import BaseModel"
},
{
"path": "examples/llamacpp_example.py",
"chars": 1221,
"preview": "from enum import Enum\n\nfrom pydantic import BaseModel, constr\nfrom llama_cpp import Llama\n\nimport outlines\n\n\nclass Weapo"
},
{
"path": "examples/llamacpp_processor.py",
"chars": 1299,
"preview": "from enum import Enum\n\nfrom llama_cpp import Llama, LogitsProcessorList\nfrom pydantic import BaseModel, constr\n\nfrom out"
},
{
"path": "examples/math_generate_code.py",
"chars": 1506,
"preview": "\"\"\"Example from https://dust.tt/spolu/a/d12ac33169\"\"\"\n\nimport openai\n\nimport outlines\nfrom outlines import Template\n\n\nex"
},
{
"path": "examples/meta_prompting.py",
"chars": 4680,
"preview": "\"\"\"Meta-prompting examples.\n\nReferences\n----------\n\n.. [0] \"Prompting is programming: A Query Language for Large Languag"
},
{
"path": "examples/modal_example.py",
"chars": 2328,
"preview": "import modal\n\napp = modal.App(name=\"outlines-app\")\n\n\noutlines_image = modal.Image.debian_slim(python_version=\"3.11\").pip"
},
{
"path": "examples/pick_odd_one_out.py",
"chars": 1133,
"preview": "\"\"\"Chain-of-thought prompting for Odd one out classification.\n\nExample taken from the LQML library [1]_.\n\nReferences\n---"
},
{
"path": "examples/prompts/babyagi_create_task.txt",
"chars": 291,
"preview": "Objective: {{ objective }}\nCurrent Task: {{ task }}\nResult: {{ result }}\nPrevious Tasks: {{ previous_tasks }}\n\nBased on "
},
{
"path": "examples/prompts/babyagi_perform_task.txt",
"chars": 158,
"preview": "Objective: {{ objective }}\nTask: {{ task }}\n\nPlease perform the task and provide a concise result in the following forma"
},
{
"path": "examples/prompts/babyagi_prioritize_task.txt",
"chars": 266,
"preview": "Tasks: {{ tasks }}\nNext Task ID: {{ next_task_id }}\n\nPlease prioritize the tasks based on their importance and urgency t"
},
{
"path": "examples/prompts/dating_profile.txt",
"chars": 619,
"preview": "You are a world-renowned matchmaker who understands the modern dating market. Your job is to generate dating app profile"
},
{
"path": "examples/prompts/pick_odd_one_out.txt",
"chars": 361,
"preview": "Pick the odd word out: skirt, dress, pen, jacket.\nskirt is clothing, dress is clothing, pen is an object, jacket is clot"
},
{
"path": "examples/prompts/self_consistency.txt",
"chars": 114,
"preview": "{% for example in examples %}\nQ: {{ example.question }}\nA: {{ example.answer }}\n{% endfor %}\nQ: {{ question }}\nA:\n"
},
{
"path": "examples/react.py",
"chars": 3473,
"preview": "\"\"\"ReAct\n\nThis example was inspired by the LQML library [1]_. The ReAct framework was\nfirst developed in [2]_ and augmen"
},
{
"path": "examples/sampling.ipynb",
"chars": 33233,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": 1,\n \"id\": \"62129e1a-e9de-454e-a714-35ccbcf0b518\",\n \""
},
{
"path": "examples/self_consistency.py",
"chars": 3871,
"preview": "import re\n\nimport numpy as np\nimport openai\n\nimport outlines\nfrom outlines import Template\n\nexamples = [\n {\n \""
},
{
"path": "examples/simulation_based_inference.ipynb",
"chars": 20118,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"e7c7d0bb-8d45-4139-a584-02c7196db92b\",\n \"metadata\": {},\n \"so"
},
{
"path": "examples/vllm_offline_integration.py",
"chars": 847,
"preview": "\"\"\"Example of integrating `outlines` with `vllm`.\"\"\"\n\nimport vllm\nfrom pydantic import BaseModel\nfrom transformers impor"
},
{
"path": "flake.nix",
"chars": 341,
"preview": "{\n inputs.flake-utils.url = \"github:numtide/flake-utils\";\n outputs = { self, nixpkgs, flake-utils }:\n flake-utils.l"
},
{
"path": "llm.txt",
"chars": 7683,
"preview": "# Outlines Codebase Reference\n\n## Overview\n\nOutlines is a library for structured generation for type-safe LLMs. It ensur"
},
{
"path": "mkdocs.yml",
"chars": 6963,
"preview": "# Site information\nsite_name: Outlines\nsite_author: The Outlines developers\nsite_description: >-\n Structured text gen"
},
{
"path": "outlines/__init__.py",
"chars": 986,
"preview": "\"\"\"Outlines is a Generative Model Programming Framework.\"\"\"\n\n# re-export on top-level namespace\nfrom outlines import gra"
},
{
"path": "outlines/applications.py",
"chars": 3269,
"preview": "\"\"\"Encapsulate a prompt template and an output type into a reusable object.\"\"\"\n\nfrom typing import Any, Callable, Dict, "
},
{
"path": "outlines/backends/__init__.py",
"chars": 3523,
"preview": "\"\"\"Module to define the backends in charge of creating logits processors.\"\"\"\n\nfrom outlines.backends.base import (\n B"
},
{
"path": "outlines/backends/base.py",
"chars": 1534,
"preview": "\"\"\"Base class for all backends.\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom typing import Any\n\n\nLogitsProcessorType = A"
},
{
"path": "outlines/backends/llguidance.py",
"chars": 9679,
"preview": "\"\"\"Backend class for LLGuidance.\"\"\"\n\nimport warnings\nfrom typing import TYPE_CHECKING\n\nfrom outlines.backends.base impor"
},
{
"path": "outlines/backends/outlines_core.py",
"chars": 10323,
"preview": "\"\"\"Backend class for Outlines Core.\"\"\"\n\nfrom typing import Callable, Dict, List\n\nfrom outlines_core import Guide, Index,"
},
{
"path": "outlines/backends/xgrammar.py",
"chars": 6795,
"preview": "\"\"\"Backend class for XGrammar.\"\"\"\n\nfrom outlines.backends.base import BaseBackend\nfrom outlines.models import SteerableM"
},
{
"path": "outlines/caching.py",
"chars": 6031,
"preview": "\"\"\"Caching and memoization of function calls.\"\"\"\n\nimport asyncio\nimport contextlib\nimport functools\nimport os\nimport tem"
},
{
"path": "outlines/generator.py",
"chars": 12133,
"preview": "\"\"\"Encapsulate a model and an output type into a reusable object.\"\"\"\n\nfrom typing import (\n Any,\n AsyncIterator,\n "
},
{
"path": "outlines/grammars/arithmetic.lark",
"chars": 293,
"preview": "?start: sum\n\n?sum: product\n| sum \"+\" product -> add\n| sum \"-\" product -> sub\n\n?product: atom\n| product \"*\" atom -> "
},
{
"path": "outlines/grammars/common.lark",
"chars": 2242,
"preview": "// Adapted from https://github.com/lark-parser/lark/blob/master/lark/grammars/common.lark\n\n// Lark License:\n// Copyright"
},
{
"path": "outlines/grammars/json.lark",
"chars": 373,
"preview": "?start: value\n\n?value: object\n| array\n| ESCAPED_STRING\n| SIGNED_NUMBER -> number\n| \"true\" -> true\n| \"fa"
},
{
"path": "outlines/grammars.py",
"chars": 728,
"preview": "\"\"\"A few common Lark grammars.\"\"\"\n\nfrom pathlib import Path\n\nGRAMMAR_PATH = Path(__file__).parent / \"grammars\"\n\n\ndef rea"
},
{
"path": "outlines/inputs.py",
"chars": 4922,
"preview": "\"\"\"Contain classes used to define the inputs of a model.\"\"\"\n\nimport base64\nfrom dataclasses import dataclass\nfrom io imp"
},
{
"path": "outlines/models/__init__.py",
"chars": 2229,
"preview": "\"\"\"Module that contains all the models integrated in outlines.\n\nWe group the models in submodules by provider instead of"
},
{
"path": "outlines/models/anthropic.py",
"chars": 7986,
"preview": "\"\"\"Integration with Anthropic's API.\"\"\"\n\nfrom functools import singledispatchmethod\nfrom typing import TYPE_CHECKING, An"
},
{
"path": "outlines/models/base.py",
"chars": 17349,
"preview": "\"\"\"Base classes for all models and model type adapters.\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom typing import Any, "
},
{
"path": "outlines/models/dottxt.py",
"chars": 5673,
"preview": "\"\"\"Integration with Dottxt's API.\"\"\"\n\nfrom typing import TYPE_CHECKING, Any, Optional, cast\n\nfrom outlines.models.base i"
},
{
"path": "outlines/models/gemini.py",
"chars": 11284,
"preview": "\"\"\"Integration with Gemini's API.\"\"\"\n\nfrom functools import singledispatchmethod\nfrom typing import (\n TYPE_CHECKING,"
},
{
"path": "outlines/models/llamacpp.py",
"chars": 13240,
"preview": "\"\"\"Integration with the `llama-cpp-python` library.\"\"\"\n\nimport ctypes\nfrom functools import singledispatchmethod\nfrom ty"
},
{
"path": "outlines/models/lmstudio.py",
"chars": 14196,
"preview": "\"\"\"Integration with the `lmstudio` library.\"\"\"\n\nfrom functools import singledispatchmethod\nfrom typing import (\n TYPE"
},
{
"path": "outlines/models/mistral.py",
"chars": 18596,
"preview": "\"\"\"Integration with Mistral AI API.\"\"\"\n\nimport json\nfrom functools import singledispatchmethod\nfrom typing import (\n "
},
{
"path": "outlines/models/mlxlm.py",
"chars": 8034,
"preview": "\"\"\"Integration with the `mlx_lm` library.\"\"\"\n\nfrom functools import singledispatchmethod\nfrom typing import TYPE_CHECKIN"
},
{
"path": "outlines/models/ollama.py",
"chars": 11545,
"preview": "\"\"\"Integration with the `ollama` library.\"\"\"\n\nfrom functools import singledispatchmethod\nfrom typing import (\n TYPE_C"
},
{
"path": "outlines/models/openai.py",
"chars": 17291,
"preview": "\"\"\"Integration with OpenAI's API.\"\"\"\n\nfrom typing import (\n TYPE_CHECKING,\n Any,\n AsyncIterator,\n Iterator,\n"
},
{
"path": "outlines/models/sglang.py",
"chars": 12176,
"preview": "\"\"\"Integration with an SGLang server.\"\"\"\n\nimport json\nimport warnings\nfrom typing import (\n TYPE_CHECKING, Any, Async"
},
{
"path": "outlines/models/tgi.py",
"chars": 9877,
"preview": "\"\"\"Integration with a TGI server.\"\"\"\n\nimport json\nfrom functools import singledispatchmethod\nfrom typing import (\n TY"
},
{
"path": "outlines/models/tokenizer.py",
"chars": 1394,
"preview": "from typing import Dict, Hashable, List, Protocol, Set, Tuple, Union, TYPE_CHECKING\n\n\nif TYPE_CHECKING:\n import numpy"
},
{
"path": "outlines/models/transformers.py",
"chars": 25569,
"preview": "\"\"\"Integration with the `transformers` library. \"\"\"\n\nimport warnings\n\nfrom collections import defaultdict\nfrom functools"
},
{
"path": "outlines/models/utils.py",
"chars": 766,
"preview": "import jsonpath_ng\n\n\ndef set_additional_properties_false_json_schema(schema: dict) -> dict:\n \"\"\"Set additionalPropert"
},
{
"path": "outlines/models/vllm.py",
"chars": 11820,
"preview": "\"\"\"Integration with a vLLM server.\"\"\"\n\nimport json\nfrom typing import TYPE_CHECKING, Any, AsyncIterator, Iterator, Optio"
},
{
"path": "outlines/models/vllm_offline.py",
"chars": 9569,
"preview": "\"\"\"Integration with the `vllm` library (offline mode).\"\"\"\n\nimport json\nfrom functools import singledispatchmethod\nfrom t"
},
{
"path": "outlines/processors/__init__.py",
"chars": 166,
"preview": "\"\"\"Processors to control generation in steerable models.\"\"\"\n\nfrom .base_logits_processor import OutlinesLogitsProcessor\n"
},
{
"path": "outlines/processors/base_logits_processor.py",
"chars": 5212,
"preview": "\"\"\"Base class for logits processors.\"\"\"\n\nfrom abc import abstractmethod\nfrom typing import TypeVar\n\nfrom outlines.proces"
},
{
"path": "outlines/processors/tensor_adapters/__init__.py",
"chars": 568,
"preview": "\"\"\"Library specific objects to manipulate tensors.\"\"\"\n\nfrom typing import Union\n\nfrom .mlx import MLXTensorAdapter\nfrom "
},
{
"path": "outlines/processors/tensor_adapters/base.py",
"chars": 6115,
"preview": "\"\"\"Base class for tensor adapters.\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom typing import TYPE_CHECKING, TypeVar, An"
},
{
"path": "outlines/processors/tensor_adapters/mlx.py",
"chars": 1574,
"preview": "\"\"\"Tensor adapter for the `mlx` library.\"\"\"\n\nfrom outlines.processors.tensor_adapters.base import TensorAdapter\n\n\nclass "
},
{
"path": "outlines/processors/tensor_adapters/numpy.py",
"chars": 1239,
"preview": "\"\"\"Tensor adapter for the `numpy` library.\"\"\"\n\nfrom outlines.processors.tensor_adapters.base import TensorAdapter\n\n\nclas"
},
{
"path": "outlines/processors/tensor_adapters/torch.py",
"chars": 1218,
"preview": "\"\"\"Tensor adapter for the `torch` library.\"\"\"\n\nfrom outlines.processors.tensor_adapters.base import TensorAdapter\n\n\nclas"
},
{
"path": "outlines/py.typed",
"chars": 0,
"preview": ""
},
{
"path": "outlines/release_note.md",
"chars": 16888,
"preview": "# Release Note\n\n### Why a new major version?\n\nThe v1 intends on making Outlines more closely focused on constrained gene"
},
{
"path": "outlines/templates.py",
"chars": 9485,
"preview": "\"\"\"Create templates to easily build prompts.\"\"\"\n\nimport functools\nimport inspect\nimport json\nimport os\nimport re\nimport "
},
{
"path": "outlines/types/__init__.py",
"chars": 4012,
"preview": "\"\"\"Output types for structured generation and regex DSL.\"\"\"\n\nfrom outlines.types.dsl import (\n CFG,\n Choice,\n J"
},
{
"path": "outlines/types/airports.py",
"chars": 240,
"preview": "\"\"\"Generate valid airport codes.\"\"\"\n\nfrom enum import Enum\n\nimport airportsdata\n\nAIRPORT_IATA_LIST = [\n (v[\"iata\"], v"
},
{
"path": "outlines/types/countries.py",
"chars": 1046,
"preview": "\"\"\"Generate valid country codes and names.\"\"\"\n\nfrom enum import Enum\n\nfrom iso3166 import countries\n\n\ndef get_country_fl"
},
{
"path": "outlines/types/dsl.py",
"chars": 28668,
"preview": "\"\"\"Regular expression DSL and output types for structured generation.\n\nThis module contains elements related to three lo"
},
{
"path": "outlines/types/json_schema_utils.py",
"chars": 4641,
"preview": "\"\"\"Convert JSON Schema dicts to Python types.\"\"\"\n\nimport sys\nfrom dataclasses import dataclass, field\nfrom typing import"
},
{
"path": "outlines/types/locale/__init__.py",
"chars": 81,
"preview": "\"\"\"Locale-specific regex patterns.\"\"\"\n\nfrom . import us\n\n__all__ = [\n \"us\",\n]\n"
},
{
"path": "outlines/types/locale/us.py",
"chars": 205,
"preview": "\"\"\"Locale-specific regex patterns for the United States.\"\"\"\n\nfrom outlines.types.dsl import Regex\n\nzip_code = Regex(r\"\\d"
},
{
"path": "outlines/types/utils.py",
"chars": 5277,
"preview": "\"\"\"Utility functions for the types module.\"\"\"\n\nimport dataclasses\nimport datetime\nimport inspect\nimport sys\nimport warni"
},
{
"path": "pyproject.toml",
"chars": 5115,
"preview": "[build-system]\nrequires = [\"setuptools>=45\", \"setuptools_scm[toml]>=6.2\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[proj"
},
{
"path": "requirements-doc.txt",
"chars": 245,
"preview": "mkdocs\nmkdocs-material\nmkdocs-material[imaging]\nmkdocs-mermaid2-plugin\nmkdocs-section-index\nmkdocstrings[python]\nmkdocs-"
},
{
"path": "scripts/gen_ref_pages.py",
"chars": 1858,
"preview": "\"\"\"Generate the API reference pages and navigation automatically.\n\nThis script is based on the `gen_ref_pages.py` script"
},
{
"path": "setup.cfg",
"chars": 179,
"preview": "[flake8]\nmax-line-length = 88\nselect = C,E,F,W\nignore = E203,E231,E501,E741,W503,W504,C901,E731\nper-file-ignores =\n *"
},
{
"path": "shell.nix",
"chars": 1896,
"preview": "{ pkgs ? import <nixpkgs> { config = { allowUnfree = true; }; } }:\n\n(pkgs.buildFHSEnv {\n name = \"dottxt-ai\";\n targetPk"
},
{
"path": "tests/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "tests/backends/test_backends.py",
"chars": 3347,
"preview": "import outlines\nimport pytest\nimport transformers\n\nfrom outlines.backends import (\n _get_backend,\n get_json_schema"
},
{
"path": "tests/backends/test_backends_utils.py",
"chars": 2294,
"preview": "import torch\nimport numpy as np\n\n\ndef simulate_model_calling_processor(processor, tensor_library_name, vocabulary_size, "
},
{
"path": "tests/backends/test_llguidance.py",
"chars": 6269,
"preview": "import re\n\nimport llama_cpp\nimport llguidance\nimport pytest\nimport transformers\nfrom llguidance import LLTokenizer\n\nimpo"
},
{
"path": "tests/backends/test_outlines_core.py",
"chars": 5977,
"preview": "import re\n\nimport llama_cpp\nimport pytest\nimport transformers\nfrom outlines_core import Index, Vocabulary\n\nimport outlin"
},
{
"path": "tests/backends/test_xgrammar.py",
"chars": 5459,
"preview": "import re\n\nimport llama_cpp\nimport outlines\nimport pytest\nimport transformers\nfrom xgrammar import GrammarCompiler, Toke"
},
{
"path": "tests/cfg_samples/arithmetic/lots_of_ops.arithmetic.test",
"chars": 36,
"preview": "5+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1\n"
},
{
"path": "tests/cfg_samples/arithmetic/simple_math.arithmetic.test",
"chars": 27,
"preview": "(1 * 2) - (0.1 * 2 * 9.42)\n"
},
{
"path": "tests/cfg_samples/json/outlines.generate.samplers.mypy.json.test",
"chars": 13621,
"preview": "{\n \".class\": \"MypyFile\",\n \"_fullname\": \"outlines.generate.samplers\",\n \"future_import_flags\": [],\n \"is_partia"
},
{
"path": "tests/cfg_samples/json/simple_fruit.json.test",
"chars": 423,
"preview": "[\n {\n \"ID\": \"1\",\n \"Name\": \"Andrew \\\"The Escaper\\\" Lapp\",\n \"Age\": \"30\",\n \"FavFruit\": \"Bana"
},
{
"path": "tests/cfg_samples/json/simple_fruit_no_indent.json.test",
"chars": 198,
"preview": "[{\"ID\": \"1\", \"Name\": \"Andrew\", \"Age\": \"30\", \"FavFruit\": \"Banana\"}, {\"ID\": \"2\", \"Name\": \"Mohammad\", \"Age\": \"40\", \"FavFrui"
},
{
"path": "tests/conftest.py",
"chars": 619,
"preview": "import sys\n\nimport pytest\n\n\ndef pytest_collection_modifyitems(config, items):\n if sys.platform != \"linux\":\n if"
},
{
"path": "tests/models/test_anthopic_type_adapter.py",
"chars": 3291,
"preview": "import io\nimport pytest\nfrom dataclasses import dataclass\n\nfrom PIL import Image as PILImage\nfrom outlines.inputs import"
},
{
"path": "tests/models/test_anthropic.py",
"chars": 3809,
"preview": "import io\nfrom typing import Generator\n\nfrom anthropic import Anthropic as AnthropicClient\nfrom PIL import Image as PILI"
},
{
"path": "tests/models/test_dottxt.py",
"chars": 3715,
"preview": "import json\nimport os\n\nimport pytest\nfrom dottxt.client import Dottxt as DottxtClient\nfrom pydantic import BaseModel\n\nim"
},
{
"path": "tests/models/test_dottxt_type_adapter.py",
"chars": 3816,
"preview": "import io\nimport json\nimport pytest\nimport sys\nfrom dataclasses import dataclass\n\nfrom PIL import Image as PILImage\nfrom"
},
{
"path": "tests/models/test_gemini.py",
"chars": 6729,
"preview": "import io\nimport json\nimport sys\nfrom dataclasses import dataclass\nfrom enum import Enum\nfrom typing import Generator, L"
},
{
"path": "tests/models/test_gemini_type_adapter.py",
"chars": 7911,
"preview": "import io\nimport pytest\nimport sys\nfrom dataclasses import dataclass\nfrom enum import Enum, EnumMeta\nfrom typing import "
},
{
"path": "tests/models/test_llamacpp.py",
"chars": 6234,
"preview": "import json\nfrom enum import Enum\n\nimport pytest\nfrom llama_cpp import Llama\nfrom pydantic import BaseModel\n\nfrom outlin"
},
{
"path": "tests/models/test_llamacpp_tokenizer.py",
"chars": 8396,
"preview": "import ctypes\n\nimport pytest\nimport sys\nfrom unittest.mock import MagicMock, patch\n\nimport llama_cpp\nimport transformers"
},
{
"path": "tests/models/test_llamacpp_type_adapter.py",
"chars": 2521,
"preview": "import pytest\nimport io\n\nfrom llama_cpp import LogitsProcessorList\nfrom PIL import Image as PILImage\nfrom outlines_core "
},
{
"path": "tests/models/test_lmstudio.py",
"chars": 11238,
"preview": "import io\nimport json\nimport os\nimport warnings\nfrom enum import Enum\nfrom typing import Annotated, AsyncGenerator, Gene"
},
{
"path": "tests/models/test_lmstudio_type_adapter.py",
"chars": 6127,
"preview": "import io\nimport json\nimport os\nimport sys\nfrom dataclasses import dataclass\n\nimport pytest\nfrom genson import SchemaBui"
}
]
// ... and 39 more files (download for full content)
About this extraction
This page contains the full source code of the dottxt-ai/outlines GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 239 files (1.2 MB), approximately 310.7k tokens, and a symbol index with 1469 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.