Full Code of guidance-ai/guidance for AI

main 172f1d4bf541 cached
280 files
29.2 MB
4.8M tokens
790 symbols
1 requests
Copy disabled (too large) Download .txt
Showing preview only (19,307K chars total). Download the full file to get everything.
Repository: guidance-ai/guidance
Branch: main
Commit: 172f1d4bf541
Files: 280
Total size: 29.2 MB

Directory structure:
gitextract_c7vutqob/

├── .git-blame-ignore-revs
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   └── bug_report.md
│   └── workflows/
│       ├── call_cpu_tests.yml
│       ├── call_gpu_tests.yml
│       ├── ci_credentials.yml
│       ├── ci_docs.yml
│       ├── ci_linux.yml
│       ├── ci_macos.yml
│       ├── ci_windows.yml
│       ├── code_quality.yml
│       ├── notebook_tests.yml
│       ├── pull_request.yml
│       ├── pypi_upload.yml
│       └── widget_build.yml
├── .gitignore
├── CONTRIBUTING.md
├── GOVERNANCE.md
├── LICENSE.md
├── MAINTAINERS.md
├── README.md
├── client/
│   └── graphpaper-inline/
│       ├── .gitignore
│       ├── build-to-guidance.sh
│       ├── dist/
│       │   └── .gitignore
│       ├── package.json
│       ├── postcss.config.js
│       ├── rollup.config.mjs
│       ├── src/
│       │   ├── App.svelte
│       │   ├── CustomAudio.svelte
│       │   ├── CustomVideo.svelte
│       │   ├── MetricRecord.svelte
│       │   ├── ResizeListener.svelte
│       │   ├── Select.svelte
│       │   ├── Sparkline.svelte
│       │   ├── StitchHandler.svelte
│       │   ├── TokenGrid.svelte
│       │   ├── TokenGridItem.svelte
│       │   ├── clickoutside.ts
│       │   ├── interfaces.ts
│       │   ├── longhover.ts
│       │   ├── main.css
│       │   ├── main.js
│       │   ├── metrics.ts
│       │   ├── mocks.ts
│       │   ├── stitch.ts
│       │   └── template.html
│       └── tsconfig.json
├── docs/
│   ├── .readthedocs.yaml
│   ├── Makefile
│   ├── _static/
│   │   └── css/
│   │       └── styles.css
│   ├── api.rst
│   ├── api_examples.rst
│   ├── art_of_prompt_design.rst
│   ├── conf.py
│   ├── index.rst
│   ├── make.bat
│   └── tutorials.rst
├── guidance/
│   ├── __init__.py
│   ├── _ast.py
│   ├── _bg/
│   │   └── __init__.py
│   ├── _grammar.py
│   ├── _guidance.py
│   ├── _guidance.pyi
│   ├── _parser.py
│   ├── _schema.py
│   ├── _tools.py
│   ├── _topics.py
│   ├── _utils.py
│   ├── chat.py
│   ├── debug.py
│   ├── library/
│   │   ├── __init__.py
│   │   ├── _audio.py
│   │   ├── _block.py
│   │   ├── _capture.py
│   │   ├── _ebnf.py
│   │   ├── _gen.py
│   │   ├── _image.py
│   │   ├── _json.py
│   │   ├── _optional.py
│   │   ├── _pydantic.py
│   │   ├── _role.py
│   │   ├── _sequences.py
│   │   ├── _subgrammar.py
│   │   ├── _substring.py
│   │   └── _video.py
│   ├── metrics/
│   │   ├── __init__.py
│   │   └── _metrics.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── _azureai.py
│   │   ├── _base/
│   │   │   ├── __init__.py
│   │   │   ├── _interpreter.py
│   │   │   ├── _model.py
│   │   │   └── _state.py
│   │   ├── _byte_tokenizer.py
│   │   ├── _engine/
│   │   │   ├── __init__.py
│   │   │   ├── _engine.py
│   │   │   ├── _interpreter.py
│   │   │   ├── _state.py
│   │   │   └── _tokenizer.py
│   │   ├── _llama_cpp.py
│   │   ├── _mock.py
│   │   ├── _onnxruntime.py
│   │   ├── _openai.py
│   │   ├── _openai_base.py
│   │   ├── _transformers.py
│   │   ├── broken_models/
│   │   │   ├── README.MD
│   │   │   ├── _Gemini.py
│   │   │   ├── _anthropic.py
│   │   │   ├── _azure_openai.py
│   │   │   ├── _azureai_studio.py
│   │   │   ├── _cohere.py
│   │   │   ├── _googleai.py
│   │   │   ├── _lite_llm.py
│   │   │   ├── _togetherai.py
│   │   │   └── _vertexai.py
│   │   └── experimental/
│   │       ├── __init__.py
│   │       ├── _litellm.py
│   │       ├── _sglang.py
│   │       └── _vllm.py
│   ├── py.typed
│   ├── registry/
│   │   ├── __init__.py
│   │   └── _registry.py
│   ├── resources/
│   │   └── graphpaper-inline.html
│   ├── trace/
│   │   ├── __init__.py
│   │   └── _trace.py
│   └── visual/
│       ├── __init__.py
│       ├── _environment.py
│       ├── _exchange.py
│       ├── _jupyter.py
│       ├── _message.py
│       ├── _renderer.py
│       └── _trace.py
├── notebooks/
│   ├── anachronism.ipynb
│   ├── api_examples/
│   │   ├── library/
│   │   │   └── gen.ipynb
│   │   └── models/
│   │       ├── AzureOpenAI.ipynb
│   │       ├── OpenAI.ipynb
│   │       └── TogetherAI.ipynb
│   ├── art_of_prompt_design/
│   │   ├── prompt_boundaries_and_token_healing.ipynb
│   │   ├── rag.ipynb
│   │   ├── react.ipynb
│   │   ├── tool_use.ipynb
│   │   └── use_clear_syntax.ipynb
│   ├── chatgpt_vs_open_source_on_harder_tasks.ipynb
│   ├── engine_chat_completion.ipynb
│   ├── guaranteeing_valid_syntax.ipynb
│   ├── proverb.ipynb
│   ├── testing_lms.ipynb
│   ├── tutorials/
│   │   ├── adding_new_models.ipynb
│   │   ├── chat.ipynb
│   │   ├── code_generation.ipynb
│   │   ├── guidance_acceleration.ipynb
│   │   ├── intro_to_guidance.ipynb
│   │   ├── litellm_models.ipynb
│   │   ├── onnxruntime_models.ipynb
│   │   ├── regex_constraints.ipynb
│   │   ├── token_healing.ipynb
│   │   └── tool_calling.ipynb
│   └── unstable/
│       └── .gitignore
├── packages/
│   └── python/
│       └── stitch/
│           ├── .coveragerc
│           ├── .eslintignore
│           ├── .eslintrc.js
│           ├── .github/
│           │   └── workflows/
│           │       └── build.yml
│           ├── .gitignore
│           ├── .npmignore
│           ├── .prettierignore
│           ├── .prettierrc
│           ├── .yarnrc.yml
│           ├── LICENSE.txt
│           ├── MANIFEST.in
│           ├── README.md
│           ├── babel.config.js
│           ├── codecov.yml
│           ├── css/
│           │   └── widget.css
│           ├── docs/
│           │   ├── Makefile
│           │   ├── environment.yml
│           │   ├── make.bat
│           │   └── source/
│           │       ├── _static/
│           │       │   └── helper.js
│           │       ├── conf.py
│           │       ├── develop-install.rst
│           │       ├── examples/
│           │       │   ├── index.rst
│           │       │   └── introduction.nblink
│           │       ├── index.rst
│           │       ├── installing.rst
│           │       └── introduction.rst
│           ├── examples/
│           │   └── introduction.ipynb
│           ├── install.json
│           ├── jest.config.js
│           ├── package.json
│           ├── pyproject.toml
│           ├── pytest.ini
│           ├── readthedocs.yml
│           ├── setup.py
│           ├── src/
│           │   ├── __tests__/
│           │   │   ├── index.spec.ts
│           │   │   └── utils.ts
│           │   ├── extension.ts
│           │   ├── index.ts
│           │   ├── plugin.ts
│           │   ├── version.ts
│           │   └── widget.ts
│           ├── stitch/
│           │   ├── __init__.py
│           │   ├── _frontend.py
│           │   ├── _version.py
│           │   ├── nbextension/
│           │   │   └── extension.js
│           │   ├── stitch.py
│           │   └── tests/
│           │       ├── __init__.py
│           │       ├── conftest.py
│           │       ├── test_example.py
│           │       └── test_nbextension_path.py
│           ├── stitch.json
│           ├── tsconfig.eslint.json
│           ├── tsconfig.json
│           └── webpack.config.js
├── pyproject.toml
├── ruff.toml
├── scripts/
│   └── extract_python_from_readme.py
└── tests/
    ├── ReadMe.md
    ├── __init__.py
    ├── conftest.py
    ├── model_integration/
    │   ├── __init__.py
    │   ├── library/
    │   │   ├── test_gen.py
    │   │   ├── test_json.py
    │   │   ├── test_subgrammar.py
    │   │   └── test_substring.py
    │   ├── test_engine.py
    │   ├── test_grammar.py
    │   ├── test_model.py
    │   └── test_tokenizers.py
    ├── model_specific/
    │   ├── __init__.py
    │   ├── common_chat_testing.py
    │   ├── llama_cpp_tests/
    │   │   ├── __init__.py
    │   │   ├── test_chat_templates.py
    │   │   ├── test_llama_cpp.py
    │   │   └── test_think.py
    │   ├── test_onnxruntime_genai.py
    │   ├── test_transformers.py
    │   └── test_visual.py
    ├── need_credentials/
    │   ├── __init__.py
    │   ├── test_anthropic.py
    │   ├── test_azureai_openai.py
    │   ├── test_azureai_studio.py
    │   ├── test_chat_templates.py
    │   ├── test_cohere.py
    │   ├── test_googleai.py
    │   ├── test_lite_llm.py
    │   ├── test_openai.py
    │   ├── test_togetherai.py
    │   ├── test_tokenizers.py
    │   └── test_vertexai.py
    ├── notebooks/
    │   ├── __init__.py
    │   ├── nb_test_utils.py
    │   ├── test_notebooks.py
    │   └── test_notebooks_azure.py
    ├── tokenizer_common.py
    ├── unit/
    │   ├── __init__.py
    │   ├── library/
    │   │   ├── __init__.py
    │   │   ├── json/
    │   │   │   ├── __init__.py
    │   │   │   ├── test_allOf.py
    │   │   │   ├── test_json.py
    │   │   │   ├── test_refs.py
    │   │   │   ├── test_string_format.py
    │   │   │   └── utils.py
    │   │   ├── test_block.py
    │   │   ├── test_capture.py
    │   │   ├── test_gen.py
    │   │   ├── test_image.py
    │   │   ├── test_one_or_more.py
    │   │   ├── test_pydantic.py
    │   │   ├── test_regex.py
    │   │   ├── test_sequences.py
    │   │   ├── test_special.py
    │   │   ├── test_subgrammar.py
    │   │   └── test_substring.py
    │   ├── test_ast.py
    │   ├── test_decorator.py
    │   ├── test_grammar.py
    │   ├── test_ll.py
    │   ├── test_model.py
    │   ├── test_parser.py
    │   ├── test_trace.py
    │   └── test_visual.py
    └── utils.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .git-blame-ignore-revs
================================================
# .git-blame-ignore-revs
# Ran black on major files to standardize codebase
57da386795bc94a34275b333da586f171f96d7c8
# Ran black on tests and other ancillary python files in code
083fb9877b507ed27136441c683ce051edf37e81
# Ran ruff on the codebase to standardize formatting
95b1bbe75b049c9d3bdc0346d09116b76f736472
# ruff check --select UP007,UP045 --fix
18ac24dbeeb09cf2ff0b9dc9825e506b3dbfa89c


================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''

---

**The bug**
A clear and concise description of what the bug is.

**To Reproduce**
Give a full working code snippet that can be pasted into a notebook cell or Python file. Make sure to include the LLM load step so we know which model you are using.
```python
# put your code snippet here
```

**System info (please complete the following information):**
 - OS (e.g. Ubuntu, Windows 11, Mac OS, etc.):
 - Guidance Version (`guidance.__version__`):


================================================
FILE: .github/workflows/call_cpu_tests.yml
================================================
name: call_cpu_tests

on:
  workflow_call:
    inputs:
      os:
        required: true
        type: string
      python-version:
        required: true
        type: string
      model:
        required: true
        type: string
      codeCovPython:
        required: true
        type: string
        default: "3.12"
    secrets:
      HF_TOKEN:
        required: false
      CODECOV_TOKEN:
        required: false
  workflow_dispatch:
    inputs:
      os:
        required: false
        type: string
        default: "Large_Linux" # can instead use "Large_Windows" or the default OSes like "macos-latest"
      python-version:
        required: false
        type: string
        default: "3.12"
      model:
        required: false
        type: string
        default: "transformers_gpt2_cpu" # also try "llamacpp_llama2_7b_cpu", etc
      codeCovPython:
        required: true
        type: string
        default: "3.12"
      commit_id:
        description: 'Branch or Commit ID (optional)'
        required: false
        type: string

jobs:
  # Installs guidance with its test extras on the requested runner and Python
  # version, runs the model_integration and model_specific test suites against
  # the single model named by `inputs.model`, and optionally uploads coverage.
  cpu_tests:
    runs-on: ${{ inputs.os }}
    steps:
      # `a && b || c` idiom: use the supplied commit_id on manual dispatch when
      # one was given, otherwise the commit that triggered the run.
      # NOTE(review): `commit_id` is only declared under `workflow_dispatch`
      # inputs in this file, not under `workflow_call`; when invoked as a
      # reusable workflow this presumably always falls back to github.sha,
      # even if the caller was dispatched with a commit_id -- confirm.
      - name: Checkout repo at ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
      - name: Set up Python ${{ inputs.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ inputs.python-version }}
      - name: Set up uv
        uses: astral-sh/setup-uv@v7
      # Exactly one of the next two install steps runs, selected by Python
      # version; they differ only in the `onnxruntime-genai` extra.
      - name: Install guidance and dependencies
        shell: bash
        if: ${{ inputs.python-version != '3.14' }}
        run: |
          uv pip install --system -e .[llamacpp,transformers,onnxruntime-genai,test]
          uv pip install --system accelerate  # required if using smaller quantizations
      - name: Install guidance and dependencies (3.14/ONNX workaround)
        # Same install without the onnxruntime-genai extra; see
        # https://github.com/microsoft/onnxruntime/issues/26547
        shell: bash
        if: ${{ inputs.python-version == '3.14' }}
        run: |
          uv pip install --system -e .[llamacpp,transformers,test]
          uv pip install --system accelerate  # required if using smaller quantizations
      # Runs pytest with coverage (XML + terminal report) for the selected model.
      - name: cpu_tests for ${{ inputs.model }}
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          pytest -vv --cov=guidance --cov-report=xml --cov-report=term-missing \
            --selected_model ${{ inputs.model }} \
            ./tests/model_integration ./tests/model_specific
      # Coverage is uploaded only from the matrix entry whose Python version
      # equals `codeCovPython`.
      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v5
        if: ${{ (inputs.codeCovPython == inputs.python-version) }}
        with:
          token: ${{ secrets.CODECOV_TOKEN }}


================================================
FILE: .github/workflows/call_gpu_tests.yml
================================================
name: call_gpu_tests

on:
  workflow_call:
    inputs:
      os:
        required: true
        type: string
      python-version:
        required: true
        type: string
      model:
        required: true
        type: string
      codeCovPython:
        required: true
        type: string
        default: "3.12"
    secrets:
      HF_TOKEN:
        required: false
      CODECOV_TOKEN:
        required: false
  workflow_dispatch:
    inputs:
      os:
        required: false
        type: string
        default: "gpu-runner"
      python-version:
        required: false
        type: string
        default: "3.12"
      model:
        required: false
        type: string
        default: "llamacpp_llama2_7b_gpu" # also try "transformers_gpt2_gpu", "transformers_phi2_gpu", etc
      codeCovPython:
        required: true
        type: string
        default: "3.12"
      commit_id:
        description: 'Branch or Commit ID (optional)'
        required: false
        type: string

jobs:
  # Installs the CUDA toolkit and guidance on the requested GPU runner, checks
  # that torch can see a CUDA device, runs the model_integration and
  # model_specific test suites against `inputs.model`, and optionally uploads
  # coverage.
  gpu_tests:
    runs-on: ${{ inputs.os }}
    steps:
      # `a && b || c` idiom: use the supplied commit_id on manual dispatch when
      # one was given, otherwise the commit that triggered the run.
      # NOTE(review): `commit_id` is only declared under `workflow_dispatch`
      # inputs in this file, not under `workflow_call`; when invoked as a
      # reusable workflow this presumably always falls back to github.sha,
      # even if the caller was dispatched with a commit_id -- confirm.
      - name: Checkout repo at ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
      - name: Set up Python ${{ inputs.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ inputs.python-version }}
      - name: Set up uv
        uses: astral-sh/setup-uv@v7
      # Prints GPU status, installs CUDA toolkit 12.6 via apt, and appends its
      # bin directory to GITHUB_PATH so later steps find the CUDA tools.
      - name: Install NVIDIA SDK
        shell: bash
        run: |
          nvidia-smi
          sudo apt-get --yes update
          sudo apt-get --yes install cuda-toolkit-12.6
          echo "/usr/local/cuda-12.6/bin" >> $GITHUB_PATH
      - name: Install other packages
        shell: bash
        run: |
          uv pip install --system accelerate gpustat
      # Exactly one of the next two install steps runs, selected by Python
      # version; they differ only in the `onnxruntime-genai` extra.
      # CMAKE_ARGS="-DGGML_CUDA=on" presumably enables the CUDA backend when
      # the llamacpp extra is built from source -- confirm.
      - name: Install guidance in ${{ inputs.os }}
        shell: bash
        if: ${{ inputs.python-version != '3.14' }}
        run: |
          CMAKE_ARGS="-DGGML_CUDA=on" uv pip install --system -e .[llamacpp,transformers,onnxruntime-genai,test]
      - name: Install guidance in ${{ inputs.os }} (3.14/ONNX workaround)
        # Same install without the onnxruntime-genai extra; see
        # https://github.com/microsoft/onnxruntime/issues/26547
        shell: bash
        if: ${{ inputs.python-version == '3.14' }}
        run: |
          CMAKE_ARGS="-DGGML_CUDA=on" uv pip install --system -e .[llamacpp,transformers,test]
      # Fail fast if torch cannot see a CUDA device, before running the tests.
      - name: Check GPU available
        shell: bash
        run: |
          python -c "import torch; assert torch.cuda.is_available()"
      # Runs pytest with coverage (XML + terminal report) for the selected model.
      - name: gpu_tests for ${{ inputs.model }}
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          pytest -vv --cov=guidance --cov-report=xml --cov-report=term-missing \
            --selected_model ${{ inputs.model }} \
            ./tests/model_integration ./tests/model_specific
      # Coverage is uploaded only from the matrix entry whose Python version
      # equals `codeCovPython`.
      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v5
        if: ${{ (inputs.codeCovPython == inputs.python-version) }}
        with:
          token: ${{ secrets.CODECOV_TOKEN }}


================================================
FILE: .github/workflows/ci_credentials.yml
================================================
# These access secrets, so should only be run on local branches.

name: CI Tests - Credentialed
permissions:
  contents: read


on:
  push:
    branches:
      - main
  workflow_dispatch:
    inputs:
      commit_id:
        description: 'Branch or Commit ID (optional)'
        required: false
        type: string
  schedule:
    # * is a special character in YAML so we quote this string
    # Run at 09:00 UTC every day
    - cron:  '00 09 * * *'

jobs:
  # Runs ./tests/need_credentials once per listed Python version, with API
  # keys and endpoints injected from repository secrets and variables.
  credentialed_tests:
    runs-on: ubuntu-latest
    environment: test
    strategy:
      fail-fast: false # Don't cancel all on first failure
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
    permissions:
      id-token: write  # for Azure CLI login
    steps:
      # Use the supplied commit_id on manual dispatch when one was given,
      # otherwise the commit that triggered the run.
      - name: Checkout repo at ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Set up uv
        uses: astral-sh/setup-uv@v7
      - name: Install guidance
        run: |
          uv pip install --system -e .[all,test]
      - name: Model tests
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # Configure OpenAI
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # Configure environment for Azure AI Studio
          AZUREAI_STUDIO_PHI4_ENDPOINT: ${{ vars.AZUREAI_STUDIO_PHI4_ENDPOINT }}
          AZUREAI_STUDIO_PHI4_MODEL_NAME: ${{ vars.AZUREAI_STUDIO_PHI4_MODEL_NAME }}
          AZUREAI_STUDIO_PHI4_KEY: ${{ secrets.AZUREAI_STUDIO_PHI4_KEY }}
          # Do not configure the environment for Azure OpenAI, so those tests will
          # be skipped. GitHub cannot authenticate.
        run: |
          pytest -vv --cov=guidance --cov-report=xml --cov-report=term-missing \
            ./tests/need_credentials
      # Coverage is uploaded only from the matrix entry whose Python version
      # equals the CODECOV_PYTHON repository variable.
      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v5
        if: ${{ (vars.CODECOV_PYTHON == matrix.python-version) }}
        with:
          token: ${{ secrets.CODECOV_TOKEN }}


================================================
FILE: .github/workflows/ci_docs.yml
================================================
# These access secrets, so should only be run on local branches.

name: CI Tests - Docs
permissions:
  contents: read


on:
  push:
    branches:
      - main
  workflow_dispatch:
    inputs:
      commit_id:
        description: 'Branch or Commit ID (optional)'
        required: false
        type: string
  schedule:
    # * is a special character in YAML so we quote this string
    # Run at 07:00 UTC every day
    - cron:  '00 07 * * *'

jobs:
  # Verifies that the Python code embedded in README.md still runs: the code
  # is extracted into ./readme.py by a helper script and then executed, on the
  # two listed Python versions.
  check_ReadMe:
    runs-on: Large_Linux
    environment: test
    strategy:
      fail-fast: false # Don't cancel all on first failure
      matrix:
        python-version: ["3.10", "3.14"]
    steps:
    # Use the supplied commit_id on manual dispatch when one was given,
    # otherwise the commit that triggered the run.
    - name: Checkout repo at ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
      uses: actions/checkout@v6
      with:
        ref: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v6
      with:
        python-version: ${{ matrix.python-version }}
    - name: Set up uv
      uses: astral-sh/setup-uv@v7
    - name: Install guidance
      run: |
        uv pip install --system -e .[all,test]
    # Writes the README's Python snippets to ./readme.py ...
    - name: Extract Python code
      run: python ./scripts/extract_python_from_readme.py --input_file ./README.md --output_file ./readme.py
    # ... and runs them; a non-zero exit fails the job.
    - name: Run extracted Python
      run: python ./readme.py

================================================
FILE: .github/workflows/ci_linux.yml
================================================
# CI Tests which run on Linux machines

# These access secrets, so should only be run on local branches.

# Ideally, the CI tests would be a single workflow, but several issues
# (especially varied OS support) mean that it is hard to keep a single
# workflow green.

name: CI Tests - Linux
permissions:
  contents: read

on:
  workflow_dispatch:
    inputs:
      commit_id:
        description: 'Branch or Commit ID (optional)'
        required: false
        type: string
  schedule:
    # * is a special character in YAML so we quote this string
    # Run at 09:30 UTC every day
    - cron:  '30 09 * * *'


jobs:
  # Calls the reusable CPU test workflow on the Large_Linux runner for every
  # combination of the listed Python versions and models.
  cpu_small:
    strategy:
      fail-fast: false # Don't cancel all on first failure
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
        model:
          - "transformers_gpt2_cpu"
          - "llamacpp_llama3.2_3b_cpu"
    uses: ./.github/workflows/call_cpu_tests.yml
    with:
      os: Large_Linux
      python-version: ${{ matrix.python-version }}
      model: ${{ matrix.model }}
      # The called workflow uploads coverage only for this Python version.
      codeCovPython: ${{ vars.CODECOV_PYTHON }}
    secrets:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

  # Calls the reusable CPU test workflow on the Large_Linux runner for every
  # combination of the listed Python versions and models, minus the excluded
  # ONNX Runtime / Python 3.14 pairing.
  cpu_big:
    strategy:
      fail-fast: false # Don't cancel all on first failure
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
        model:
          - "transformers_llama3_8b_cpu"
          - "transformers_phi4_mini_cpu"
          - "onnxruntime_phi4_mini_instruct"
        exclude:
          # https://github.com/microsoft/onnxruntime/issues/26547
          - model: "onnxruntime_phi4_mini_instruct"
            python-version: "3.14"  # Waiting for ONNX update
    uses: ./.github/workflows/call_cpu_tests.yml
    with:
      os: Large_Linux
      python-version: ${{ matrix.python-version }}
      model: ${{ matrix.model }}
      # The called workflow uploads coverage only for this Python version.
      codeCovPython: ${{ vars.CODECOV_PYTHON }}
    secrets:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

  # Calls the reusable GPU test workflow on the "gpu-runner" runner for every
  # combination of the listed Python versions and models, minus the excluded
  # ONNX Runtime / Python 3.14 pairing.
  gpu_tests:
    strategy:
      fail-fast: false # Don't cancel all on first failure
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
        model:
          - "transformers_gpt2_gpu"
          - "transformers_phi4_mini_gpu"
          - "onnxruntime_phi4_mini_instruct"
        exclude:
          # https://github.com/microsoft/onnxruntime/issues/26547
          - model: "onnxruntime_phi4_mini_instruct"
            python-version: "3.14"  # Waiting for ONNX update
    uses: ./.github/workflows/call_gpu_tests.yml
    with:
      os: "gpu-runner"
      python-version: ${{ matrix.python-version }}
      model: ${{ matrix.model }}
      # The called workflow uploads coverage only for this Python version.
      codeCovPython: ${{ vars.CODECOV_PYTHON }}
    secrets:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}


================================================
FILE: .github/workflows/ci_macos.yml
================================================
# CI Tests which run on MacOS machines

# These access secrets, so should only be run on local branches.

# Ideally, the CI tests would be a single workflow, but several issues
# (especially varied OS support) mean that it is hard to keep a single
# workflow green.

# macOS has been particularly troublesome because of the small disk space
# allocations on all the VMs; this is what led to the --selected_model
# machinery.

name: CI Tests - MacOS
permissions:
  contents: read

on:
  workflow_dispatch:
    inputs:
      commit_id:
        description: 'Branch or Commit ID (optional)'
        required: false
        type: string
  schedule:
    # * is a special character in YAML so we quote this string
    # Run at 09:10 UTC every day
    - cron:  '10 09 * * *'

jobs:
  # Calls the reusable CPU test workflow on the "macos-latest" runner for
  # every combination of the listed Python versions and models.
  cpu_small:
    strategy:
      fail-fast: false # Don't cancel all on first failure
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]
        model:
          - "transformers_gpt2_cpu"
          - "llamacpp_llama3.2_3b_cpu"
    uses: ./.github/workflows/call_cpu_tests.yml
    with:
      os: "macos-latest"
      python-version: ${{ matrix.python-version }}
      model: ${{ matrix.model }}
      # The called workflow uploads coverage only for this Python version.
      codeCovPython: ${{ vars.CODECOV_PYTHON }}
    secrets:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      # Forwarded so the called workflow's Codecov upload step (which runs
      # when python-version == codeCovPython) has a token, as in ci_linux.yml.
      CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

================================================
FILE: .github/workflows/ci_windows.yml
================================================
# CI Tests which run on Windows machines

# These access secrets, so should only be run on local branches.

# Ideally, the CI tests would be a single workflow, but several issues
# (especially varied OS support) mean that it is hard to keep a single
# workflow green. If any one OS is likely to lag slightly in support,
# it is Windows.

name: CI Tests - Windows
permissions:
  contents: read

on:
  workflow_dispatch:
    inputs:
      commit_id:
        description: 'Branch or Commit ID (optional)'
        required: false
        type: string
  schedule:
    # * is a special character in YAML so we quote this string
    # Run at 09:30 UTC every day
    - cron:  '30 09 * * *'

jobs:
  # Calls the reusable CPU test workflow on the "Large_Windows" runner for
  # every combination of the listed Python versions and models.
  cpu_small:
    strategy:
      fail-fast: false # Don't cancel all on first failure
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
        model:
          - "transformers_gpt2_cpu"
          - "llamacpp_llama3.2_3b_cpu"
    uses: ./.github/workflows/call_cpu_tests.yml
    with:
      os: "Large_Windows"
      python-version: ${{ matrix.python-version }}
      model: ${{ matrix.model }}
      # The called workflow uploads coverage only for this Python version.
      codeCovPython: ${{ vars.CODECOV_PYTHON }}
    secrets:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      # Forwarded so the called workflow's Codecov upload step (which runs
      # when python-version == codeCovPython) has a token, as in ci_linux.yml.
      CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

  # Calls the reusable CPU test workflow on the "Large_Windows" runner for
  # every combination of the listed Python versions and models, minus the
  # excluded ONNX Runtime / Python 3.14 pairing.
  cpu_big:
    strategy:
      fail-fast: false # Don't cancel all on first failure
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
        model:
          - "transformers_llama3_8b_cpu"
          - "transformers_phi4_mini_cpu"
          - "onnxruntime_phi4_mini_instruct"
        exclude:
          # https://github.com/microsoft/onnxruntime/issues/26547
          - model: "onnxruntime_phi4_mini_instruct"
            python-version: "3.14"  # Waiting for ONNX update
    uses: ./.github/workflows/call_cpu_tests.yml
    with:
      os: "Large_Windows"
      python-version: ${{ matrix.python-version }}
      model: ${{ matrix.model }}
      # The called workflow uploads coverage only for this Python version.
      codeCovPython: ${{ vars.CODECOV_PYTHON }}
    secrets:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      # Forwarded so the called workflow's Codecov upload step (which runs
      # when python-version == codeCovPython) has a token, as in ci_linux.yml.
      CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}


================================================
FILE: .github/workflows/code_quality.yml
================================================
name: Code Quality

env:
  PYTHON_VERSION: "3.12"

on:
  pull_request:
  workflow_dispatch:
    inputs:
      commit_id:
        description: 'Branch or Commit ID (optional)'
        required: false
        type: string
  schedule:
    # Run at 10:00 UTC every day
    - cron: "00 10 * * *"


jobs:
  # Advisory formatting check: runs `ruff format --check` and the ruff import
  # sorting rules. Neither check fails the job (continue-on-error); instead,
  # if either one fails, a neutral check run is published on the commit
  # telling the contributor which commands to run locally.
  format_ruff:
    name: Check format with ruff
    runs-on: ubuntu-latest
    permissions:
      # Needed by the final step, which creates a check run via the REST API.
      checks: write
    steps:
      # Use the supplied commit_id on manual dispatch when one was given,
      # otherwise the commit that triggered the run.
      - name: Check out repo ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Set up uv
        uses: astral-sh/setup-uv@v7
      - name: Do dev install
        run: uv pip install --system -e .[dev]
      # The step id is read by the final step's `if:` via steps.<id>.outcome.
      - name: Check format with ruff
        shell: bash
        id: check_format
        continue-on-error: true
        run: |
          if ! ruff format --check; then
            echo "::warning title=ruff format::Files need re-formatting (run 'ruff format .' locally)"
            exit 78  # no longer works in github, but would mark action step with a warning
          fi
      - name: Check imports with ruff
        shell: bash
        id: check_import
        continue-on-error: true
        run: |
          # This is separate from formatting. See:
          # https://docs.astral.sh/ruff/formatter/#sorting-imports
          if ! ruff check --select I,RUF022; then
            echo "::warning title=ruff import::Files need import sorting (run 'ruff check --select I,RUF022 --fix' locally to auto-fix)"
            exit 78  # no longer works in github, but would mark action step with a warning
          fi
      # `outcome` is the step result before continue-on-error is applied, so
      # this fires when either check above failed even though the job goes on.
      - name: Mark step with a warning
        if: ${{ steps.check_format.outcome == 'failure' || steps.check_import.outcome == 'failure' }}
        uses: actions/github-script@v8
        with:
          script: |
            await github.rest.checks.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              name: 'Failed ruff checks',
              head_sha: context.sha,
              status: 'completed',
              conclusion: 'neutral',
              completed_at: new Date().toISOString(),
              output: {
                title: 'ruff found violations',
                summary: 'Run `ruff format .` and `ruff check --select I,RUF022 --fix` locally and push the changes.'
              }
            })

  # This is a separate job because we don't want to enforce linting at all.
  # It will have too many errors initially and is likely to deter contributors.
  # Informational lint run: executes `ruff check` with the project's
  # configuration. The step is continue-on-error, so violations never fail
  # the job.
  ruff-linting:
    name: Linting with ruff
    runs-on: ubuntu-latest
    permissions:
      checks: read
    steps:
      # Use the supplied commit_id on manual dispatch when one was given,
      # otherwise the commit that triggered the run.
      - name: Check out repo ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Set up uv
        uses: astral-sh/setup-uv@v7
      - name: Do dev install
        run: uv pip install --system -e .[dev]
      - name: Run ruff linting
        shell: bash
        continue-on-error: true
        run: |
          ruff check


  # Informational type check: installs guidance with the `all` and `dev`
  # extras, lets mypy install missing stub packages, then runs mypy over the
  # `guidance` package. Both mypy steps are continue-on-error, so type errors
  # never fail the job.
  run-mypy:
    name: Run informational mypy
    runs-on: ubuntu-latest
    permissions:
      checks: read
    steps:
      # Use the supplied commit_id on manual dispatch when one was given,
      # otherwise the commit that triggered the run.
      - name: Check out repo ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Set up uv
        uses: astral-sh/setup-uv@v7
      - name: Do guidance install
        run: uv pip install --system -e .[all,dev]
      # First pass only exists to pull in missing type-stub packages.
      - name: Get mypy type packages
        continue-on-error: true
        run: mypy --install-types --non-interactive guidance
      - name: Run mypy
        shell: bash
        continue-on-error: true
        run: |
          mypy guidance

          echo "==========================================="
          echo "Done"

================================================
FILE: .github/workflows/notebook_tests.yml
================================================
# These access secrets, so should only be run on local branches.

# Not part of the regular CI run, since notebook tests seem
# particularly flaky

name: CI Tests - Notebook

# Triggers: pushes to main, manual dispatch (optionally at a specific
# branch/commit), and a daily schedule.
on:
  push:
    branches:
      - main
  workflow_dispatch:
    inputs:
      commit_id:
        description: 'Branch or Commit ID (optional)'
        required: false
        type: string
  schedule:
    # * is a special character in YAML so we quote this string
    # Run at 10:00 UTC every day
    - cron:  '00 10 * * *'

jobs:
  # Runs tests/notebooks/test_notebooks.py once per Python version in the matrix.
  notebook_tests:
    # NOTE(review): "Large_Linux" is not a GitHub-hosted runner label;
    # presumably a custom larger-runner label configured for the org - confirm.
    runs-on: "Large_Linux"
    # Uses the `test` deployment environment (and therefore its secrets/variables).
    environment: test
    strategy:
      fail-fast: false # Don't cancel all on first failure
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
    permissions:
      # NOTE(review): no Azure login step is present in this job; confirm
      # whether this permission is still required.
      id-token: write  # for Azure CLI login
    steps:
      # For manual (workflow_dispatch) runs, use the supplied commit_id when it
      # is non-empty; in every other case use the triggering commit (github.sha).
      - name: Checkout repo at ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Set up uv
        uses: astral-sh/setup-uv@v7
      - name: Install guidance
        shell: bash
        run: |
          uv pip install --system -e .[all,llamacpp,test]
      # NOTE(review): gpustat is installed but no step in this job invokes it
      # directly; presumably it is needed by the notebooks themselves - confirm.
      - name: Install gpustat
        shell: bash
        run: |
          uv pip install --system gpustat
      - name: Notebook tests
        shell: bash
        # Credentials are exposed to the test process as environment variables.
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # Configure OpenAI
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          # Run the non-AOAI notebooks
          pytest -vv --cov=guidance --cov-report=xml --cov-report=term-missing --cov-append \
            ./tests/notebooks/test_notebooks.py
      # Upload coverage from exactly one matrix entry: the one whose Python
      # version equals the CODECOV_PYTHON configuration variable.
      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v5
        if: ${{ (vars.CODECOV_PYTHON == matrix.python-version) }}
        with:
          token: ${{ secrets.CODECOV_TOKEN }}


================================================
FILE: .github/workflows/pull_request.yml
================================================
name: Pull Request

# Triggers: every pull request, manual dispatch (optionally at a specific
# branch/commit), and a daily schedule.
on:
  pull_request:
  workflow_dispatch:
    inputs:
      # Optional ref to test; the jobs fall back to github.sha when it is empty.
      commit_id:
        description: 'Branch or Commit ID (optional)'
        required: false
        type: string
  schedule:
    # Run at 09:00 UTC every day
    - cron: "00 09 * * *"

jobs:
  # Unit tests across every OS / Python version combination in the matrix.
  # Also verifies that a minimal install (no extras) can import the package.
  unit_tests:
    strategy:
      fail-fast: false # Don't cancel all on first failure
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
    runs-on: ${{ matrix.os }}
    steps:
      # For manual (workflow_dispatch) runs, use the supplied commit_id when it
      # is non-empty; in every other case use the triggering commit (github.sha).
      - name: Checkout repo at ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Set up uv
        uses: astral-sh/setup-uv@v7
      # Install with no extras first, so the import check below catches any
      # accidental hard dependency on an optional package.
      - name: Minimal install
        run: |
          uv pip install --system -e .
      - name: Attempt import
        run: |
          python -c "import guidance"
      # Now add the `test-unit` extra needed to run the unit tests.
      - name: Bigger install
        run: |
          uv pip install --system -e .[test-unit]
      - name: Unit Tests
        shell: bash
        run: |
          pytest -vv --cov=guidance --cov-report=xml --cov-report=term-missing \
            ./tests/unit
      # Upload coverage only from matrix entries whose Python version equals
      # the CODECOV_PYTHON configuration variable.
      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v5
        if: ${{ (vars.CODECOV_PYTHON == matrix.python-version) }}
        with:
          token: ${{ secrets.CODECOV_TOKEN }}

  # Model-backed CPU tests. Each matrix combination is delegated to the
  # reusable workflow call_cpu_tests.yml with the inputs listed under `with:`.
  cpu_tests:
    strategy:
      fail-fast: false # Don't cancel all on first failure
      matrix:
        os: ["Large_Linux"]  # , "Large_Windows"]
        # Only two Python versions are exercised here (3.10 and 3.14).
        python-version: ["3.10", "3.14"]
        model:
          - "transformers_gpt2_cpu"
    uses: ./.github/workflows/call_cpu_tests.yml
    with:
      os: ${{ matrix.os }}
      python-version: ${{ matrix.python-version }}
      model: ${{ matrix.model }}
      # Passed through so the called workflow can decide which run uploads coverage
      # (presumably mirroring the CODECOV_PYTHON check in unit_tests - confirm).
      codeCovPython: ${{ vars.CODECOV_PYTHON }}

  # gpu_tests:
  #   strategy:
  #     fail-fast: false # Don't cancel all on first failure
  #     matrix:
  #       os: ["gpu-runner"]
  #       python-version: ["3.10", "3.13"]
  #       model:
  #         - "transformers_gpt2_gpu"
  #         - "llamacpp_llama2_7b_gpu"
  #   uses: ./.github/workflows/call_gpu_tests.yml
  #   with:
  #     os: ${{ matrix.os }}
  #     python-version: ${{ matrix.python-version }}
  #     model: ${{ matrix.model }}


================================================
FILE: .github/workflows/pypi_upload.yml
================================================
name: Build wheels

# Triggers: a published GitHub release, or a manual run (optionally at a
# specific branch/commit).
on:
  release:
    types: [published]
  workflow_dispatch:  # Enable manual run
    inputs:
      # Optional ref to build; the build jobs fall back to github.sha when it is empty.
      commit_id:
        description: 'Branch or Commit ID (optional)'
        required: false
        type: string

jobs:
  # Builds the wheel with `uv build --wheel` and publishes it as the
  # `bdist_files` workflow artifact.
  build_wheels:
    name: Build wheel distribution
    runs-on: ubuntu-latest
    steps:
      # For manual (workflow_dispatch) runs, use the supplied commit_id when it
      # is non-empty; in every other case use the triggering commit (github.sha).
      - name: Checkout repo at ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
      - name: Set up Python 3.11
        uses: actions/setup-python@v6
        with:
          python-version: '3.11'
      - name: Set up uv
        uses: astral-sh/setup-uv@v7

      - name: Build bdist
        run: |
          uv build --wheel

      # Consumed by the assemble_wheels job.
      - name: Upload bdist
        uses: actions/upload-artifact@v7
        with:
          name: bdist_files
          path: dist/*.whl

  # Builds the source distribution with `uv build --sdist` and publishes it as
  # the `sdist_files` workflow artifact.
  build_sdist:
    name: Build source distribution
    runs-on: ubuntu-latest
    steps:
      # For manual (workflow_dispatch) runs, use the supplied commit_id when it
      # is non-empty; in every other case use the triggering commit (github.sha).
      - name: Checkout repo at ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
      - name: Set up Python 3.11
        uses: actions/setup-python@v6
        with:
          python-version: '3.11'
      - name: Set up uv
        uses: astral-sh/setup-uv@v7

      - name: Build sdist (pep517)
        run: |
          uv build --sdist

      # Consumed by the assemble_wheels job.
      - name: Upload sdist
        uses: actions/upload-artifact@v7
        with:
          name: sdist_files
          path: dist/*.tar.gz


  # Merges the sdist and wheel artifacts into a single `dist` directory and
  # re-uploads the result as `collected_dist_files` for the later jobs.
  assemble_wheels:
    name: Combine wheels
    needs: [build_wheels, build_sdist]
    runs-on: ubuntu-latest
    steps:
      # Both downloads target the same `dist` directory.
      - uses: actions/download-artifact@v8
        with:
          name: sdist_files
          path: dist
      
      - uses: actions/download-artifact@v8
        with:
          name: bdist_files
          path: dist

      - uses: actions/upload-artifact@v7
        with:
          path: ./dist/*
          name: collected_dist_files

  # Installs the freshly built distribution on each OS / Python combination.
  # Only installation is verified: the smoke test at the end is commented out.
  test_wheels:
    name: Test Wheels
    needs: [assemble_wheels]
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-14, macos-latest]
        python-version: ["3.11", "3.12"]
    runs-on: ${{ matrix.os }}
    steps:
      # No checkout here: the job works purely from the collected artifact.
      - uses: actions/download-artifact@v8
        with:
          name: collected_dist_files
          path: wheelhouse
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Set up uv
        uses: astral-sh/setup-uv@v7
      # `-f ./wheelhouse/` adds the downloaded artifacts as a find-links
      # location for resolving `guidance`.
      # NOTE(review): the package index is still consulted as well; confirm the
      # local build is the one selected.
      - run: uv pip install --system guidance -f ./wheelhouse/
        name: Install guidance from wheels
      - run: uv pip install --system transformers torch
        name: Other installs
      # - run: python -c "import guidance; import transformers; lm = guidance.models.Transformers('gpt2'); lm += '1,2,3,4,5,' + guidance.gen('num', max_tokens=5, temperature=0); print(f'\n Transformers Version:{transformers.__version__}\n\n{str(lm)=}\n'); assert lm['num'].startswith('6')"
      #   name: Run smoke test

  # Uploads the collected distributions to PyPI once test_wheels has passed.
  publish_wheels:
    permissions:
      # NOTE(review): id-token: write is what OIDC-based publishing needs, but
      # the publish step below authenticates with a password token; confirm
      # which authentication path is intended.
      id-token: write
    name: Publish wheels on pypi
    needs: [test_wheels]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/download-artifact@v8
        with:
          name: collected_dist_files
          path: dist

      - name: Publish package to PyPI
        # NOTE(review): the action's README documents `release/v1` as the ref
        # to use; confirm that `v1` resolves to a valid tag or branch.
        uses: pypa/gh-action-pypi-publish@v1
        # Publish only when the triggering ref is a tag, so manual runs from a
        # branch download the artifacts but skip the upload.
        if: startsWith(github.ref, 'refs/tags')
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}


================================================
FILE: .github/workflows/widget_build.yml
================================================
name: Widget Build
# Workflow-wide default: read-only access to repository contents.
permissions:
  contents: read

# Triggers: every pull request, manual dispatch (optionally at a specific
# branch/commit), and a daily schedule.
on:
  pull_request:
  workflow_dispatch:
    inputs:
      commit_id:
        description: 'Branch or Commit ID (optional)'
        required: false
        type: string
  schedule:
    # Run at 10:00 UTC every day
    - cron: "00 10 * * *"

jobs:
  # Checks that the graphpaper-inline widget builds from a clean checkout.
  build_widget:
    # Every `run:` step uses bash and executes inside the widget's directory.
    defaults:
      run:
        shell: bash
        working-directory: ./client/graphpaper-inline
    runs-on: ubuntu-latest
    steps:
      # For manual (workflow_dispatch) runs, use the supplied commit_id when it
      # is non-empty; in every other case use the triggering commit (github.sha).
      - name: Checkout repo at ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
        uses: actions/checkout@v6
        with:
          ref: ${{ github.event_name == 'workflow_dispatch' && inputs.commit_id || github.sha }}
      - uses: actions/setup-node@v6
        with:
          node-version: 18
      - name: Install dependencies
        run: |
          npm install
      # Runs `npm run build` and copies dist/index.html into guidance/resources.
      - name: Build
        run: |
          ./build-to-guidance.sh
        

================================================
FILE: .gitignore
================================================
notebooks/local_scratch
__pycache__/
.vscode
.vs
.idea/
/build
/dist
*.egg-info
*.diskcache
.ipynb_checkpoints
node_modules
.eggs/
.env
.DS_Store
venv/
.venv/

# Ignore native library built by setup
guidance/*.so
guidance/_rust/*.so
guidance/_rust/target/
guidance/_rust/Cargo.lock
*.pyd

notebooks/**/*.papermill_out.ipynb

.mypy_cache/*

**/scratch.*

# Claude Code generated files
CLAUDE.md


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing

This Project welcomes contributions, suggestions, and feedback. All contributions, suggestions, and feedback you submitted are accepted under the [Project's license](./LICENSE.md). You represent that if you do not own copyright in the code that you have the authority to submit it under the [Project's license](./LICENSE.md). All feedback, suggestions, or contributions are not confidential.

The Project abides by the Organization's [code of conduct](https://github.com/guidance-ai/governance/blob/main/CODE-OF-CONDUCT.md) and [trademark policy](https://github.com/guidance-ai/governance/blob/main/TRADEMARKS.md).

# Development Notes

We welcome contributions to `guidance`, and this document exists to provide useful information to contributors.

## Developer Setup

Start by creating a fresh environment with something similar to:
```bash
conda create --name guidancedev python=3.12
conda activate guidancedev
```

Install guidance (without CUDA):
```bash
python -m pip install -e .[all,test,llamacpp,transformers]
```

Alternatively, install guidance with CUDA support. There are various ways to do this. We recommend:
```bash
conda install pytorch pytorch-cuda=12.1 -c pytorch -c nvidia
CMAKE_ARGS="-DGGML_CUDA=on" python -m pip install -e .[all,test,llamacpp,transformers]
```

## Running Tests

To run a basic test suite locally:
```bash
python -m pytest ./tests/
```
which runs our basic test suite.
Where an LLM is required, this will default to using GPT2 on the CPU.

To change that default, run
```bash
python -m pytest --selected_model <MODELNAME> ./tests/
```
where `<MODELNAME>` is taken from one of the selected_model_name options defined in `./tests/conftest.py`.

Alternatively, the default value for `--selected_model` can be set via the `GUIDANCE_SELECTED_MODEL` environment variable.
This may be useful when trying to use a debugger when running `pytest`, and setting the extra command line argument in the debugger configuration is tricky.
Just remember that the environment variable needs to be set _before_ starting PyCharm/VSCode etc.

## Adding LLMs to the test matrix

Our tests run on a variety of LLMs.
These fall into three categories: CPU-based, GPU-based and endpoint-based (which need credentials).

### New CPU or GPU-based models

Due to the limited resources of the regular GitHub runner machines, the LLM under test is a dimension of our test matrix (otherwise the GitHub runners will tend to run out of RAM and/or hard drive space).
New models should be configured in `conftest.py`.
The model will then be available via the `selected_model` fixture for all tests.
If you have a test which should only run for particular models, you can use the `selected_model_name` fixture to check, and call `pytest.skip()` if necessary.
An example of this is given in `test_llama_cpp.py`.

### New endpoint based models

If your model requires credentials, then those will need to be added to our GitHub repository as secrets.
The endpoint itself (and any other required information) should be configured as environment variables too.
When the test runs, the environment variables will be set, and can then be used to configure the model as required.
See `test_azureai_openai.py` for examples of this being done.

## Formatting & Linting

We use `ruff` to format our codebase.
To install the correct version, run `pip install -e .[dev]`.
You can then run `ruff format /path/to/modified/file.py` to format the code.
The path can also be an entire directory, or omitted entirely to format all files beneath the current directory.
There are (rare) cases where manual formatting is preferable; for these [`ruff` provides pragmas for suppression](https://docs.astral.sh/ruff/formatter/#format-suppression).
To sort imports, use `ruff check --select I /path/to/modified/file.py --fix`.
These commands are run (but not enforced *yet*) in the build.



---
Part of MVG-0.1-beta.
Made with love by GitHub. Licensed under the [CC-BY 4.0 License](https://creativecommons.org/licenses/by-sa/4.0/).


================================================
FILE: GOVERNANCE.md
================================================
# Governance Policy

This document provides the governance policy for the Project. Maintainers agree to this policy and to abide by all Project policies, including the [code of conduct](https://github.com/guidance-ai/governance/blob/main/CODE-OF-CONDUCT.md), [trademark policy](https://github.com/guidance-ai/governance/blob/main/TRADEMARKS.md), and [antitrust policy](https://github.com/guidance-ai/governance/blob/main/ANTITRUST.md) by adding their name to the [maintainers.md file](./MAINTAINERS.md).

## 1. Roles.

This project may include the following roles. Additional roles may be adopted and documented by the Project.

**1.1. Maintainers**. Maintainers are responsible for organizing activities around developing, maintaining, and updating the Project. Maintainers are also responsible for determining consensus. This Project may add or remove Maintainers with the approval of the current Maintainers.

**1.2. Contributors**. Contributors are those that have made contributions to the Project.

## 2. Decisions.

**2.1. Consensus-Based Decision Making**. Projects make decisions through consensus of the Maintainers. While explicit agreement of all Maintainers is preferred, it is not required for consensus. Rather, the Maintainers will determine consensus based on their good faith consideration of a number of factors, including the dominant view of the Contributors and nature of support and objections. The Maintainers will document evidence of consensus in accordance with these requirements.

**2.2. Appeal Process**. Decisions may be appealed by opening an issue and that appeal will be considered by the Maintainers in good faith, who will respond in writing within a reasonable time. If the Maintainers deny the appeal, the appeal may be brought before the Organization Steering Committee, who will also respond in writing in a reasonable time.

## 3. How We Work.

**3.1. Openness**. Participation is open to anyone who is directly and materially affected by the activity in question. There shall be no undue financial barriers to participation.

**3.2. Balance**. The development process should balance the interests of Contributors and other stakeholders. Contributors from diverse interest categories shall be sought with the objective of achieving balance.

**3.3. Coordination and Harmonization**. Good faith efforts shall be made to resolve potential conflicts or incompatibility between releases in this Project.

**3.4. Consideration of Views and Objections**. Prompt consideration shall be given to the written views and objections of all Contributors.

**3.5. Written procedures**. This governance document and other materials documenting this project's development process shall be available to any interested person.

## 4. No Confidentiality.

Information disclosed in connection with any Project activity, including but not limited to meetings, contributions, and submissions, is not confidential, regardless of any markings or statements to the contrary.

## 5. Trademarks.

Any names, trademarks, logos, or goodwill developed by and associated with the Project (the "Marks") are controlled by the Organization. Maintainers may only use these Marks in accordance with the Organization's trademark policy. If a Maintainer resigns or is removed, any rights the Maintainer may have in the Marks revert to the Organization.

## 6. Amendments.

Amendments to this governance policy may be made by affirmative vote of 2/3 of all Maintainers, with approval by the Organization's Steering Committee.

---
Part of MVG-0.1-beta.
Made with love by GitHub. Licensed under the [CC-BY 4.0 License](https://creativecommons.org/licenses/by-sa/4.0/).


================================================
FILE: LICENSE.md
================================================
MIT License

Copyright (c) The Guidance Contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: MAINTAINERS.md
================================================
# Maintainers

This document lists the Maintainers of the Project. Maintainers may be added once approved by the existing maintainers as described in the [Governance document](./GOVERNANCE.md). By adding your name to this list you are agreeing to abide by the Project governance documents and to abide by all of the Organization's policies, including the [code of conduct](https://github.com/guidance-ai/governance/blob/main/CODE-OF-CONDUCT.md), [trademark policy](https://github.com/guidance-ai/governance/blob/main/TRADEMARKS.md), and [antitrust policy](https://github.com/guidance-ai/governance/blob/main/ANTITRUST.md). If you are participating because of your affiliation with another organization (designated below), you represent that you have the authority to bind that organization to these policies.

| **NAME** | **Handle** | **Affiliated Organization** |
| --- | --- | --- |
| Scott Lundberg | [slundberg](https://github.com/slundberg) | |
| Harsha Nori | [Harsha-Nori](https://github.com/Harsha-Nori) | Microsoft |
| Marco Tulio Ribeiro | [marcotcr](https://github.com/marcotcr) | Google |

---
Part of MVG-0.1-beta.
Made with love by GitHub. Licensed under the [CC-BY 4.0 License](https://creativecommons.org/licenses/by-sa/4.0/).


================================================
FILE: README.md
================================================
<div align="right">
  <a href="https://discord.gg/cjPfAK43dz"><img src="https://img.shields.io/badge/Discord-Join%20Us-5865F2?logo=discord&logoColor=white" alt="Discord"></a>
  <a href="mailto:guidanceai@microsoft.com"><img src="https://img.shields.io/badge/Email-guidanceai%40microsoft.com-0078D4?logo=microsoft-outlook&logoColor=white" alt="Email"></a>
  <img src="https://img.shields.io/badge/Hours-10am--2pm%20Pacific-gray" alt="Hours">
</div>
<div align="center"><picture>
  <source media="(prefers-color-scheme: dark)" srcset="docs/figures/guidance_logo_blue_dark.svg">
  <img alt="guidance" src="docs/figures/guidance_logo_blue.svg" width="300">
</picture></div>
<br/>

**Guidance is an efficient programming paradigm for steering language models.** With Guidance, you can control how output is structured and get high-quality output for your use case—*while reducing latency and cost vs. conventional prompting or fine-tuning.* It allows users to constrain generation (e.g. with regex and CFGs) as well as to interleave control (conditionals, loops, tool use) and generation seamlessly.

   * [Install](#install)
   * [Features](#features)


## Install
Guidance is available through PyPI and supports a variety of backends (Transformers, llama.cpp, OpenAI, etc.).
If you already have the backend required for your model, you can simply run
```bash
pip install guidance
```

## Features

### A Pythonic interface for language models

When using Guidance, you can work with large language models using common Python idioms:

```python
from guidance import system, user, assistant, gen
from guidance.models import Transformers

# Could also do LlamaCpp or many other models
phi_lm = Transformers("microsoft/Phi-4-mini-instruct")

# Model objects are immutable, so this is a copy
lm = phi_lm

with system():
    lm += "You are a helpful assistant"

with user():
    lm += "Hello. What is your name?"

with assistant():
    lm += gen(max_tokens=20)

print(lm)
```
If run at the command line, this will produce output like:
```
<|system|>You are a helpful assistant<|end|><|user|>Hello. What is your name?<|end|><|assistant|>I am Phi, an AI developed by Microsoft. How can I help you today?
```
However, if running in a Jupyter notebook, then Guidance provides a widget for a richer user experience:

<img src="docs/figures/widget_basic_example_20250703.png" alt="Guidance widget showing HTML generation" />

With Guidance, it's really easy to capture generated text:

```python
# Get a new copy of the Model
lm = phi_lm

with system():
    lm += "You are a helpful assistant"

with user():
    lm += "Hello. What is your name?"

with assistant():
    lm += gen(name="lm_response", max_tokens=20)

print(f"{lm['lm_response']=}")
```

```
lm['lm_response']='I am Phi, an AI developed by Microsoft. How can I help you today?'
```

### Guarantee output syntax with constrained generation

Guidance provides an easy to use, yet immensely powerful syntax for constraining the output of a language model.
For example, a `gen()` call can be constrained to match a regular expression:

```python
lm = phi_lm

with system():
    lm += "You are a teenager"

with user():
    lm += "How old are you?"

with assistant():
    lm += gen("lm_age", regex=r"\d+", temperature=0.8)

print(f"The language model is {lm['lm_age']} years old")
```

```
The language model is 13 years old
```

Often, we know that the output has to be an item from a list we know in advance.
Guidance provides a `select()` function for this scenario:

```python
from guidance import select

lm = phi_lm

with system():
    lm += "You are a geography expert"

with user():
    lm += """What is the capital of Sweden? Answer with the correct letter.

    A) Helsinki
    B) Reykjavík 
    C) Stockholm
    D) Oslo
    """

with assistant():
    lm += select(["A", "B", "C", "D"], name="model_selection")

print(f"The model selected {lm['model_selection']}")
```

```
The model selected C
```

The constraint system offered by Guidance is extremely powerful.
It can ensure that the output conforms to any context free grammar (so long as the backend LLM has full support for Guidance).
More on this below.

### Debug grammars offline (no model API calls)

When iterating on constraints, you can validate candidate strings locally and test a full run with the `Mock` model.

```python
from guidance import gen
from guidance.models import Mock

grammar = "expr=" + gen(regex=r"\d+([+*]\d+)*", name="expr")

# 1) Validate strings directly against the grammar
assert grammar.match("expr=12+7*3") is not None
assert grammar.match("expr=12+*3") is None

# 2) Run the same grammar with a local mock model
lm = Mock(b"<s>expr=12+7*3")
lm += grammar
print(lm["expr"])  # 12+7*3
```

### Create your own Guidance functions

With Guidance, you can create your own Guidance functions which can interact with language models.
These are marked using the `@guidance` decorator.
Suppose we wanted to answer lots of multiple choice questions.
We could do something like the following:

```python
import guidance

from guidance.models import Model

ASCII_OFFSET = ord("a")

@guidance
def zero_shot_multiple_choice(
    language_model: Model,
    question: str,
    choices: list[str],
):
    with user():
        language_model += question + "\n"
        for i, choice in enumerate(choices):
            language_model += f"{chr(i+ASCII_OFFSET)} : {choice}\n"

    with assistant():
        language_model += select(
            [chr(i + ASCII_OFFSET) for i in range(len(choices))], name="string_choice"
        )

    return language_model
```
Now, define some questions:
```python
questions = [
    {
        "question" : "Which state has the northernmost capital?",
        "choices" : [
            "New South Wales",
            "Northern Territory",
            "Queensland",
            "South Australia",
            "Tasmania",
            "Victoria",
            "Western Australia",
        ],
        "answer" : 1,
    },
    {
        "question" : "Which of the following is venomous?",
        "choices" : [
            "Kangaroo",
            "Koala Bear",
            "Platypus",
        ],
        "answer" : 2,
    }
]
```
We can use our decorated function like `gen()` or `select()`.
The `language_model` argument will be filled in for us automatically:
```python
lm = phi_lm

with system():
    lm += "You are a student taking a multiple choice test."

for mcq in questions:
    lm_temp = lm + zero_shot_multiple_choice(question=mcq["question"], choices=mcq["choices"])
    converted_answer = ord(lm_temp["string_choice"]) - ASCII_OFFSET
    print(lm_temp)
    print(f"LM Answer: {converted_answer},  Correct Answer: {mcq['answer']}")
```

```
<|system|>You are a student taking a multiple choice test.<|end|><|user|>Which state has the northernmost capital?
a : New South Wales
b : Northern Territory
c : Queensland
d : South Australia
e : Tasmania
f : Victoria
g : Western Australia
<|end|><|assistant|>b
LM Answer: 1,  Correct Answer: 1
<|system|>You are a student taking a multiple choice test.<|end|><|user|>Which of the following is venomous?
a : Kangaroo
b : Koala Bear
c : Platypus
<|end|><|assistant|>c
LM Answer: 2,  Correct Answer: 2
```

Guidance functions can be composed, in order to construct a full context free grammar.
For example, we can create Guidance functions to build a simple HTML webpage (note that this is _not_ a full implementation of HTML).
We start with a simple function which will generate text which does not contain any HTML tags.
The function is marked as `stateless` to indicate that we intend to use it for composing a grammar:

```python
@guidance(stateless=True)
def _gen_text(lm: Model):
    return lm + gen(regex="[^<>]+") 
```

We can then use this function to generate text within an arbitrary HTML tag:
```python
@guidance(stateless=True)
def _gen_text_in_tag(lm: Model, tag: str):
    lm += f"<{tag}>"
    lm += _gen_text()
    lm += f"</{tag}>"
    return lm
```
Now, let us create the page header. As part of this, we need to generate a page title:
```python
@guidance(stateless=True)
def _gen_header(lm: Model):
    lm += "<head>\n"
    lm += _gen_text_in_tag("title") + "\n"
    lm += "</head>\n"
    return lm
```
The body of the HTML page is going to be filled with headings and paragraphs.
We can define a function to do each:
```python
from guidance.library import one_or_more

@guidance(stateless=True)
def _gen_heading(lm: Model):
    lm += select(
        options=[_gen_text_in_tag("h1"), _gen_text_in_tag("h2"), _gen_text_in_tag("h3")]
    )
    lm += "\n"
    return lm

@guidance(stateless=True)
def _gen_para(lm: Model):
    lm += "<p>"
    lm += one_or_more(
        select(
            options=[
                _gen_text(),
                _gen_text_in_tag("em"),
                _gen_text_in_tag("strong"),
                "<br />",
            ],
        )
    )
    lm += "</p>\n"
    return lm
```
Now, the function to define the body of the HTML itself:
```python
@guidance(stateless=True)
def _gen_body(lm: Model):
    lm += "<body>\n"
    lm += one_or_more(select(options=[_gen_heading(), one_or_more(_gen_para())]))
    lm += "</body>\n"
    return lm
```
Next, we come to the function which generates the complete HTML page.
We add the HTML start tag, then generate the header, then body, and then append the ending HTML tag:
```python
@guidance(stateless=True)
def _gen_html(lm: Model):
    lm += "<html>\n"
    lm += _gen_header()
    lm += _gen_body()
    lm += "</html>\n"
    return lm
```
Finally, we provide a user-friendly wrapper, which will allow us to:
- Set the temperature of the generation
- Capture the generated page from the Model object
```python
from guidance.library import capture, with_temperature

@guidance(stateless=True)
def make_html(
    lm,
    name: str | None = None,
    *,
    temperature: float = 0.0,
):
    return lm + capture(
        with_temperature(_gen_html(), temperature=temperature),
        name=name,
    )
```
Now, use this to generate a simple webpage:
```python
lm = phi_lm

with system():
    lm += "You are an expert in HTML"

with user():
    lm += "Create a simple and short web page about your life story."

with assistant():
    lm += make_html(name="html_text", temperature=0.7)
```

When running in a Jupyter Notebook so that the widget is active, we get the following output:

<img src="docs/figures/widget_make_html_20250703.png" alt="Guidance widget showing HTML generation with token fast-forwarding" />

Note the varying highlighting of the generation.
This is showing another of Guidance's capabilities: fast-forwarding of tokens.
The constraints imposed by a grammar often mean that some tokens are known in advance.
Guidance doesn't need the model to generate these; instead it can insert them into the generation.
This saves forward passes through the model, and hence reduces GPU usage.
For example, in the above HTML generation, Guidance always knows the last opening tag.
If the last opened tag was `<h1>` (for example), then as soon as the model generates `</`, Guidance can fill in `h1>` without needing the model to perform a forward pass.

### Generating JSON

A JSON schema is actually a context free grammar, and hence it can be used to constrain an LLM using Guidance.
This is a common enough case that Guidance provides special support for it.
A quick sample, based on a Pydantic model:
```python
import json
from pydantic import BaseModel, Field

from guidance import json as gen_json

# Pydantic model describing the JSON object the model is constrained to produce.
# The numeric bounds are not physiologically realistic; the surrounding text
# points out that the result is not a reading the model will have seen for a human.
class BloodPressure(BaseModel):
    systolic: int = Field(gt=300, le=400)  # integer in the range (300, 400]
    diastolic: int = Field(gt=0, le=20)  # integer in the range (0, 20]
    location: str = Field(max_length=50)  # at most 50 characters
    model_config = dict(extra="forbid")  # no properties beyond the three above

# `phi_lm` is assumed to be a model created earlier in this walkthrough — TODO confirm.
lm = phi_lm

with system():
    lm += "You are a doctor taking a patient's blood pressure taken from their arm"

with user():
    lm += "Report the blood pressure"

# Constrain the assistant turn to the BloodPressure schema; the generated
# JSON text is captured under the name "bp".
with assistant():
    lm += gen_json(name="bp", schema=BloodPressure)

# The capture is a plain JSON string.
print(f"{lm['bp']=}")

# Use Python's JSON library
loaded_json = json.loads(lm["bp"])
print(json.dumps(loaded_json, indent=4))

# Use Pydantic
result = BloodPressure.model_validate_json(lm["bp"])
print(result.model_dump_json(indent=8))
```
```
lm['bp']='{"systolic": 301, "diastolic": 15, "location": "arm"}'
{
    "systolic": 301,
    "diastolic": 15,
    "location": "arm"
}
{
        "systolic": 301,
        "diastolic": 15,
        "location": "arm"
}
```
Note that the generated blood pressure is not one the model will have seen for a human.
When generating JSON, a substantial number of tokens can often be fast-forwarded, due to the structural constraints imposed by the schema.


================================================
FILE: client/graphpaper-inline/.gitignore
================================================
node_modules/
build/
.DS_Store
test-results/
playwright-report/

================================================
FILE: client/graphpaper-inline/build-to-guidance.sh
================================================
#!/bin/bash
# Build the graphpaper-inline widget and copy the bundled HTML into the
# guidance package resources.
#
# -e          abort on the first failing command, so a failed `npm run build`
#             never leads to a stale (or missing) dist/index.html being copied.
# -u          treat use of an unset variable as an error.
# -o pipefail a pipeline fails if any stage fails.
# -x          echo each command as it runs (kept from the original script).
set -euxo pipefail

# The paths below are relative to this script's directory, so run from there
# regardless of the caller's working directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

npm run build
cp dist/index.html ../../guidance/resources/graphpaper-inline.html


================================================
FILE: client/graphpaper-inline/dist/.gitignore
================================================
*
!.gitignore

================================================
FILE: client/graphpaper-inline/package.json
================================================
{
    "name": "graphpaper",
    "version": "0.0.1",
    "scripts": {
        "build": "rollup -c",
        "dev": "rollup -c -w",
        "start": "sirv dist --port 3000"
    },
    "devDependencies": {
        "@rollup/plugin-commonjs": "^26.0.1",
        "@rollup/plugin-node-resolve": "^15.2.3",
        "@rollup/plugin-terser": "^0.4.4",
        "@rollup/plugin-typescript": "^11.1.6",
        "@tailwindcss/postcss": "^4.1.8",
        "@types/d3-scale": "^4.0.8",
        "@types/d3-scale-chromatic": "^3.0.3",
        "@types/dompurify": "^3.0.5",
        "@types/video.js": "^7.3.58",
        "autoprefixer": "^10.4.20",
        "cssnano": "^7.0.5",
        "postcss": "^8.4.41",
        "rollup": "^4.21.0",
        "rollup-plugin-copy": "^3.5.0",
        "rollup-plugin-html-bundle": "^0.0.3",
        "rollup-plugin-livereload": "^2.0.5",
        "rollup-plugin-postcss": "^4.0.2",
        "rollup-plugin-serve": "^1.1.1",
        "rollup-plugin-svelte": "^7.2.2",
        "sirv-cli": "^2.0.2",
        "svelte": "^4.2.18",
        "svelte-preprocess": "^6.0.2",
        "tailwindcss": "^4.1.11",
        "tslib": "^2.6.3",
        "typescript": "^5.5.4"
    },
    "dependencies": {
        "d3-interpolate": "^3.0.1",
        "d3-scale": "^4.0.2",
        "d3-scale-chromatic": "^3.1.0",
        "dompurify": "^3.1.7",
        "tailwind-scrollbar": "^4.0.2",
        "video.js": "^8.21.0"
    }
}


================================================
FILE: client/graphpaper-inline/postcss.config.js
================================================
// PostCSS plugin pipeline used when bundling the widget's CSS.
// Every plugin is enabled with its default options except cssnano,
// which is pinned to its 'default' preset.
module.exports = {
    plugins: {
        '@tailwindcss/postcss': {},
        autoprefixer: {},
        cssnano: { preset: 'default' }
    }
}

================================================
FILE: client/graphpaper-inline/rollup.config.mjs
================================================
import svelte from 'rollup-plugin-svelte';
import { sveltePreprocess } from 'svelte-preprocess';
import resolve from '@rollup/plugin-node-resolve';
import commonjs from '@rollup/plugin-commonjs';
import terser from '@rollup/plugin-terser';
import typescript from '@rollup/plugin-typescript';
import postcss from 'rollup-plugin-postcss';
import livereload from 'rollup-plugin-livereload';
// @ts-ignore
import serve from 'rollup-plugin-serve';
// @ts-ignore
import htmlBundle from 'rollup-plugin-html-bundle';
import copy from 'rollup-plugin-copy';

// Anything that is not a watch run (`rollup -c -w`, which sets ROLLUP_WATCH)
// is treated as a production build.
const production = !process.env.ROLLUP_WATCH;

// Single bundle: src/main.js -> build/bundle.js, which is then merged into an
// HTML page by the htmlBundle plugin.
export default [{
	input: 'src/main.js',
	output: {
		format: 'iife',
		name: 'app',
		file: 'build/bundle.js',
		// Source maps only for development builds.
		sourcemap: !production,
	},
	plugins: [
		typescript(),
		svelte({
			compilerOptions: {
				dev: !production
			},
			preprocess: sveltePreprocess()
		}),
		resolve({
			browser: true,
			// Resolve every `svelte` / `svelte/*` import to a single copy.
			dedupe: importee => importee === 'svelte' || importee.startsWith('svelte/'),
			extensions: ['.svelte', '.mjs', '.ts', '.js', '.json', '.node']
		}),
		commonjs(),
		postcss(),
		// The HTML template is copied next to the bundle before htmlBundle reads it.
		copy({
			targets: [
				{ src: 'src/template.html', dest: 'build' }
			]
		}),
		// Production writes dist/index.html with `inline` enabled; development
		// writes build/index.html with `inline` disabled.
		htmlBundle({
			template: 'build/template.html',
			target: production ? 'dist/index.html' : 'build/index.html',
			targetElement: 'body',
			inline: production
		}),
		// `cond && plugin()` leaves `false` in the array when the condition does
		// not hold: dev server and live reload are development-only, terser is
		// production-only.
		!production && serve('build'),
		!production && livereload('build'),
		production && terser()
	],
	watch: {
		clearScreen: false
	}
}];

================================================
FILE: client/graphpaper-inline/src/App.svelte
================================================
<!-- Main app that handles token rendering and metrics.

This has bidirectional communication between the guidance server (usually Jupyter kernel) and client.
For upcoming features, we won't be able to send all details over the wire, and will need to operate on client request.
-->
<script lang="ts">
  import './main.css';
  import TokenGrid from './TokenGrid.svelte';
  import ResizeListener from './ResizeListener.svelte';
  import {
    clientmsg,
    type GuidanceMessage,
    isAudioOutput,
    isBacktrack,
    isClientReadyAckMessage,
    isExecutionCompletedMessage,
    isExecutionStartedMessage,
    isImageOutput,
    isMetricMessage, isOutputRequestAckMessage,
    isResetDisplayMessage,
    isRoleCloserInput,
    isRoleOpenerInput,
    isTextOutput,
    isTokenOutput,
    isTraceMessage,
    isVideoOutput,
    kernelmsg,
    type NodeAttr,
    state,
    Status,
    type StitchMessage
  } from './stitch';
  import StitchHandler from './StitchHandler.svelte';
  import { onMount } from 'svelte';
  import MetricRecord from './MetricRecord.svelte';
  import Select from './Select.svelte';
  import { metricDefs } from './metrics';
  import type { MetricVal } from './interfaces';
  // import { mockNodeAttrs } from './mocks';

  let isDarkMode = false;
  
  // Everything the widget renders from. The whole object is serialized to JSON
  // when execution completes and restored from `$state` when one is received.
  interface AppState {
    // Renderable nodes accumulated from trace messages, in arrival order.
    components: Array<NodeAttr>,
    // Current execution status; also mirrored in metrics['status'].
    status: Status,
    // Latest value per metric name.
    metrics: Record<string, MetricVal>,
    // Names of the metrics currently shown in the nav bar.
    shownMetrics: Array<string>,
    // Starts true; cleared on execution-started or output-request-ack messages.
    requireFullReplay: boolean,
    // message_id of the last handled message, used to drop duplicates.
    currentMessageId: number,
    // Number of backtrack nodes handled since the last display reset.
    backtrackCount: number,
    // Number of display resets handled.
    resetCount: number,
  }
  let appState: AppState = {
    components: [],
    status: Status.Running,
    shownMetrics: [],
    metrics: {
      'status': Status.Running,
      'wall time': 0,
      'consumed': 0,
      'token reduction': 0,
      'avg latency': 0,
      // Array-valued metrics start as five zeros; a non-array update drops
      // the oldest entry and appends the new value.
      'cpu': [0., 0., 0., 0., 0.],
      'gpu': [0., 0., 0., 0., 0.],
      'ram': 0,
      'vram': 0,
    },
    requireFullReplay: true,
    // -1 so that the first real message id never looks like a duplicate.
    currentMessageId: -1,
    backtrackCount: 0,
    resetCount: 0,
  };

  let bgField: string = 'Type';
  let underlineField: string = 'Probability';

  /**
   * Applies one message from the guidance kernel to `appState`.
   *
   * - Trace messages append a renderable node, or trim nodes on backtrack.
   * - Execution-started / output-request-ack clear `requireFullReplay`.
   * - Reset-display clears the rendered nodes and counters.
   * - Metric messages update `appState.metrics` (and `status` for 'status').
   * - Execution-completed marks the run done and saves state via the `state` store.
   *
   * A message whose id equals the last handled id is logged and ignored.
   */
  const handleMessage = (msg: GuidanceMessage): void => {
    // Duplicates can randomly occur from ipywidget layer.
    if (appState.currentMessageId === msg.message_id) {
      console.log(`Duplicate message detected: ${msg.message_id}`)
      return;
    }
    appState.currentMessageId = msg.message_id;

    if (isTraceMessage(msg)) {
      if (
        isTokenOutput(msg.node_attr) ||
        isTextOutput(msg.node_attr) ||
        isRoleOpenerInput(msg.node_attr) ||
        isRoleCloserInput(msg.node_attr) ||
        isAudioOutput(msg.node_attr) ||
        isImageOutput(msg.node_attr) ||
        isVideoOutput(msg.node_attr)
      ) {
        // Every renderable node kind is handled the same way: append it.
        appState.components = [...appState.components, msg.node_attr];
      } else if (isBacktrack(msg.node_attr)) {
        const numBacktrack = msg.node_attr.n_tokens;
        console.log(`Backtracking ${numBacktrack} tokens.`);
        // `slice(0, -0)` is `slice(0, 0)`, which would discard every
        // component, so only trim when there is something to remove.
        if (numBacktrack > 0) {
          appState.components = appState.components.slice(0, -numBacktrack);
        }
        appState.backtrackCount += 1;
      }
      // Any other node_attr kind is not rendered and is ignored.
    } else if (isExecutionStartedMessage(msg) || isOutputRequestAckMessage(msg)) {
      appState.requireFullReplay = false;
    } else if (isClientReadyAckMessage(msg)) {
      // Do nothing -- server will handle replay.
    } else if (isResetDisplayMessage(msg)) {
      appState.components = [];
      // An error status survives a reset; anything else goes back to running.
      appState.status = appState.status !== Status.Error ? Status.Running : appState.status;
      appState.backtrackCount = 0;
      appState.resetCount += 1;
    } else if (isMetricMessage(msg)) {
      const name = msg.name;
      const value = msg.value;

      // Only metrics that are both tracked and defined are accepted.
      if (name in appState.metrics && name in metricDefs) {
        const metricDef = metricDefs[name];
        if (metricDef.isScalar === false) {
          if (value.constructor === Array) {
            // A full series replaces the stored one.
            appState.metrics[name] = value;
          } else {
            // A single sample slides the window: drop the oldest, append the newest.
            const series = appState.metrics[name] as Array<any>;
            appState.metrics[name] = [...series.slice(1), value as string | number];
          }
        } else if (metricDef.isScalar === true) {
          appState.metrics[name] = value;
        } else {
          console.error(`Cannot handle metric: ${name}: ${value}.`);
        }

        // NOTE(nopdive): Need to update status too.
        if (name === 'status') {
          appState.status = value as Status;
        }
      }
    } else if (isExecutionCompletedMessage(msg)) {
      appState.status = Status.Done;

      // Good time to save state.
      const savedState = JSON.stringify(appState);
      const stateMessage: StitchMessage = {
        type: 'state',
        content: savedState,
      };
      state.set(stateMessage);
    }

    // Force reactivity update
    appState = { ...appState };
  };

  // Restore a previously saved app state whenever the `state` store carries content.
  $: if ($state !== undefined && $state.content !== '') {
    // console.log("Client state received.")
    appState = JSON.parse($state.content);
  }
  // Each non-empty kernel message is parsed and routed through handleMessage.
  $: if ($kernelmsg !== undefined && $kernelmsg.content !== '') {
    const msg = JSON.parse($kernelmsg.content);
    handleMessage(msg);
  }
  // Pick the metrics to display: resource usage while running, summary
  // figures for every other status.
  $: {
    if (appState.status === Status.Running) {
      appState.shownMetrics = [
        'status',
        // 'wall time',
        'cpu',
        'ram',
        'gpu',
        'vram',
      ];
    } else {
      appState.shownMetrics = [
        'status',
        'consumed',
        'token reduction',
        'avg latency',
        // 'wall time',
      ];
    }
  }

  // Asks the kernel for output, but only when nothing has been rendered yet.
  let requestOutputIfNoMessages = () => {
    // Something is already on screen, so there is nothing to request.
    if (appState.components.length !== 0) {
      return;
    }

    console.log("No messages received: requesting output.")
    const payload = JSON.stringify({ 'class_name': 'OutputRequestMessage', 'identifier': '' });
    const request: StitchMessage = { type: 'clientmsg', content: payload };
    clientmsg.set(request);
  };

  // On mount: announce the client, schedule a fallback output request, and
  // listen for theme messages. Everything set up here is torn down again in
  // the returned cleanup.
  onMount(() => {
    const msg: StitchMessage = {
      type: 'init_stitch',
      content: ''
    };
    clientmsg.set(msg);

    // If nothing has been rendered shortly after mounting, ask for output.
    const outputRequestTimer = setTimeout(requestOutputIfNoMessages, 200 * 2);

    // Listen for theme messages from parent
    const handleThemeMessage = (event: MessageEvent) => {
      if (event.data?.type === 'theme' && event.data?.theme === 'dark') {
        isDarkMode = true;
        document.documentElement.classList.add('dark');
        console.log('[Guidance Widget] ✅ Dark mode applied via postMessage');
      }
    };

    window.addEventListener('message', handleThemeMessage);

    return () => {
      // Cancel the fallback request so it cannot fire after teardown.
      clearTimeout(outputRequestTimer);
      window.removeEventListener('message', handleThemeMessage);
    };
  });
</script>

<svelte:head>
  <title>graphpaper</title>
  <meta name="description" content="graphpaper" />
</svelte:head>

<!-- Non-visual helpers: message plumbing and height reporting. -->
<StitchHandler />
<ResizeListener />
<div class="w-full">
  <!-- Sticky header: field selectors on the left, metrics on the right. -->
  <nav class="sticky top-0 z-50 bg-white dark:bg-gray-900">
    <section class="">
      <div class="text-sm pt-2 pb-2 flex justify-between border-b border-gray-200 dark:border-gray-700">
        <!-- Controls -->
        <span class="flex mr-2">
          <Select values={["None", "Type", "Probability", "Latency (ms)"]} classes="ml-4 pl-1 bg-gray-200 dark:bg-transparent"
                  defaultValue={"Type"}
                  on:select={(selected) => bgField = selected.detail} />
          <Select values={["None", "Probability", "Latency (ms)"]} classes="border-b-2 pl-1 border-gray-400 dark:border-gray-500 bg-transparent dark:bg-transparent"
                  defaultValue={"Probability"} on:select={(selected) => underlineField = selected.detail} />
        </span>
        <!-- Metrics -->
        <!-- Commenting out scrollbar until it seems like it's actually necessary. Shows up all the time and looks ugly on website. -->
        <!-- <span class="flex mr-4 text-gray-300 dark:text-gray-400 overflow-x-scroll scrollbar-thin scrollbar-track-gray-100 dark:scrollbar-track-gray-800 scrollbar-thumb-gray-200 dark:scrollbar-thumb-gray-700"> -->
        <span class="flex mr-4 text-gray-300 dark:text-gray-400">
          {#each appState.shownMetrics as name}
            <MetricRecord value={appState.metrics[name]} metricDef={metricDefs[name]} />
          {/each}
        </span>
      </div>
    </section>
  </nav>

  <!-- Content pane -->
  <!-- isCompleted is true once the status is 'Done' or 'Error'. -->
  <section class="w-full min-h-40">
    <TokenGrid components={appState.components}
               isCompleted={['Done', 'Error'].includes(appState.status)}
               isError={appState.status === Status.Error}
               bgField={bgField} underlineField={underlineField} requireFullReplay="{appState.requireFullReplay}"
               backtrackCount={appState.backtrackCount}
               resetCount={appState.resetCount}
               isDarkMode={isDarkMode} />
  </section>
</div>


================================================
FILE: client/graphpaper-inline/src/CustomAudio.svelte
================================================
<script lang="ts">
  import { onMount } from "svelte";
  import type { MediaNode } from "./interfaces";

  // Media payload: `value` is base64 audio data (without the data URL header)
  // and `format` is used as the MIME subtype of the <audio> data URL.
  export let audioData: MediaNode;

  // Bound to the hidden <audio> element.
  let audio: HTMLAudioElement;
  let isPlaying: boolean = false;
  // Playback position as a percentage (0-100) of the duration.
  let progress: number = 0;
  // Duration and position in seconds, as shown in the time display.
  let duration: number = 0;
  let currentTime: number = 0;
  // Volume in the range 0-1, matching the range input's min/max.
  let volume: number = 1;
  let isMuted: boolean = false;
  let showVolumeSlider: boolean = false;
  // Bound to the visible waveform <canvas>.
  let waveformCanvas: HTMLCanvasElement;

  // Downsampled amplitudes and their maximum, kept at component level so the
  // audio does not have to be decoded again on every draw.
  let waveformData: any[] = [];
  let maxAmp = 0;

  // Mouse position over the waveform as a percentage of its width, used for
  // the hover indicator; -1 means the pointer is not over the waveform.
  let hoverPosition = -1;

  // Flips between playing and paused, keeping `isPlaying` in step.
  function togglePlay() {
    const shouldStart = audio.paused;
    if (shouldStart) {
      audio.play();
    } else {
      audio.pause();
    }
    isPlaying = shouldStart;
  }

  // Jumps playback to the clicked horizontal position on the timeline.
  function seek(event: MouseEvent) {
    const track = event.currentTarget as HTMLElement;
    if (track != null) {
      // Fraction of the track width that was clicked, scaled to the duration.
      const clickedFraction = event.offsetX / track.offsetWidth;
      audio.currentTime = clickedFraction * audio.duration;
    } else {
      console.error("Null seek event target");
    }
  }

  // Applies the slider's value as the new volume, unmuting when it is raised above zero.
  function changeVolume(event: Event) {
    const slider = event.target as HTMLInputElement;
    if (slider == null) {
      console.error("Null change volume event target");
      return;
    }

    volume = parseFloat(slider.value);

    // Moving the slider to an audible level implies the user wants sound.
    if (volume > 0 && isMuted) {
      isMuted = false;
    }

    // A muted player stays silent whatever the stored volume is.
    if (isMuted) {
      audio.volume = 0;
    } else {
      audio.volume = volume;
    }
  }

  // Once the <audio> element is bound, keep its volume in step with the
  // component state: silent while muted, otherwise the stored volume.
  $: if (audio) {
    // Reactively update audio volume when isMuted changes
    audio.volume = isMuted ? 0 : volume;
  }

  // Formats a duration in seconds as "m:ss" (fractions of a second are dropped).
  function formatTime(seconds: number) {
    const wholeMinutes = Math.floor(seconds / 60);
    const leftoverSeconds = Math.floor(seconds % 60);
    // Single-digit seconds get a leading zero.
    const padding = leftoverSeconds < 10 ? "0" : "";
    return wholeMinutes + ":" + padding + leftoverSeconds;
  }

  // Helper: decode a base64 string into the ArrayBuffer of its raw bytes.
  function base64ToArrayBuffer(base64: string) {
    const decoded = atob(base64);
    // atob yields one character per byte, so each char code is the byte value.
    const bytes = Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
    return bytes.buffer;
  }

  // Records where the pointer is over the timeline, as a percentage of its width.
  function setHoverPosition(event: MouseEvent) {
    const track = event.currentTarget as HTMLElement;
    if (track != null) {
      const hoveredFraction = event.offsetX / track.offsetWidth;
      hoverPosition = hoveredFraction * 100;
    } else {
      console.error("Null hover event target");
    }
  }

  // Resets the hover position to -1; drawWaveform only draws the hover line
  // for positions >= 0.
  function clearHoverPosition() {
    hoverPosition = -1;
  }

  // Offscreen canvas holding the pre-rendered "unplayed" bars, and the flag
  // saying whether it is current (cleared on resize and when playback ends).
  let staticWaveformCanvas: HTMLCanvasElement;
  let hasRenderedStatic = false;
  // Handle of the pending requestAnimationFrame callback, if any.
  let animationFrameId: number | null = null;
  // True while a decode started by drawWaveform is still in flight.
  let isDecodingWaveform = false;

  /**
   * Draws the waveform, the played portion, the progress line and the hover
   * line onto `waveformCanvas`.
   *
   * On first use the audio is decoded and downsampled to one amplitude per
   * canvas pixel; the result is cached in `waveformData` / `maxAmp`. Returns
   * without drawing when the canvas or audio data is missing, when the canvas
   * has no size yet, while a decode is already in progress, or when decoding
   * fails (the error is logged).
   */
  async function drawWaveform() {
    if (!audioData || !waveformCanvas) return;

    const canvas = waveformCanvas;
    canvas.width = canvas.clientWidth;
    canvas.height = canvas.clientHeight;
    const width = canvas.width;
    const height = canvas.height;
    const ctx = canvas.getContext("2d");
    if (ctx == null) return;

    // A canvas that has not been laid out yet has nothing to draw on, and
    // drawImage() throws for a zero-sized source canvas.
    if (width === 0 || height === 0) return;

    if (waveformData.length === 0) {
      // This function runs every animation frame. Without this guard each
      // frame that arrives while the first decode is still awaited would
      // create another AudioContext and decode the same audio again.
      if (isDecodingWaveform) return;
      isDecodingWaveform = true;

      let audioContext: AudioContext | null = null;
      try {
        audioContext = new AudioContext();
        const arrayBuffer = base64ToArrayBuffer(audioData.value);
        const decodedData = await audioContext.decodeAudioData(arrayBuffer);
        const rawData = decodedData.getChannelData(0); // use first channel

        // Downsample the raw data to one value per pixel. The block size is
        // at least 1 so clips with fewer samples than pixels do not divide
        // by zero; reads past the end of the clip count as silence.
        const samples = width;
        const blockSize = Math.max(1, Math.floor(rawData.length / samples));
        const amplitudes: number[] = new Array(samples);
        for (let i = 0; i < samples; i++) {
          let sum = 0;
          for (let j = 0; j < blockSize; j++) {
            sum += Math.abs(rawData[i * blockSize + j] ?? 0);
          }
          amplitudes[i] = sum / blockSize;
        }

        // Find maximum amplitude for normalization
        maxAmp = Math.max(...amplitudes);
        if (maxAmp === 0) maxAmp = 1; // Prevent division by zero
        waveformData = amplitudes;
      } catch (error) {
        console.error("Error decoding audio for waveform:", error);
        return;
      } finally {
        isDecodingWaveform = false;
        // The context is only needed for decoding; release it so repeated
        // players do not exhaust the browser's AudioContext budget.
        audioContext?.close().catch(() => {});
      }
    }

    // Create static canvas for unplayed portions if needed
    if (!hasRenderedStatic) {
      staticWaveformCanvas = document.createElement("canvas");
      staticWaveformCanvas.width = width;
      staticWaveformCanvas.height = height;
      const staticCtx = staticWaveformCanvas.getContext("2d");

      if (staticCtx) {
        // Draw all bars in unplayed state.
        // NOTE(review): these bars are 1.5px wide while the played bars below
        // are 2px wide, so the two layers use a different pitch — confirm
        // whether that is intended.
        const barWidth = 1.5;
        const gap = 1;
        const totalBars = Math.floor(width / (barWidth + gap));

        for (let i = 0; i < totalBars; i++) {
          const dataIndex = Math.floor((i / totalBars) * waveformData.length);
          const normalizedAmp = waveformData[dataIndex] / maxAmp;

          const barHeight = normalizedAmp * height * 0.8;
          const y = (height - barHeight) / 2;
          const x = i * (barWidth + gap);

          staticCtx.fillStyle = "#E5E5E5"; // Light gray for unplayed
          staticCtx.beginPath();
          staticCtx.roundRect(x, y, barWidth, barHeight, 1);
          staticCtx.fill();
        }
        hasRenderedStatic = true;
      }
    }

    // Clear and redraw
    ctx.clearRect(0, 0, width, height);

    // Draw static background
    if (hasRenderedStatic) {
      ctx.drawImage(staticWaveformCanvas, 0, 0);
    }

    // Played portion and progress line, only once playback has advanced.
    const progressPixel = Math.floor((progress / 100) * width);
    if (progress > 0) {
      const barWidth = 2;
      const gap = 1;
      const totalBars = Math.floor(width / (barWidth + gap));
      const barsToRedraw = Math.ceil(progressPixel / (barWidth + gap));

      for (let i = 0; i < barsToRedraw; i++) {
        const dataIndex = Math.floor((i / totalBars) * waveformData.length);
        const normalizedAmp = waveformData[dataIndex] / maxAmp;

        const barHeight = normalizedAmp * height * 0.8;
        const y = (height - barHeight) / 2;
        const x = i * (barWidth + gap);

        ctx.fillStyle = "#717171"; // Gray for played portion
        ctx.beginPath();
        ctx.roundRect(x, y, barWidth, barHeight, 1);
        ctx.fill();
      }

      // Vertical line at the current playback position.
      ctx.beginPath();
      ctx.moveTo(progressPixel, 0);
      ctx.lineTo(progressPixel, height);
      ctx.strokeStyle = "rgba(80, 80, 80, 0.7)";
      ctx.lineWidth = 2;
      ctx.stroke();
    }

    // Thin vertical line under the pointer while hovering.
    if (hoverPosition >= 0) {
      const hoverPixel = Math.floor((hoverPosition / 100) * width);
      ctx.beginPath();
      ctx.moveTo(hoverPixel, 0);
      ctx.lineTo(hoverPixel, height);
      ctx.strokeStyle = "rgba(0, 0, 0, 0.3)";
      ctx.lineWidth = 1;
      ctx.stroke();
    }
  }

  // Copies the <audio> element's position into component state.
  // `progress` is a percentage (0-100). While the duration is unknown (NaN),
  // zero, or infinite it is reported as 0 rather than NaN/Infinity, which
  // would otherwise reach the canvas math and the slider's aria-valuenow.
  function updateProgress() {
    if (audio) {
      const total = audio.duration;
      const hasUsableDuration = Number.isFinite(total) && total > 0;
      progress = hasUsableDuration ? (audio.currentTime / total) * 100 : 0;
      currentTime = audio.currentTime;
      duration = total || 0;
    }
  }

  // Per-frame loop: refreshes progress while playing, redraws the waveform,
  // and schedules itself again. The latest frame handle is kept in
  // `animationFrameId` so the loop can be cancelled.
  function renderLoop() {
    if (isPlaying) {
      updateProgress();
    }

    // Always draw waveform for hover effects
    drawWaveform();

    animationFrameId = requestAnimationFrame(renderLoop);
  }

  // Handler for the <audio> element's `ended` event: returns the player to
  // its initial, not-playing state.
  function handleEnded() {
    isPlaying = false;
    progress = 0;
    currentTime = 0;

    // Force waveform reset: clearing the flag makes the next draw rebuild the
    // static (unplayed) layer.
    hasRenderedStatic = false;
    drawWaveform();
  }

  // On mount: draw once, start the per-frame loop, and rebuild the static
  // waveform layer whenever the canvas is resized. The returned cleanup stops
  // observing and cancels the pending animation frame.
  onMount(() => {
    // Initial waveform drawing
    drawWaveform();

    // Start animation loop
    renderLoop();

    // Add resize observer
    const resizeObserver = new ResizeObserver(() => {
      // The cached static layer was rendered at the old size.
      hasRenderedStatic = false;
      drawWaveform();
    });

    if (waveformCanvas) {
      resizeObserver.observe(waveformCanvas);
    }

    return () => {
      if (waveformCanvas) {
        resizeObserver.disconnect();
      }
      if (animationFrameId) {
        cancelAnimationFrame(animationFrameId);
      }
    };
  });
</script>

<!-- Audio player card: play button, volume control, clickable waveform and
     time display, backed by a hidden <audio> element at the bottom. -->
<div
  class="bg-white dark:bg-gray-800 px-2 py-1 w-full max-w-6xl rounded-xl shadow-sm border border-gray-100 dark:border-gray-700"
>
  <!-- Main player content -->
  <div class="flex flex-col gap-2">
    <!-- Top row with play button, volume control, and waveform -->
    <div class="flex items-center gap-1">
      <!-- Play Button -->
      <button
        class="w-6 h-6 rounded-full bg-gray-800 dark:bg-gray-200 flex items-center justify-center cursor-pointer transition-all hover:bg-gray-900 dark:hover:bg-gray-300 focus:outline-none focus:ring-2 focus:ring-gray-500 dark:focus:ring-gray-400"
        on:click={togglePlay}
        aria-label="Toggle playback"
      >
        {#if isPlaying}
          <svg class="fill-white dark:fill-gray-900 w-5 h-5" viewBox="0 0 24 24">
            <rect x="7" y="6" width="3" height="12" rx="1" />
            <rect x="14" y="6" width="3" height="12" rx="1" />
          </svg>
        {:else}
          <svg class="fill-white dark:fill-gray-900 w-5 h-5" viewBox="0 0 24 24">
            <path d="M8 5.14v14l11-7-11-7z" />
          </svg>
        {/if}
      </button>

      <!-- Volume Control (moved next to play button) -->
      <div
        class="relative"
        on:mouseenter={() => (showVolumeSlider = true)}
        on:mouseleave={() => (showVolumeSlider = false)}
        role="group"
        aria-label="Volume controls"
      >
        <!-- Volume Button -->
        <button
          class="text-gray-500 dark:text-gray-400 pl-1 py-1 hover:text-gray-700 dark:hover:text-gray-300 relative z-10"
          on:click={() => (isMuted = !isMuted)}
          aria-label={isMuted ? "Unmute" : "Mute"}
          aria-pressed={isMuted}
        >
          <svg class="w-5 h-5" viewBox="0 0 24 24" fill="currentColor">
            {#if isMuted || volume === 0}
              <!-- Muted icon -->
              <path
                d="M16.5 12c0-1.77-1.02-3.29-2.5-4.03v2.21l2.45 2.45c.03-.2.05-.41.05-.63zm2.5 0c0 .94-.2 1.82-.54 2.64l1.51 1.51C20.63 14.91 21 13.5 21 12c0-4.28-2.99-7.86-7-8.77v2.06c2.89.86 5 3.54 5 6.71zM4.27 3L3 4.27 7.73 9H3v6h4l5 5v-6.73l4.25 4.25c-.67.52-1.42.93-2.25 1.18v2.06c1.38-.31 2.63-.95 3.69-1.81L19.73 21 21 19.73l-9-9L4.27 3zM12 4L9.91 6.09 12 8.18V4z"
              ></path>
            {:else if volume < 0.5}
              <!-- Low volume icon -->
              <path d="M7 9v6h4l5 5V4l-5 5H7z"></path>
            {:else}
              <!-- High volume icon -->
              <path
                d="M3 9v6h4l5 5V4L7 9H3zm13.5 3c0-1.77-1.02-3.29-2.5-4.03v8.07c1.48-.74 2.5-2.26 2.5-4.04z"
              ></path>
            {/if}
          </svg>
        </button>

        <!-- Volume Slider (appears on hover) -->
        {#if showVolumeSlider}
          <div
            class="absolute left-0 bottom-[-15px] bg-white dark:bg-gray-800 shadow-md rounded-lg p-2 transform -translate-x-1/4 transition-opacity duration-200 z-20"
            role="slider"
            aria-label="Volume"
            aria-valuemin="0"
            aria-valuemax="100"
            aria-valuenow={volume * 100}
          >
            <div class="w-24 relative h-1 rounded-full bg-gray-200 dark:bg-gray-600">
              <input
                type="range"
                min="0"
                max="1"
                step="0.01"
                bind:value={volume}
                on:input={changeVolume}
                class="absolute inset-0 opacity-0 cursor-pointer z-10 w-full"
                aria-label="Volume"
              />
              <div
                class="absolute inset-y-0 left-0 rounded-full bg-gray-600 dark:bg-gray-300"
                style="width: {volume * 100}%"
              ></div>
              <div
                class="absolute h-2 w-2 bg-gray-50 dark:bg-gray-700 border border-gray-300 dark:border-gray-500 rounded-full shadow-sm"
                style="left: calc({volume * 100}% - 6px); top: -2px"
              ></div>
            </div>
          </div>
        {/if}
      </div>

      <!-- Waveform Canvas (clickable) -->
      <div
        class="flex-grow relative cursor-pointer"
        on:click={seek}
        on:mousemove={setHoverPosition}
        on:mouseleave={clearHoverPosition}
        on:keydown={(e) => {
          // Add keyboard controls for seeking
          if (e.key === "ArrowRight") {
            audio.currentTime = Math.min(audio.duration, audio.currentTime + 5);
          } else if (e.key === "ArrowLeft") {
            audio.currentTime = Math.max(0, audio.currentTime - 5);
          }
        }}
        role="slider"
        tabindex="0"
        aria-label="Audio timeline"
        aria-valuemin="0"
        aria-valuemax="100"
        aria-valuenow={progress}
      >
        <canvas bind:this={waveformCanvas} class="w-full h-12"></canvas>
      </div>

      <!-- Time Display -->
      <div class="text-gray-700 dark:text-gray-300 whitespace-nowrap text-sm">
        {formatTime(currentTime)} / {formatTime(duration)}
      </div>
    </div>
  </div>

  <!-- Hidden audio element -->
  <audio
    bind:this={audio}
    on:timeupdate={updateProgress}
    on:ended={handleEnded}
    src={`data:audio/${audioData.format};base64,` + audioData.value}
    class="hidden"
  ></audio>
</div>


================================================
FILE: client/graphpaper-inline/src/CustomVideo.svelte
================================================
<script lang="ts">
  import "video.js/dist/video-js.css";
  import videojs from "video.js";
  import { onMount, onDestroy } from "svelte";
  import type { MediaNode } from "./interfaces";

  export let videoData: MediaNode;
  let videoElement: HTMLVideoElement;
  let player: any;

  // Wraps the bound <video> element in a video.js player shortly after mount.
  onMount(() => {
    // Debug log
    console.log("videoElement exists?", !!videoElement);

    if (!videoElement) {
      console.error("Video element not found during mount");
      return;
    }

    const playerOptions = {
      controls: true,
      fluid: true,
      playsinline: true,
      controlBar: {
          fullscreenToggle: true
      }
    };

    // Add a small delay to ensure DOM is ready
    setTimeout(() => {
      try {
        player = videojs(videoElement, playerOptions);
        console.log("Player initialized successfully");
      } catch (e) {
        console.error("Failed to initialize player:", e);
      }
    }, 0);
  });

  // Releases the video.js player, if one was created.
  onDestroy(() => {
    if (!player) {
      return;
    }
    player.dispose();
  });
</script>

<!-- Container for the video.js player. The <source> MIME type is derived from
     the same `format` used in the data URL so the two cannot disagree. -->
<div class="video-container">
  <video bind:this={videoElement} class="video-js" playsinline allow="fullscreen" controls>
    <source src={`data:video/${videoData.format};base64,${videoData.value}`} type={`video/${videoData.format}`} />
  </video>
</div>

<style>
  .video-container {
    width: 500px; /* Todo: make this more dynamic */
  }
</style>


================================================
FILE: client/graphpaper-inline/src/MetricRecord.svelte
================================================
<!-- Each metric is displayed as a card. -->
<script lang="ts">
  import { type MetricDef, type MetricVal } from './interfaces';
  import Sparkline from './Sparkline.svelte';

  // Definition of the metric; the markup reads its description, name,
  // precision and units.
  export let metricDef: MetricDef;
  // Current value: an array is drawn as a sparkline, a number is formatted
  // with `metricDef.precision`, anything else is rendered as-is.
  export let value: MetricVal;

  // Padding passed to the Sparkline component.
  const minibarPadding = {
    'left': 0,
    'right': 0,
    'top': 5,
    'bottom': 3
  };
</script>

<style>
    /* A dot after every metric card except the last one. */
    .dot-divider:not(:last-child)::after {
        content: "•"; /* Dot separator */
        color: #d1d5db; /* Dot color light mode */
    }
    /*
     * The `dark` class is set on the document element (see App.svelte), which
     * is outside this component. Component styles are scoped, so the ancestor
     * must be wrapped in :global() or the selector matches nothing in this
     * component and the rule never applies.
     * NOTE(review): margin-left is declared only for dark mode — confirm
     * whether it belongs in the base rule instead.
     */
    :global(.dark) .dot-divider:not(:last-child)::after {
        color: #6b7280; /* Dot color dark mode */
        margin-left: 0.5rem;
    }
</style>

<!-- One metric card: the metric name followed by either a sparkline (array
     values) or the value itself with optional units. The description is
     exposed as the tooltip. -->
<span class={`dot-divider flex items-center text-xs whitespace-nowrap px-1`} title="{metricDef.description}">
    <span>
        {#if value.constructor === Array}
            <span class={`text-gray-600 dark:text-gray-400 whitespace-nowrap pr-[0.125rem]`}>{metricDef.name}</span>
            <Sparkline values={value} svgClass={"w-8 h-4 inline"} padding={minibarPadding} />
        {:else}
            <span class={`text-gray-600 dark:text-gray-400 whitespace-nowrap pr-[0.125rem]`}>{metricDef.name}</span>
            {#if typeof value === "number"}
                <span class={`font-medium text-gray-700 dark:text-gray-300 `}>{value.toFixed(metricDef.precision)}
                  {#if metricDef.units !== ''}
                    <span class="">{metricDef.units}</span>
                  {/if}
                </span>
            {:else}
                <span class={`font-medium text-center text-gray-700 dark:text-gray-300 `}>{value}
                  {#if metricDef.units !== ''}
                    <span class="">{metricDef.units}</span>
                  {/if}
                </span>
          {/if}
        {/if}
    </span>
</span>

================================================
FILE: client/graphpaper-inline/src/ResizeListener.svelte
================================================
<!-- Handles resizing of content, especially important for jupyter notebooks. -->
<script lang="ts">
    import { onMount, onDestroy } from 'svelte';
    import { clientmsg, type StitchMessage} from "./stitch";

    // How often (ms) the document height is sampled.
    const INTERVAL_MS = 20;
    let interval: any = null;
    let htmlElem: any;
    // Kept so the "load" listener can be removed again on destroy.
    let onLoad: (() => void) | null = null;

    onMount(() => {
        htmlElem = document.querySelector('html');
        let prevHeight = 0;

        // Starts the polling loop once; posts a 'resize' message only when the
        // measured height differs from the last height that was reported.
        const startPolling = () => {
            if (interval !== null) {
                return;
            }
            interval = setInterval(() => {
                const height = htmlElem.getBoundingClientRect().height;
                if (height !== prevHeight && htmlElem.checkVisibility()) {
                    const msg: StitchMessage = {
                        'type': 'resize',
                        'content': {
                            height: `${height}px`,
                            width: '100%'
                        }
                    };
                    clientmsg.set(msg);
                    // Remember what was reported so an unchanged height is not
                    // re-sent on every tick. Only updated while visible, so a
                    // hidden document still reports once it becomes visible.
                    prevHeight = height;
                }
            }, INTERVAL_MS);
        };

        if (document.readyState === 'complete') {
            // "load" has already fired; waiting for it would never start polling.
            startPolling();
        } else {
            onLoad = startPolling;
            window.addEventListener("load", onLoad);
        }
    });
    onDestroy(() => {
        if (onLoad !== null) {
            window.removeEventListener("load", onLoad);
            onLoad = null;
        }
        if (interval !== null) {
            clearInterval(interval);
            interval = null;
        }
    });
</script>

================================================
FILE: client/graphpaper-inline/src/Select.svelte
================================================
<!-- Custom select dropdown -->
<script lang="ts">
    import { clickOutside } from "./clickoutside";
    import { createEventDispatcher } from 'svelte';

    // Extra CSS classes appended to the toggle label.
    export let classes: string = "";
    // Options listed in the dropdown.
    export let values: Array<string> = [];
    // Text shown in the toggle until an option is picked.
    export let defaultValue: string = "";

    const dispatch = createEventDispatcher();

    // Whether the option list is currently open.
    let showList = false;
    // Currently displayed choice.
    let selected = defaultValue;

    // Toggle the list when the button is clicked.
    function onDropdownClick(_: MouseEvent) {
        showList = !showList;
    }

    // Close the list when a click lands outside the component.
    function onOutClick(_: MouseEvent) {
        showList = false;
    }

    // Record the choice, close the list and notify the parent via a 'select' event.
    function selectOption(option: string) {
        selected = option;
        showList = false;
        dispatch('select', selected);
    }
</script>

<!-- The wrapper's clickOutside action covers the whole component, so the button needs no action of its own. -->
<div class="relative" use:clickOutside on:outclick={onOutClick}>
    <button on:click={onDropdownClick}>
        <span class={`mr-4 inline-flex justify-between items-center hover:bg-gray-700 hover:text-white dark:hover:bg-transparent dark:hover:text-white dark:text-white ${classes}`}>
            <span class="">
                {selected}
            </span>
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" class="size-4">
                <path fill-rule="evenodd" d="M4.22 6.22a.75.75 0 0 1 1.06 0L8 8.94l2.72-2.72a.75.75 0 1 1 1.06 1.06l-3.25 3.25a.75.75 0 0 1-1.06 0L4.22 7.28a.75.75 0 0 1 0-1.06Z" clip-rule="evenodd" />
            </svg>
        </span>
    </button>
    {#if showList}
        <ul role="listbox" class="absolute pt-2 pb-3 z-10 mt-1 bg-white dark:bg-[#5A5F72] shadow border border-gray-300 dark:border-gray-600">
            {#each values as value, i}
                <!-- aria-selected mirrors the real selection; Enter/Space choose the focused option. -->
                <li
                    class={`w-full px-4 py-1 ${i === 0 ? "mt-1" : ""} hover:bg-gray-700 hover:text-white dark:hover:bg-gray-600 dark:hover:text-white dark:text-white text-nowrap`}
                    role="option"
                    tabindex="0"
                    aria-selected={value === selected}
                    on:click={(_) => selectOption(value)}
                    on:keydown={(e) => {
                        if (e.key === "Enter" || e.key === " ") {
                            e.preventDefault();
                            selectOption(value);
                        }
                    }}
                >{value}</li>
            {/each}
        </ul>
    {/if}
</div>


================================================
FILE: client/graphpaper-inline/src/Sparkline.svelte
================================================
<!-- Sparkline for tracking distributions or values over time scaled from 0 to 1. -->

<script lang="ts">
  import { scaleLinear } from 'd3-scale';

  // Series to plot; each entry is expected to lie in [0, 1] (the y domain below).
  export let values;
  // CSS classes applied to the <svg> element.
  export let svgClass: string;
  // Inner padding, in pixels, between the measured box and the plotted line.
  export let padding = {
    'left': 0,
    'right': 0,
    'top': 0,
    'bottom': 0
  };
  $: typedValues = values as Array<number>;

  // Measured size of the wrapping element (bound in the markup).
  let height = 0;
  let width = 0;

  // Maps a sample index to an x pixel. The drawable span runs from
  // `padding.left` to `width - padding.right`, mirroring how yScale treats
  // top/bottom; adding `padding.left` to the far end would overshoot the box.
  $: xScale = scaleLinear()
    .domain([0, typedValues.length-1])
    .range([padding.left, width - padding.right]);

  // Maps a value in [0, 1] to a y pixel; inverted because SVG y grows downward.
  $: yScale = scaleLinear()
    .domain([0, 1])
    .range([height - padding.bottom, padding.top]);

  // Pixel coordinates for every sample, in order.
  $: pathData = typedValues.map((v, i) => ({
    x: xScale(i),
    y: yScale(v),
  }))
</script>

<!-- Wrapper whose measured size drives the scales; the line is one path of M/L segments. -->
<div
  class="inline-block font-medium text-gray-700 dark:text-gray-300"
  bind:clientHeight={height}
  bind:clientWidth={width}
>
  <svg class={svgClass}>
    <g>
      <path
        d={pathData.map((point, idx) => `${idx === 0 ? 'M' : 'L'} ${point.x} ${point.y}`).join(' ')}
        fill="none"
        stroke-width="1.25"
        stroke="#374151"
        class="stroke-gray-700 dark:stroke-gray-300"
      />
    </g>
  </svg>
</div>

================================================
FILE: client/graphpaper-inline/src/StitchHandler.svelte
================================================
<!-- Handles stitch client and kernel messages. -->
<script lang="ts">
    import type { Unsubscriber } from 'svelte/store';
    import { kernelmsg, clientmsg, type StitchMessage, state } from './stitch';
    import { onMount, onDestroy } from 'svelte';

    // Routes window messages coming from the embedding parent frame:
    //  - 'kernelmsg'  -> published on the kernelmsg store
    //  - 'init_state' -> published on the state store, then a ClientReadyMessage is sent back
    // Anything else is ignored.
    const handleMessage = (event: MessageEvent<any>) => {
        if (event.source !== window.parent) {
            return;
        }

        const data = event.data;
        // The `in` operator throws a TypeError on primitives (e.g. a string
        // payload), so non-object messages are rejected before it is used.
        if (typeof data !== 'object' || data === null || !('type' in data)) {
            return;
        }

        if (data.type === 'kernelmsg') {
            let stitchMessage: StitchMessage = data;
            kernelmsg.set(stitchMessage);
        } else if (data.type === 'init_state') {
            let stitchMessage: StitchMessage = data;
            state.set(stitchMessage);
            const clientReadyMsg: StitchMessage = {
                type: 'clientmsg',
                content: JSON.stringify({ 'class_name': 'ClientReadyMessage' })
            };
            clientmsg.set(clientReadyMsg);
        }
    };

    let unsubscribeClient: Unsubscriber | null = null;
    let unsubscribeState: Unsubscriber | null = null;

    // Posts a store value to the parent frame; undefined values are skipped.
    const relayToParent = (msg: StitchMessage | undefined) => {
        if (msg === undefined) {
            return;
        }
        window.parent.postMessage(msg, "*");
    };

    // While mounted, every client message and state value is forwarded to the parent.
    onMount(() => {
        unsubscribeClient = clientmsg.subscribe((msg) => relayToParent(msg));
        unsubscribeState = state.subscribe((msg) => relayToParent(msg));
    });

    // Drop both subscriptions when the component goes away.
    onDestroy(() => {
        unsubscribeClient?.();
        unsubscribeState?.();
    });
</script>

<svelte:window on:message={handleMessage} />

================================================
FILE: client/graphpaper-inline/src/TokenGrid.svelte
================================================
<!-- Token grid that exposes each token and hover info. -->
<script lang="ts">
  import {
    isRoleOpenerInput,
    isRoleCloserInput,
    isTokenOutput,
    isTextOutput,
    isAudioOutput,
    type NodeAttr,
    type RoleOpenerInput,
    isImageOutput,
    isVideoOutput,
    type AudioOutput,
    type VideoOutput,
    type ImageOutput,
  } from "./stitch";
  import CustomAudio from "./CustomAudio.svelte";
  import CustomVideo from "./CustomVideo.svelte";
  import TokenGridItem from "./TokenGridItem.svelte";
  import {
    type FlatToken,
    type TokenCallback,
    type MultimodalNode,
    type MediaType,
  } from "./interfaces";
  import { longhover } from "./longhover";
  import DOMPurify from "dompurify";

  import {interpolateGreens, interpolateBlues} from "d3-scale-chromatic";

  // Node attributes to render; consumed incrementally by the reactive block further down.
  export let components: Array<NodeAttr>;
  // While false a cursor is shown after the tokens and metric-based styling is not applied.
  export let isCompleted: boolean;
  // When true, metric-based styling is not applied (same as while incomplete).
  export let isError: boolean;
  // When true, a "..." continuation token is rendered ahead of the tokens.
  export let requireFullReplay: boolean = false;
  // What the token background encodes: "Type", "Probability", "Latency (ms)"; any other value means no background.
  export let bgField: string = "Token";
  // What the token underline encodes: "Probability" or "Latency (ms)"; any other value removes the border.
  export let underlineField: string = "Probability";
  // A change in this counter clears all parsed tokens/media/role state.
  export let backtrackCount: number = 0;
  // A change in this counter clears all parsed tokens/media/role state.
  export let resetCount: number = 0;
  // Selects the dark palette in bgTokenStyle.
  export let isDarkMode: boolean = false;

  // Current per-token style callbacks; reassigned by the reactive block from the fields above.
  let underline: TokenCallback = (_: FlatToken) => "";
  let bg: TokenCallback = (_: FlatToken) => "";

  // Text shown in the tooltip for the field named `s`.
  // Yields undefined for unknown field names and for a "Type" lookup on a
  // token that carries none of the input/forwarded/generated flags.
  const tokenDisplayValue = (x: FlatToken, s: string) => {
    switch (s) {
      case "Probability":
        return x.prob?.toFixed(3);
      case "Latency (ms)":
        return x.latency_ms?.toFixed(0);
      case "Type":
        if (x.is_input) {
          return "Input";
        }
        if (x.is_force_forwarded) {
          return "Forwarded";
        }
        if (x.is_generated) {
          return "Generated";
        }
        return undefined;
      case "None":
        return "";
      default:
        return undefined;
    }
  };

  // Weighted brightness of an `rgb(...)` / `rgba(...)` colour string.
  // Logs an error and yields 0 when the string does not start with three integer channels.
  const getBrightness = (rgba: string) => {
    const parsed = /rgba?\(\s*(\d+),\s*(\d+),\s*(\d+)/.exec(rgba);
    if (parsed === null) {
      console.error("Invalid RGBA format.");
      return 0;
    }

    const [red, green, blue] = parsed
      .slice(1, 4)
      .map((channel) => parseInt(channel, 10));
    return red * 0.299 + green * 0.587 + blue * 0.114;
  };

  // Picks a readable text colour for the given background:
  // black when the background brightness is above 186, white otherwise.
  const getTextColor = (backgroundColor: string) => {
    if (getBrightness(backgroundColor) > 186) {
      return "rgba(0, 0, 0, 1)";
    }
    return "rgba(255, 255, 255, 1)";
  };

  // Inline style (background + contrasting text colour) for a metric value.
  //   x     - metric value; undefined or non-finite values produce no style.
  //   color - optional colour ramp; defaults to d3's interpolateBlues.
  // Returns a CSS declaration string, or "" when there is nothing to colour.
  const bgStyle = (
    x: number | undefined,
    color?: ((x: number) => string) | undefined
  ) => {
    // NaN / ±Infinity can reach here from the latency log-ratio (e.g. a zero
    // latency, or a maximum latency of 1) and cannot be placed on a colour ramp.
    if (x === undefined || !Number.isFinite(x)) {
      return "";
    }

    // Only evaluate the default ramp when no override was supplied.
    const colorVal = color !== undefined ? color(x) : interpolateBlues(x);
    const textColor = getTextColor(colorVal);
    return `background-color: ${colorVal}; color: ${textColor};`;
  };

  // Inline style (bottom border colour) for a metric value.
  //   x     - metric value; undefined or non-finite values produce no style.
  //   color - optional colour ramp; defaults to interpolateGreens over x * 0.7.
  // Returns a CSS declaration string, or "" when there is nothing to colour.
  const underlineStyle = (
    x: number | undefined,
    color?: ((x: number) => string) | undefined
  ) => {
    // NaN / ±Infinity can reach here from the latency log-ratio (e.g. a zero
    // latency, or a maximum latency of 1) and cannot be placed on a colour ramp.
    if (x === undefined || !Number.isFinite(x)) {
      return "";
    }

    // Only evaluate the default ramp when no override was supplied.
    const colorVal = color !== undefined ? color(x) : interpolateGreens(x * 0.7);
    return `border-bottom-color: ${colorVal};`;
  };

  // Background style by token origin. The first matching flag wins:
  // input, then force-forwarded, then generated. Tokens with no flag get a
  // transparent, slightly off-white colour so they can be told apart when debugging.
  const bgTokenStyle = (x: FlatToken, darkMode: boolean) => {
    const asCss = (fill: string) => `background-color: ${fill};`;

    if (x.is_input) {
      return asCss("rgba(255, 255, 255, 0)");
    }
    if (x.is_force_forwarded) {
      return asCss(darkMode ? "rgba(88, 119, 173, 1)" : "rgba(243, 244, 246, 1)");
    }
    if (x.is_generated) {
      return asCss(darkMode ? "rgba(88, 119, 173, 1)" : "rgba(229, 231, 235, 1)");
    }
    return asCss("rgba(255, 255, 254, 0)");
  };

  // Finds every non-overlapping occurrence of each target word in `text`.
  //   text        - string to search.
  //   targetWords - words to look for; empty strings are ignored.
  // Returns [start, end, word] triples (end exclusive), sorted by start and then by end.
  function findTargetWords(
    text: string,
    targetWords: string[]
  ): [number, number, string][] {
    // NOTE(nopdive): Not the most efficient approach, but there aren't many special words anyway.

    const results: [number, number, string][] = [];
    for (const targetWord of targetWords) {
      // An empty word matches at every position without advancing `start`,
      // which would make the scan below loop forever.
      if (targetWord.length === 0) {
        continue;
      }

      let start = 0;
      while ((start = text.indexOf(targetWord, start)) !== -1) {
        results.push([start, start + targetWord.length, targetWord]);
        start += targetWord.length;
      }
    }

    results.sort((a, b) => (a[0] === b[0] ? a[1] - b[1] : a[0] - b[0]));
    return results;
  }

  // Tests whether the token span [tokenStart, tokenEnd) overlaps the match
  // span [matchStart, matchEnd). Returns [overlapped, noSpecialOverride];
  // the second flag is set only when the token covers the tail of the match
  // (greying out the following word looks bad).
  const checkOverlapped = (
    tokenStart: number,
    tokenEnd: number,
    matchStart: number,
    matchEnd: number
  ) => {
    const lastTokenPos = tokenEnd - 1;
    const lastMatchPos = matchEnd - 1;

    // Token starts at or before the match and reaches into it.
    if (tokenStart <= matchStart && lastTokenPos >= matchStart) {
      return [true, false];
    }

    // Token begins inside the match and runs to or past its end.
    if (tokenStart <= lastMatchPos && tokenEnd >= matchEnd) {
      return [true, true];
    }

    // Token lies entirely within the match (or there is no overlap at all).
    const contained = tokenStart >= matchStart && tokenEnd <= matchEnd;
    return [contained, false];
  };

  // Parse state built up incrementally from `components`.
  let multimodalNodes: MultimodalNode[] = []; // media outputs, in arrival order
  let tokens: FlatToken[] = []; // tokens to render
  let activeOpenerRoles: RoleOpenerInput[] = []; // stack of currently open roles
  let activeCloserRoleText: string[] = []; // closer text expected for each open role
  let specialSet = new Set<string>(); // texts treated as special (closers, role markers)
  let namedRoleSet: Record<string, string> = {};
  let displayedRoles = new Set<string>(); // "<role>-<n>" keys whose label was already shown
  let roleCounters: Record<string, number> = {}; // how many times each role has been opened
  let currentTokenIndex = 0; // next index of `components` to process
  let statCounter: Record<string, number> = {}; // running stats, e.g. "latency.max"
  let lastBacktrackCount = 0; // last `backtrackCount` acted upon
  let lastResetCount = 0; // last `resetCount` acted upon
  
  // Reset currentTokenIndex and clear tokens when backtracking occurs.
  // Fires whenever `backtrackCount` differs from the last value acted upon.
  // All parse state is dropped and the index returns to 0, so the main
  // reactive block processes `components` again from the start.
  $: if (backtrackCount !== lastBacktrackCount) {
    currentTokenIndex = 0;
    tokens = [];  // Clear existing tokens
    multimodalNodes = [];  // Clear existing media
    activeOpenerRoles = [];  // Reset role tracking
    activeCloserRoleText = [];  // Reset role tracking
    specialSet.clear();  // Clear special token set
    namedRoleSet = {};  // Clear named role mapping
    displayedRoles.clear();  // Clear displayed roles tracking
    roleCounters = {};  // Clear role counters
    // Remember the handled value so this block does not fire again for it.
    lastBacktrackCount = backtrackCount;
  }
  
  // Reset state when ResetDisplayMessage is received.
  // Fires whenever `resetCount` differs from the last value acted upon and
  // performs the same clearing as the backtrack handler.
  $: if (resetCount !== lastResetCount) {
    currentTokenIndex = 0;  // Process `components` again from the start
    tokens = [];
    multimodalNodes = [];
    activeOpenerRoles = [];
    activeCloserRoleText = [];
    specialSet.clear();
    namedRoleSet = {};
    displayedRoles.clear();
    roleCounters = {};
    // Remember the handled value so this block does not fire again for it.
    lastResetCount = resetCount;
  }
  
  // Main reactive block: incrementally converts `components` into `tokens`
  // and `multimodalNodes`, then selects the underline/background style
  // callbacks according to the current display settings.
  $: {
    if (components.length === 0) {
      // Reset
      tokens = [];
      multimodalNodes = [];
      activeOpenerRoles = [];
      activeCloserRoleText = [];
      specialSet.clear();
      namedRoleSet = {};
      displayedRoles.clear();
      roleCounters = {};
      currentTokenIndex = 0;
    } else if (currentTokenIndex > components.length) {
      // Handle case where components were removed (e.g., after reset + partial replay)
      currentTokenIndex = components.length;
    }

    // Only components that have not been processed yet are visited.
    for (; currentTokenIndex < components.length; currentTokenIndex += 1) {
      const nodeAttr = components[currentTokenIndex];
      // Wraps a media output together with the role stack and index it appeared at.
      const createMediaNode = (
        mediaType: MediaType,
        node: AudioOutput | VideoOutput | ImageOutput
      ): MultimodalNode => {
        return {
          type: "media",
          data: {
            type: mediaType,
            value: node.value,
            format: node.format,
            context: {
              roleStack: [...activeOpenerRoles], // Clone current role stack
              index: currentTokenIndex,
            },
          },
        };
      };

      if (isRoleOpenerInput(nodeAttr)) {
        activeOpenerRoles.push(nodeAttr);
        activeCloserRoleText.push(nodeAttr.closer_text || "");

        // Increment counter for this role type
        const roleName = nodeAttr.name || "";
        roleCounters[roleName] = (roleCounters[roleName] || 0) + 1;
      } else if (isRoleCloserInput(nodeAttr)) {
        // Close the most recent role
        if (activeOpenerRoles.length > 0) {
          activeOpenerRoles.pop();
          activeCloserRoleText.pop();
        }
      } else if (isAudioOutput(nodeAttr)) {
        multimodalNodes.push(createMediaNode("audio", nodeAttr));
      } else if (isImageOutput(nodeAttr)) {
        multimodalNodes.push(createMediaNode("image", nodeAttr));
      } else if (isVideoOutput(nodeAttr)) {
        multimodalNodes.push(createMediaNode("video", nodeAttr));
      } else if (isTokenOutput(nodeAttr) || (isTextOutput(nodeAttr) && !nodeAttr.value.includes("<|im_start|>") && !nodeAttr.value.includes("<|im_end|>"))) {
        // Text outputs carrying chat role markers are skipped entirely by the condition above.
        if (activeOpenerRoles.length === 0) {
          if (
            activeCloserRoleText.length !== 0 &&
            activeCloserRoleText[activeCloserRoleText.length - 1] ===
              nodeAttr.value
          ) {
            // No role is open and this text equals the pending closer text: mark it special.
            const token: FlatToken = {
              text: nodeAttr.value,
              prob: isTokenOutput(nodeAttr) ? nodeAttr.token.prob : 0,
              latency_ms: nodeAttr.latency_ms,
              role: "",
              special: true,
              is_input: nodeAttr.is_input,
              is_force_forwarded: nodeAttr.is_force_forwarded,
              is_generated: nodeAttr.is_generated,
              is_masked: isTokenOutput(nodeAttr) ? nodeAttr.token.masked : false,
              top_k: (isTokenOutput(nodeAttr) && nodeAttr.top_k !== null) ? nodeAttr.top_k.map(t => ({
                text: t.token,
                prob: t.prob,
                is_masked: t.masked,
                latency_ms: 0,
              })) : undefined,
            };
            specialSet.add(token.text);
            // TODO: handle interleaving tokens with multimodal data
            // multimodalNodes.push({ type: "token", data: token });
            statCounter["latency.max"] = Math.max(token.latency_ms, statCounter["latency.max"] || 0);
            tokens.push(token);
            activeCloserRoleText.pop();
          } else {
            // Plain token outside of any role.
            const token: FlatToken = {
              text: nodeAttr.value,
              prob: isTokenOutput(nodeAttr) ? nodeAttr.token.prob : 0,
              latency_ms: nodeAttr.latency_ms,
              role: "",
              special: false,
              is_input: nodeAttr.is_input,
              is_force_forwarded: nodeAttr.is_force_forwarded,
              is_generated: nodeAttr.is_generated,
              is_masked: isTokenOutput(nodeAttr) ? nodeAttr.token.masked : false,
              top_k: (isTokenOutput(nodeAttr) && nodeAttr.top_k !== null) ? nodeAttr.top_k.map(t => ({
                text: t.token,
                prob: t.prob,
                is_masked: t.masked,
                latency_ms: 0,
              })) : undefined,
            };
            // multimodalNodes.push({ type: "token", data: token });
            statCounter["latency.max"] = Math.max(token.latency_ms, statCounter["latency.max"] || 0);
            tokens.push(token);
          }
        } else {
          const activeOpenerRole =
            activeOpenerRoles[activeOpenerRoles.length - 1];

          // Check if this is a role marker token (like "<|im_start|>assistant\n")
          const isRoleMarker = nodeAttr.value.includes("<|im_start|>") ||
                              nodeAttr.value.includes("<|im_end|>");

          if (isRoleMarker) {
            // Hide role marker tokens - don't add to display
            specialSet.add(nodeAttr.value);
          } else {
            // Check if we've already displayed this role section
            const roleName = activeOpenerRole.name || "";
            const roleCount = roleCounters[roleName] || 0;
            const roleKey = `${roleName}-${roleCount}`;
            const shouldShowRole = !displayedRoles.has(roleKey);

            if (shouldShowRole) {
              displayedRoles.add(roleKey);
            }

            // Only the first token of a role section carries the role label.
            const token: FlatToken = {
              text: nodeAttr.value,
              prob: isTokenOutput(nodeAttr) ? nodeAttr.token.prob : 0,
              latency_ms: nodeAttr.latency_ms,
              role: shouldShowRole ? (activeOpenerRole.name || "") : "",
              special: false,
              is_input: nodeAttr.is_input,
              is_force_forwarded: nodeAttr.is_force_forwarded,
              is_generated: nodeAttr.is_generated,
              is_masked: isTokenOutput(nodeAttr) ? nodeAttr.token.masked : false,
              top_k: (isTokenOutput(nodeAttr) && nodeAttr.top_k !== null) ? nodeAttr.top_k.map(t => ({
                text: t.token,
                prob: t.prob,
                is_masked: t.masked,
                latency_ms: 0,
              })) : undefined,
            };
            statCounter["latency.max"] = Math.max(token.latency_ms, statCounter["latency.max"] || 0);
            tokens.push(token);
          }
        }
      }
    }
    // NOTE(nopdive): Often the closer text is missing at the end of output.
    if (activeOpenerRoles.length !== 0 || activeCloserRoleText.length !== 0) {
      // console.log("Opener and closer role texts did not balance.")
    }

    // Visual updates
    if (!isCompleted || isError) {
      underline = (_: FlatToken) => "border: none;";
    } else if (underlineField === "Probability") {
      underline = (x: FlatToken) => underlineStyle(x.prob);
    } else if (underlineField === "Latency (ms)") {
      // Latency is shown on a log scale relative to the largest latency seen.
      underline = (x: FlatToken) =>
        underlineStyle(
          Math.log(x.latency_ms) / Math.log(statCounter["latency.max"])
        );
    } else {
      underline = (_: FlatToken) => "border: none;";
    }

    if (!isCompleted || isError) {
      // bg = (_: Token) => "";
      bg = (x: FlatToken) => bgTokenStyle(x, isDarkMode);
    } else if (bgField === "Type") {
      bg = (x: FlatToken) => bgTokenStyle(x, isDarkMode);
    } else if (bgField === "Probability") {
      bg = (x: FlatToken) => bgStyle(x.prob);
    } else if (bgField === "Latency (ms)") {
      // Latency is shown on a log scale relative to the largest latency seen.
      bg = (x: FlatToken) =>
        bgStyle(Math.log(x.latency_ms) / Math.log(statCounter["latency.max"]));
    } else {
      bg = (_: FlatToken) => "";
    }

    // End bookkeeping (svelte): self-assignments flag these values as changed
    // after the in-place pushes above.
    isCompleted = isCompleted;
    isError = isError;
    components = components;
    multimodalNodes = multimodalNodes;
    tokens = tokens;
  }

  // Tooltip element (bound in the markup) and its document-space position.
  let tooltip: HTMLElement;
  let tooltipX = 0;
  let tooltipY = 0;
  // Token currently described by the tooltip.
  let tooltipToken: FlatToken;
  // Hover time (ms) passed to the longhover action.
  const mouseLongHoverDuration = 200;

  // Shows the tooltip for the hovered token and keeps it inside the visible area.
  // Positions are document coordinates (viewport rect + scroll offset), so the
  // edge checks compare against the scrolled viewport rather than the bare
  // window size.
  const handleLongMouseOver = (event: CustomEvent<MouseEvent>) => {
    const target = event.detail.target as HTMLElement;
    if (target.matches(".token-grid-item")) {
      const index = target.dataset.index;
      const positionXOffset = 15;
      const positionYOffset = 10;
      const edgeMargin = 10;

      // Add tooltip
      const rect = target.getBoundingClientRect();
      tooltipX = rect.left + window.scrollX + rect.width / 2 + positionXOffset;
      tooltipY = rect.bottom + window.scrollY + positionYOffset;

      const indexNum = Number(index);
      tooltipToken = tokens[indexNum];

      // Show tooltip first to get accurate dimensions
      tooltip.style.display = "block";

      // Use requestAnimationFrame to ensure dimensions are calculated after render
      requestAnimationFrame(() => {
        const tooltipRect = tooltip.getBoundingClientRect();

        // Visible area expressed in document coordinates.
        const viewLeft = window.scrollX;
        const viewTop = window.scrollY;
        const viewRight = viewLeft + window.innerWidth;
        const viewBottom = viewTop + window.innerHeight;

        // Check right edge
        if (tooltipX + tooltipRect.width > viewRight) {
          tooltipX = viewRight - tooltipRect.width - edgeMargin;
        }

        // Check bottom edge
        if (tooltipY + tooltipRect.height > viewBottom) {
          // Position above the element instead of below
          tooltipY = rect.top + window.scrollY - tooltipRect.height - positionYOffset;
        }

        // Ensure tooltip stays within left edge
        if (tooltipX < viewLeft + edgeMargin) {
          tooltipX = viewLeft + edgeMargin;
        }

        // Ensure tooltip stays within top edge
        if (tooltipY < viewTop + edgeMargin) {
          tooltipY = viewTop + edgeMargin;
        }
      });
    }
  };

  // Inline colours saved before highlighting, restored on mouse out.
  let highlightPrevColor = "";
  let highlightPrevBackgroundColor = "";

  // Highlights every character cell that belongs to the hovered token.
  const handleMouseOver = (event: MouseEvent) => {
    const hovered = event.target as HTMLElement;
    if (!hovered.matches(".token-grid-item")) {
      return;
    }

    const tokenIndex = hovered.dataset.index;
    const sameTokenCells = hovered.parentElement?.querySelectorAll(
      `.token-grid-item[data-index="${tokenIndex}"]`
    );

    // Add highlight
    sameTokenCells?.forEach((cell) => {
      const cellElem = cell as HTMLElement;
      highlightPrevColor = cellElem.style.color;
      highlightPrevBackgroundColor = cellElem.style.backgroundColor;
      cellElem.style.color = "rgb(249, 250, 251)";
      cellElem.style.backgroundColor = "rgb(75, 85, 99)";
    });
  };

  // Hides the tooltip once the pointer leaves a token cell.
  const handleLongMouseOut = (event: CustomEvent<MouseEvent>) => {
    const left = event.detail.target as HTMLElement;
    if (!left.matches(".token-grid-item")) {
      return;
    }
    // Remove tooltip
    tooltip.style.display = "none";
  };

  // Restores the saved colours on every character cell of the token being left.
  const handleMouseOut = (event: MouseEvent) => {
    const left = event.target as HTMLElement;
    if (!left.matches(".token-grid-item")) {
      return;
    }

    const tokenIndex = left.dataset.index;
    const sameTokenCells = left.parentElement?.querySelectorAll(
      `.token-grid-item[data-index="${tokenIndex}"]`
    );

    // Remove highlight
    sameTokenCells?.forEach((cell) => {
      const cellElem = cell as HTMLElement;
      cellElem.style.color = highlightPrevColor;
      cellElem.style.backgroundColor = highlightPrevBackgroundColor;
    });
  };
  // Intentionally empty handler, wired to the focus/blur events in the markup.
  function doNothing(_: any) {}
  // Turns raw token text into HTML that is safe to inject with {@html}.
  // Markup characters are escaped first so tokens such as "<s>" or "</s>" are
  // shown literally instead of being parsed as elements. Spaces become
  // &nbsp; and tabs/newlines are shown as the visible sequences \t and \n.
  // The result still goes through DOMPurify as a second line of defence.
  const renderText = (text: string) => {
    const escaped = text
      .replaceAll("&", "&amp;") // must run first so later entities are not double-escaped
      .replaceAll("<", "&lt;")
      .replaceAll(">", "&gt;")
      .replaceAll(" ", "&nbsp;")
      .replaceAll("\t", "\\t")
      .replaceAll("\n", "\\n");
    return DOMPurify.sanitize(escaped);
  };
  // Placeholder "..." token rendered ahead of the grid when `requireFullReplay` is set.
  const continuationToken = {
    // What is displayed
    text: "...",
    role: "",
    special: false,
    // Metrics
    prob: 1,
    latency_ms: 0,
    // Origin flags
    is_input: true,
    is_generated: true,
    is_force_forwarded: false,
  };
</script>

<!-- Tooltip: hidden by default; shown and positioned by the long-hover handlers. -->
<div
  bind:this={tooltip}
  class="px-1 pt-1 pb-3 absolute opacity-95 bg-white dark:bg-[#5A5F72] shadow border border-gray-300 dark:border-gray-600 pointer-events-none z-50"
  style="top: {tooltipY}px; left: {tooltipX}px; display: none;"
>
  <div>
    {#if tooltipToken}
      <div class="col-1 flex flex-col items-center">
        <div class="text-2xl px-1 pb-1 text-left w-full bg-white dark:bg-[#5A5F72] dark:text-white">
          <!-- Enlarged copy of the hovered token -->
          <div class="mb-5 mt-1">
            <TokenGridItem
              token={tooltipToken}
              index={-1}
              underlineStyle={underline(tooltipToken)}
              bgStyle={bg(tooltipToken)}
            />
          </div>
          <!-- Legend: one row per active encoding (background, underline) -->
          <table class="w-full">
            <tbody class="text-xs tracking-wider dark:text-white">
              {#if bgField !== "None"}
                <tr>
                  <td>
                    <span style={bg(tooltipToken)}>
                      {bgField}
                    </span>
                  </td>
                  <td class="text-right dark:text-white">
                    <span class="pl-1">
                      {tokenDisplayValue(tooltipToken, bgField) ?? "None"}
                    </span>
                  </td>
                </tr>
              {/if}
              {#if underlineField !== "None"}
                <tr>
                  <td>
                    <span class="border-b-2" style={underline(tooltipToken)}>
                      {underlineField}
                    </span>
                  </td>
                  <td class="text-right dark:text-white">
                    <span>
                      {tokenDisplayValue(tooltipToken, underlineField) ?? "None"}
                    </span>
                  </td>
                </tr>
              {/if}
            </tbody>
          </table>
        </div>
        <!-- Candidate table, only when top-k data exists for the token -->
        {#if tooltipToken.top_k !== undefined}
          <hr class="bg-gray-400 dark:bg-gray-600 w-full my-2" />
          <table class="w-full">
            <thead>
              <tr>
                <th
                  class="px-1 pb-1 font-normal text-xs text-left text-gray-700 dark:text-white tracking-wide"
                >
                  Candidate
                </th>
                <th
                  class="px-1 pb-1 font-normal text-xs text-right text-gray-700 dark:text-white tracking-wide"
                >
                  Prob
                </th>
              </tr>
            </thead>
            <tbody>
              {#each tooltipToken.top_k as candidate, i}
                <!-- A dashed rule is drawn above the sixth row -->
                <tr
                  class={i === 5 ? "border-t border-dashed border-gray-300 dark:border-gray-600" : ""}
                >
                  <td
                    class="px-1 text-left font-mono text-sm decoration-2 {candidate.is_masked ? 'line-through' : ''}"
                  >
                    <span class="bg-gray-200 dark:bg-gray-700 dark:text-white">
                      {@html renderText(candidate.text)}
                    </span>
                  </td>
                  <td
                    class="px-1 text-right font-mono text-sm decoration-2 dark:text-white {candidate.is_masked ? 'line-through' : ''}"
                  >
                    {candidate.prob?.toFixed(3)}
                  </td>
                </tr>
              {/each}
            </tbody>
          </table>
        {/if}
      </div>
    {:else}
      <!-- Shown when no token is available for the hovered cell -->
      <div class="text-sm border-b text-red-700 dark:text-red-400">
        Missing tokens will show on completion.
      </div>
    {/if}
  </div>
</div>

<!-- Tokens view: the token grid (with hover handlers), a cursor while incomplete, then media outputs. -->
<div class="pt-6 pb-6 flex text-gray-800 dark:text-gray-200 font-token">
  <div class="px-4">
    <span
      class="flex flex-wrap text-sm"
      role="main"
      use:longhover={mouseLongHoverDuration}
      on:longmouseover={handleLongMouseOver}
      on:longmouseout={handleLongMouseOut}
      on:mouseover={handleMouseOver}
      on:mouseout={handleMouseOut}
      on:focus={doNothing}
      on:blur={doNothing}
    >
      {#if requireFullReplay}
        <TokenGridItem token={continuationToken} index={-1} />
        <div class="basis-full h-2"></div>
      {/if}

      <!-- Render tokens first -->
      {#each tokens as token, i}
        {#if token.role !== ""}
          <!-- Add spacing before role (except for first) -->
          {#if i > 0}
            <div class="basis-full py-3"></div>
          {/if}
          <!-- Force line break for role header -->
          <div class="basis-full h-0"></div>
        {/if}
        <TokenGridItem
          token={token}
          index={i}
          underlineStyle={underline(token)}
          bgStyle={bg(token)}
        />
      {/each}

      <!-- Pulsing cursor while output is still arriving. A stray ">" used to be rendered as text inside this span. -->
      {#if isCompleted === false}
        <span
          class="inline-block mt-2 border-b-2 border-white dark:border-gray-900 bg-gray-700 dark:bg-gray-300 animate-cpulse"
        >
          &nbsp;
        </span>
      {/if}
    </span>

    <!-- Render media nodes afterward -->
    {#each multimodalNodes as node}
      {#if node.type === "media"}
        <!-- A node has exactly one media type, so the branches are mutually exclusive. -->
        {#if node.data.type === "audio"}
          <div class="my-3">
            <CustomAudio audioData={node.data} />
          </div>
        {:else if node.data.type === "video"}
          <div class="my-3">
            <CustomVideo videoData={node.data} />
          </div>
        {:else if node.data.type === "image"}
          <div class="my-3">
            <img
              src={`data:${node.data.format};base64,${node.data.value}`}
              alt="Image output"
            />
          </div>
        {/if}
      {/if}
    {/each}
  </div>
</div>


================================================
FILE: client/graphpaper-inline/src/TokenGridItem.svelte
================================================
<!-- Token(s) within token grid -->
<script lang="ts">
    import {type FlatToken} from "./interfaces";

    // Token to draw; its text is rendered one cell per character.
    export let token: FlatToken;
    // Written to each cell's data-index attribute (TokenGrid passes -1 for placeholder items).
    export let index: number;
    // Inline CSS appended to every cell (underline part).
    export let underlineStyle: string = "";
    // Inline CSS appended to every cell (background part).
    export let bgStyle: string = "";
</script>

<!-- One cell per character. Array.from iterates by Unicode code point, so characters outside the BMP
     (e.g. emoji) stay intact; iterating the string directly would split them into two broken UTF-16 halves. -->
{#each Array.from(token.text) as ch, i}
    {#if ch === ' '}
        <!-- Space: rendered as a greyed non-breaking space -->
        <span data-index="{index}" role="tooltip" class={`token-grid-item inline-block mt-2 border-b-2 text-gray-300 dark:text-gray-500`} style={`${underlineStyle} ${bgStyle}`}>
            {#if i === 0}
                <span class="absolute text-xs uppercase -mt-4 text-purple-800 dark:text-purple-300 font-sans">
                    {token.role}
                </span>
            {/if}
            &nbsp;
        </span>
    {:else if ch === '\t'}
        <!-- Tab: shown as a visible \t marker -->
        <span data-index="{index}" role="tooltip" class={`token-grid-item inline-block mt-2 border-b-2 text-gray-300 dark:text-gray-500`} style={`${underlineStyle} ${bgStyle}`}>
            {#if i === 0}
                <span class="absolute text-xs uppercase -mt-4 text-purple-800 dark:text-purple-300 font-sans">
                    {token.role}
                </span>
            {/if}
            \t&nbsp;&nbsp;
        </span>
    {:else if ch === '\n'}
        <!-- Newline: shown as a visible \n marker followed by a forced line break -->
        <span data-index="{index}" role="tooltip" class={`token-grid-item inline-block mt-2 border-b-2 text-gray-300 dark:text-gray-500`} style={`${underlineStyle} ${bgStyle}`}>
            {#if i === 0}
                <span class="absolute text-xs uppercase -mt-4 text-purple-800 dark:text-purple-300 font-sans">
                    {token.role}
                </span>
            {/if}
            \n
        </span>
        <div class="basis-full h-full"></div>
    {:else}
        <!-- Any other character; special tokens are greyed out -->
        <span data-index="{index}" role="tooltip" class={`token-grid-item inline-block mt-2 border-b-2 ${token.special ? "text-gray-300 dark:text-gray-500" : ""}`} style={`${underlineStyle} ${bgStyle}`}>
            {#if i === 0}
                <span class="absolute text-xs uppercase -mt-4 text-purple-800 dark:text-purple-300 font-sans">
                    {token.role}
                </span>
            {/if}
            {ch}
        </span>
    {/if}
{/each}

================================================
FILE: client/graphpaper-inline/src/clickoutside.ts
================================================
// Action for clicking outside an element.

/**
 * Svelte action that emits an `outclick` CustomEvent on `node` whenever a
 * click lands on something that `node` does not contain.
 *
 * The listener is attached to `document` in the capture phase and is removed
 * again by the returned `destroy` hook.
 */
export function clickOutside(node: HTMLElement) {
	function onDocumentClick(evt: MouseEvent) {
		const clicked = evt.target as HTMLElement;
		// Clicks on the node itself or any descendant are not "outside".
		if (node.contains(clicked)) {
			return;
		}
		node.dispatchEvent(new CustomEvent('outclick'));
	}

	document.addEventListener('click', onDocumentClick, true);

	return {
		destroy() {
			document.removeEventListener('click', onDocumentClick, true);
		}
	};
}

================================================
FILE: client/graphpaper-inline/src/interfaces.ts
================================================
// Interfaces used within the client. This is separate to messaging interfaces.

import type { RoleOpenerInput} from "./stitch";

/**
 * Static, display-oriented description of a single metric.
 * Instances are declared in the `metricDefs` table in `metrics.ts`.
 */
export interface MetricDef {
    // Short label for the metric (may be an empty string, as for 'status').
    name: string,
    // Unit suffix such as '%', 'GB', 's', 'ms' or 'tkn'.
    units: string,
    // Human-readable explanation of what the metric measures.
    description: string,
    // NOTE(review): presumably false means the value is a series rather than a single reading — confirm against the renderer.
    isScalar: boolean,
    // NOTE(review): presumably the number of decimal places used when formatting — confirm against the renderer.
    precision: number,
}

export type MetricVal = string | number | Array<number | string>;

/**
 * Flattened, client-side representation of one token.
 * Only `text`, `prob` and `latency_ms` are required; every other field is optional.
 */
export interface FlatToken {
    // Text of the token.
    text: string,
    // Probability associated with the token (NOTE(review): presumably in [0, 1] — confirm with producer).
    prob: number,
    // Latency in milliseconds, as the name states.
    latency_ms: number,
    // Provenance flags; these mirror the is_input / is_force_forwarded / is_generated fields of TextOutput in stitch.ts.
    is_input?: boolean,
    is_force_forwarded?: boolean,
    is_generated?: boolean,
    // Role name attached to this token, if any.
    role?: string,
    // NOTE(review): presumably marks special/control tokens — confirm with producer.
    special?: boolean,
    // NOTE(review): presumably corresponds to Token.masked in stitch.ts — confirm.
    is_masked?: boolean,
    // Alternative candidate tokens, themselves expressed as FlatToken values.
    top_k?: Array<FlatToken>
}
// Function type that maps a token to a string.
export declare type TokenCallback = (token: FlatToken) => string;

/** Positional context carried alongside a media node. */
export interface MediaNodeContext {
    // Role openers in effect for the node (NOTE(review): presumably outermost first — confirm with producer).
    roleStack: RoleOpenerInput[];
    // NOTE(review): presumably the node's position in the rendered sequence — confirm with producer.
    index: number;
}

/** The kinds of media a MediaNode can hold. */
export type MediaType = "audio" | "video" | "image";

/** A non-text (audio, video or image) element. */
export interface MediaNode {
    type: MediaType;
    // Payload is untyped here; NOTE(review): presumably encoded media data, as with the *Output.value strings in stitch.ts — confirm.
    value: any;
    format: string;
    context: MediaNodeContext;
}

/** Tagged union: either a token or a media element, discriminated by `type`. */
export type MultimodalNode = 
  | { type: 'token', data: FlatToken }
  | { type: 'media', data: MediaNode };


================================================
FILE: client/graphpaper-inline/src/longhover.ts
================================================
// Action for long mouse hovers.

/**
 * Svelte action that reports sustained hovering over `node`.
 *
 * - `longmouseover` is dispatched on `node` once the pointer has stayed over it
 *   for `duration` milliseconds; the originating MouseEvent is passed as `detail`.
 * - `longmouseout` is dispatched on every `mouseout`, again with the MouseEvent
 *   as `detail`, and cancels any countdown still in progress.
 *
 * @param node     Element to observe.
 * @param duration Hover time in milliseconds before `longmouseover` fires.
 * @returns Action hooks: `update` changes the duration used by subsequent
 *          hovers; `destroy` removes the listeners and cancels a pending countdown.
 */
export function longhover(node: HTMLElement, duration: number) {
    let timer: ReturnType<typeof setTimeout> | undefined;

    // Cancel the countdown currently in flight, if any.
    const cancelPending = () => {
        if (timer !== undefined) {
            clearTimeout(timer);
            timer = undefined;
        }
    };

    const handleMouseOver = (event: MouseEvent) => {
        // A second mouseover without an intervening mouseout must not leave the
        // earlier timer orphaned (it could never be cleared afterwards).
        cancelPending();
        timer = setTimeout(() => {
            timer = undefined;
            node.dispatchEvent(new CustomEvent('longmouseover', {detail: event}));
        }, duration);
    };
    const handleMouseOut = (event: MouseEvent) => {
        cancelPending();
        node.dispatchEvent(new CustomEvent('longmouseout', {detail: event}));
    };

    node.addEventListener('mouseover', handleMouseOver);
    node.addEventListener('mouseout', handleMouseOut);

    return {
        update(newDuration: number) {
            duration = newDuration;
        },
        destroy() {
            // Without this, a countdown started just before teardown would still
            // dispatch an event on the detached node.
            cancelPending();
            node.removeEventListener('mouseover', handleMouseOver);
            node.removeEventListener('mouseout', handleMouseOut);
        }
    };
}


================================================
FILE: client/graphpaper-inline/src/main.css
================================================
/* Custom CSS for web app. */
@import "tailwindcss";
@variant dark (&:where(.dark, .dark *));

/* Note - Tailwind v4 uses CSS configuration */
/* Theme customizations */
@theme {
  /* Custom font family */
  --font-token: 'JetBrains Mono', monospace;
  
  /* Custom animation timing */
  --animate-cpulse: cpulse 3.5s cubic-bezier(0.4, 0, 0.6, 1) infinite;
}

/* Custom keyframes */
@keyframes cpulse {
  50% {
    opacity: 0;
  }
}

/* Any other custom styles you have */


================================================
FILE: client/graphpaper-inline/src/main.js
================================================
// Entrypoint for web app.

import App from './App.svelte';

// Instantiate the root App component and mount it directly into <body>.
const app = new App({
	target: document.body,
});

// The mounted instance is the module's default export.
export default app;

================================================
FILE: client/graphpaper-inline/src/metrics.ts
================================================
// Metrics and their definitions.

import type { MetricDef } from './interfaces';

// Lookup table of display metadata for every known metric, keyed by metric name.
// NOTE(review): the keys presumably match the `name` field of incoming
// MetricMessage objects (see stitch.ts) — confirm against the sender.
export const metricDefs: Record<string, MetricDef> = {
  // Engine status; has no label and no units.
  'status': {
    name: '',
    units: '',
    description: 'Determines whether engine is running, completed or in error.',
    isScalar: true,
    precision: 0
  },
  // cpu and gpu are the only non-scalar metrics in this table.
  'cpu': {
    name: 'CPU',
    units: '%',
    description: 'Average utilization across CPU cores.',
    isScalar: false,
    precision: 1
  },
  'gpu': {
    name: 'GPU',
    units: '%',
    description: 'Average utilization across GPUs.',
    isScalar: false,
    precision: 1
  },
  // Memory metrics, reported in GB.
  'ram': {
    name: 'RAM',
    units: 'GB',
    description: 'Utilization of RAM.',
    isScalar: true,
    precision: 1
  },
  'vram': {
    name: 'VRAM',
    units: 'GB',
    description: 'Utilization of video RAM.',
    isScalar: true,
    precision: 1
  },
  // Timing metrics.
  'wall time': {
    name: 'Time',
    units: 's',
    description: 'Time taken from initial display to engine completion.',
    isScalar: true,
    precision: 1
  },
  'avg latency': {
    name: 'Latency',
    units: 'ms',
    description: 'Average roundtrip latency per token',
    isScalar: true,
    precision: 0
  },
  // Token accounting metrics.
  'consumed': {
    name: 'Used',
    units: 'tkn',
    description: 'Total tokens consumed by language model.',
    isScalar: true,
    precision: 0
  },
  'token reduction': {
    name: 'Reduced',
    units: '%',
    description: 'Total tokens consumed by language model divided by total tokens.',
    isScalar: true,
    precision: 0
  }
};


================================================
FILE: client/graphpaper-inline/src/mocks.ts
================================================
// Mocks for interactive testing

import {type TextOutput, type RoleOpenerInput, type RoleCloserInput } from './stitch';

export const mockNodeAttrs: Array<RoleCloserInput | RoleOpenerInput | TextOutput> = [
  {
    "class_name": "RoleOpenerInput",
    "name": "user",
    "text": "<|user|>\n",
    "closer_text": "<|end|>\n"
  },
  {
    "class_name": "TextOutput",
    "value": "<|user|>\n",
    "is_input": true,
    "is_generated": false,
    "is_force_forwarded": false,
    "latency_ms": 0,
  },
  {
    "class_name": "TextOutput",
    "value": "What is the capital of France?",
    "is_input": true,
    "is_generated": false,
    "is_force_forwarded": false,
    "latency_ms": 0,
  },
  {
    "class_name": "RoleCloserInput",
    "name": "user",
    "text": "<|end|>\n"
  },
  {
    "class_name": "TextOutput",
    "value": "<|end|>\n",
    "is_input": true,
    "is_generated": false,
    "is_force_forwarded": false,
    "latency_ms": 0,
  },
  {
    "class_name": "RoleOpenerInput",
    "name": "assistant",
    "text": "<|assistant|>\n",
    "closer_text": "<|end|>\n"
  },
  {
    "class_name": "TextOutput",
    "value": "<|assistant|>\n",
    "is_input": true,
    "is_generated": false,
    "is_force_forwarded": false,
    "latency_ms": 0,
  },
  {
    "class_name": "TextOutput",
    "value": " The",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " capital",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " of",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " France",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " is",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " Paris",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": ".",
    "is_input": false,
    "is_generated": true,
    "is_force_forwarded": false,
    "latency_ms": 0,
  },
  {
    "class_name": "TextOutput",
    "value": " It",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " is",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " not",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " only",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " the",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " largest",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " city",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " in",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " France",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " but",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " also",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " one",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " of",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " the",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " most",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " important",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " cultural",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " and",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " commercial",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " cent",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": "ers",
    "is_input": false,
    "is_generated": true,
    "is_force_forwarded": false,
    "latency_ms": 0,
  },
  {
    "class_name": "TextOutput",
    "value": " in",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  },
  {
    "class_name": "TextOutput",
    "value": " Europe",
    "is_input": false,
    "is_generated": false,
    "is_force_forwarded": true,
    "latency_ms": 0
  }
];


================================================
FILE: client/graphpaper-inline/src/stitch.ts
================================================
// Interfaces for working with guidance messages and stitch.

import { writable } from 'svelte/store';


/**
 * Base shape shared by every node attribute. `class_name` is the discriminator
 * that the `is*` type guards in this file compare against.
 */
export interface NodeAttr {
    class_name: string
}

/**
 * A piece of text output. The `class_name` union also admits 'TokenOutput'
 * so that TokenOutput can extend this interface.
 */
export interface TextOutput extends NodeAttr {
    class_name: 'TextOutput' | 'TokenOutput',
    value: string,
    // Provenance flags for the text.
    is_input: boolean,
    is_generated: boolean,
    is_force_forwarded: boolean,
    latency_ms: number,
}

/** Text output that additionally carries token-level detail. */
export interface TokenOutput extends TextOutput {
    class_name: 'TokenOutput',
    token: Token,
    // Candidate tokens (NOTE(review): presumably the top-k alternatives at this position — confirm with producer).
    top_k: Array<Token>,
}

/** Token-level detail referenced by TokenOutput. */
export interface Token {
    token: string,
    // NOTE(review): byte content is carried as a string; the encoding is not visible here — confirm with producer.
    bytes: string,
    prob: number,
    masked: boolean,
}

/** Node attribute with class_name 'Backtrack'. */
export interface Backtrack extends NodeAttr {
    class_name: 'Backtrack',
    // NOTE(review): presumably the number of tokens to remove — confirm with producer.
    n_tokens: number,
    bytes: string,
}

// ImageOutput, AudioOutput and VideoOutput share the same fields and differ
// only in their class_name discriminator.
export interface ImageOutput extends NodeAttr {
    class_name: 'ImageOutput',
    value: string,
    format: string,
    is_input: boolean,
}

export interface AudioOutput extends NodeAttr {
    class_name: 'AudioOutput',
    value: string,
    format: string,
    is_input: boolean,
}

export interface VideoOutput extends NodeAttr {
    class_name: 'VideoOutput',
    value: string,
    format: string,
    is_input: boolean,
}

/** Marks the opening of a role block; all descriptive fields are optional. */
export interface RoleOpenerInput extends NodeAttr {
    class_name: 'RoleOpenerInput',
    name?: string,
    text?: string,
    closer_text?: string,
}

/** Marks the closing of a role block; all descriptive fields are optional. */
export interface RoleCloserInput extends NodeAttr {
    class_name: 'RoleCloserInput',
    name?: string,
    text?: string,
}

/**
 * Base shape of every guidance message. `class_name` is the discriminator
 * checked by the `is*Message` type guards in this file.
 */
export interface GuidanceMessage {
    message_id: number,
    class_name: string,
}

/** Message that carries a trace node, optionally with its parent id and node attribute. */
export interface TraceMessage extends GuidanceMessage {
    class_name: 'TraceMessage',
    trace_id: number,
    parent_trace_id?: number,
    node_attr?: NodeAttr,
}

export interface ResetDisplayMessage extends GuidanceMessage {
    class_name: 'ResetDisplayMessage'
}

export interface ExecutionStartedMessage extends GuidanceMessage {
    class_name: 'ExecutionStartedMessage',
}

export interface ExecutionCompletedMessage extends GuidanceMessage {
    class_name: 'ExecutionCompletedMessage',
    last_trace_id?: number,
}

// ClientReady / OutputRequest messages each have a matching *Ack message type.
export interface ClientReadyMessage extends GuidanceMessage {
    class_name: 'ClientReadyMessage'
}

export interface ClientReadyAckMessage extends GuidanceMessage {
    class_name: 'ClientReadyAckMessage'
}

export interface OutputRequestMessage extends GuidanceMessage {
    class_name: 'OutputRequestMessage',
    identifier: string
}

export interface OutputRequestAckMessage extends GuidanceMessage {
    class_name: 'OutputRequestAckMessage'
}

/** Message that reports a named metric value, either a single value or an array. */
export interface MetricMessage extends GuidanceMessage {
    class_name: 'MetricMessage',
    name: string,
    value: number | string | Array<number> | Array<string>,
    scalar: boolean,
}

/** Envelope with a `type` tag and an untyped `content` payload. */
export interface StitchMessage {
    type: "resize" | "clientmsg" | "kernelmsg" | "state" | "init_state" | "init_stitch",
    content: any
}

/**
 * Type guard: true when `o` is non-nullish and owns both a `class_name` and a
 * `message_id` property.
 *
 * The check goes through `Object.prototype.hasOwnProperty` rather than
 * `o.hasOwnProperty`, so it also works for null-prototype objects and cannot
 * be broken by a payload that itself contains a `hasOwnProperty` key.
 */
export function isGuidanceMessage(o: GuidanceMessage | undefined | null): o is GuidanceMessage {
    if (o === undefined || o === null) return false;
    const owns = Object.prototype.hasOwnProperty;
    return owns.call(o, "class_name") && owns.call(o, "message_id");
}

/** Type guard: true when `o` is non-nullish and tagged as a TraceMessage. */
export function isTraceMessage(o: GuidanceMessage | undefined | null): o is TraceMessage {
    return o?.class_name === "TraceMessage";
}

/** Type guard: true when `o` is non-nullish and tagged as a Backtrack node. */
export function isBacktrack(o: NodeAttr | undefined | null): o is Backtrack {
    return o?.class_name === "Backtrack";
}

/** Type guard: true when `o` is non-nullish and tagged as a RoleOpenerInput. */
export function isRoleOpenerInput(o: NodeAttr | undefined | null): o is RoleOpenerInput {
    return o?.class_name === "RoleOpenerInput";
}

/** Type guard: true when `o` is non-nullish and tagged as a RoleCloserInput. */
export function isRoleCloserInput(o: NodeAttr | undefined | null): o is RoleCloserInput {
    return o?.class_name === "RoleCloserInput";
}

/**
 * Type guard: true when `o` is non-nullish and tagged as either a TextOutput
 * or a TokenOutput (TokenOutput extends TextOutput).
 */
export function isTextOutput(o: NodeAttr | undefined | null): o is TextOutput {
    switch (o?.class_name) {
        case "TextOutput":
        case "TokenOutput":
            return true;
        default:
            return false;
    }
}

/** Type guard: true when `o` is non-nullish and tagged as a TokenOutput. */
export function isTokenOutput(o: NodeAttr | undefined | null): o is TokenOutput {
    return o?.class_name === "TokenOutput";
}

/** Type guard: true when `o` is non-nullish and tagged as an ImageOutput. */
export function isImageOutput(o: NodeAttr | undefined | null): o is ImageOutput {
    return o?.class_name === "ImageOutput";
}

/** Type guard: true when `o` is non-nullish and tagged as an AudioOutput. */
export function isAudioOutput(o: NodeAttr | undefined | null): o is AudioOutput {
    return o?.class_name === "AudioOutput";
}

/** Type guard: true when `o` is non-nullish and tagged as a VideoOutput. */
export function isVideoOutput(o: NodeAttr | undefined | null): o is VideoOutput {
    return o?.class_name === "VideoOutput";
}

/** Type guard: true when `o` is non-nullish and tagged as a ResetDisplayMessage. */
export function isResetDisplayMessage(o: GuidanceMessage | undefined | null): o is ResetDisplayMessage {
    return o?.class_name === "ResetDisplayMessage";
}

/** Type guard: true when `o` is non-nullish and tagged as a MetricMessage. */
export function isMetricMessage(o: GuidanceMessage | undefined | null): o is MetricMessage {
    return o?.class_name === "MetricMessage";
}

/**
 * Type guard: true when `o` is non-nullish and tagged as a ClientReadyAckMessage.
 *
 * The predicate narrows to ClientReadyAckMessage, matching the class_name it tests.
 */
export function isClientReadyAckMessage(o: GuidanceMessage | undefined | null): o is ClientReadyAckMessage {
    if (o === undefined || o === null) return false;
    return o.class_name === "ClientReadyAckMessage";
}

/**
 * Type guard: true when `o` is non-nullish and tagged as an OutputRequestAckMessage.
 *
 * The predicate narrows to OutputRequestAckMessage, matching the class_name it tests.
 */
export function isOutputRequestAckMessage(o: GuidanceMessage | undefined | null): o is OutputRequestAckMessage {
    if (o === undefined || o === null) return false;
    return o.class_name === "OutputRequestAckMessage";
}

/** Type guard: true when `o` is non-nullish and tagged as an ExecutionCompletedMessage. */
export function isExecutionCompletedMessage(o: GuidanceMessage | undefined | null): o is ExecutionCompletedMessage {
    return o?.class_name === "ExecutionCompletedMessage";
}

/** Type guard: true when `o` is non-nullish and tagged as an ExecutionStartedMessage. */
export function isExecutionStartedMessage(o: GuidanceMessage | undefined | null): o is ExecutionStartedMessage {
    return o?.class_name === "ExecutionStartedMessage";
}

// Svelte writable stores, each holding a StitchMessage or undefined
// (all three start out undefined).
// NOTE(review): the names mirror the "kernelmsg" / "clientmsg" / "state"
// values of StitchMessage.type; presumably each store carries messages of
// that type — confirm in StitchHandler.svelte.
export const kernelmsg = writable<StitchMessage | undefined>(undefined);
export const clientmsg = writable<StitchMessage | undefined>(undefined);
export const state = writable<StitchMessage | undefined>(undefined);

// String-valued status enum; each member's value equals its name.
// NOTE(review): presumably the engine status shown by the 'status' metric — confirm in App.svelte.
export enum Status {
  Running = 'Running',
  Error = 'Error',
  Done = 'Done',
}

================================================
FILE: client/graphpaper-inline/src/template.html
================================================
<!doctype html>
<html lang="en">
<head>
    <meta charset="utf-8">

    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:ital,wght@0,100..800;1,100..800&family=Roboto+Mono:ital,wght@0,100..700;1,100..700&display=swap" rel="stylesheet">
</head>
<body>
</body>
</html>


================================================
FILE: client/graphpaper-inline/tsconfig.json
================================================
{
	"compilerOptions": {
		"allowJs": true,
		"checkJs": true,
		"esModuleInterop": true,
		"forceConsistentCasingInFileNames": true,
		"resolveJsonModule": true,
		"skipLibCheck": true,
		"sourceMap": true,
		"strict": true,
        "verbatimModuleSyntax": true,
		"module": "ESNext",
		"moduleResolution": "bundler"
	}
}

================================================
FILE: docs/.readthedocs.yaml
================================================
# version: 2

# Read the Docs configuration file for Sphinx projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Set the OS, Python version and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.10"
    # You can also specify other tool versions:
    # nodejs: "20"
    # rust: "1.70"
    # golang: "1.20"

# Build documentation in the "docs/" directory with Sphinx
sphinx:
  configuration: docs/conf.py
  # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
  # builder: "dirhtml"
  # Fail on all warnings to avoid broken references
  # fail_on_warning: true

# Optionally build your docs in additional formats such as PDF and ePub
# formats:
#   - pdf
#   - epub

# Optional but recommended, declare the Python requirements required
# to build your documentation
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
# python:
#   install:
#     - requirements: docs/requirements.txt
#    version: 3.8
python:
   install:
      - method: pip
        path: .
        extra_requirements:
            - docs


================================================
FILE: docs/Makefile
================================================
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = _build

# Internal variables.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

.PHONY: help
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  applehelp  to make an Apple Help Book"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  epub3      to make an epub3"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
	@echo "  coverage   to run coverage check of the documentation (if enabled)"
	@echo "  dummy      to check syntax errors of document sources"

.PHONY: clean
clean:
	rm -rf $(BUILDDIR)/*

.PHONY: html
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

.PHONY: dirhtml
dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

.PHONY: singlehtml
singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

.PHONY: pickle
pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

.PHONY: json
json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

.PHONY: htmlhelp
htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

.PHONY: qthelp
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/guidance.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/guidance.qhc"

.PHONY: applehelp
applehelp:
	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
	@echo
	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
	@echo "N.B. You won't be able to view it unless you put it in" \
	      "~/Library/Documentation/Help or install it in your application" \
	      "bundle."

.PHONY: devhelp
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/guidance"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/guidance"
	@echo "# devhelp"

.PHONY: epub
epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

.PHONY: epub3
epub3:
	$(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
	@echo
	@echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."

.PHONY: latex
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

.PHONY: latexpdf
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

.PHONY: latexpdfja
latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

.PHONY: text
text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

.PHONY: man
man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

.PHONY: texinfo
texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

.PHONY: info
# Build the Texinfo sources, then run makeinfo on them through a sub-make.
# The recursive call goes through $(MAKE) (as the latexpdf targets do) so that
# command-line flags such as -j, -n and -k propagate to the sub-make and the
# same make binary is used.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

.PHONY: gettext
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

.PHONY: changes
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

.PHONY: linkcheck
linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

.PHONY: doctest
doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."

.PHONY: coverage
coverage:
	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
	@echo "Testing of coverage in the sources finished, look at the " \
	      "results in $(BUILDDIR)/coverage/python.txt."

.PHONY: xml
xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

.PHONY: pseudoxml
pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

.PHONY: dummy
dummy:
	$(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
	@echo
	@echo "Build finished. Dummy builder generates no files."

================================================
FILE: docs/_static/css/styles.css
================================================
.wy-side-nav-search > a img.logo, .wy-side-nav-search .wy-dropdown > a img.logo {
    width: 250px;
    margin-top: 20px;
    margin-bottom: 15px;
}

.wy-side-nav-search>div.version {
    color: black;
}
@media screen and (min-width: 767px) {
   .wy-table-responsive table td {
      white-space: normal;
   }
   .wy-table-responsive {
      overflow: visible;
   }
}

/* .wy-side-nav-search .wy-dropdown>a img.logo,.wy-side-nav-search>a img.logo {
   max-width: 40%;
} */

.wy-side-nav-search>div.version {
   color: #d9d9d9;
}

.wy-nav-top {
   background: #343131;
}

.highlight {
   background: #f7f7f7;
}

.wy-side-nav-search input[type=text] {
   border-color: #666666;
}

a {
   color: #008bfb;
}

a:hover {
   color: #008bfb;
}

a:visited {
   color: #008bfb;
}

html.writer-html4 .rst-content dl:not(.docutils)>dt, html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple)>dt {
   background: #008bfb11;
   color: #0086f6;
   border-top: 3px solid #008bfbaa;
}

.rst-versions .rst-current-version {
   color: #fcfcfc;
}

.wy-menu-vertical a {
   color: #d9d9d9;
}

section h2 {
   margin-top: 30px;
}

.rst-content code.literal, .rst-content tt.literal {
   color: #008bfb;
}

================================================
FILE: docs/api.rst
================================================
.. currentmodule:: guidance

API Reference
=============
This page contains the API reference for public objects and functions in Guidance.


.. _functions_api:

functions
---------
.. autosummary::
    :toctree: generated/

    guidance.gen
    guidance.select
    guidance.json


.. _contexts_api:

context blocks
--------------
.. autosummary::
    :toctree: generated/

    guidance.instruction
    guidance.system
    guidance.user
    guidance.assistant


.. _models_api:

models
------
.. autosummary::
    :toctree: generated/

    guidance.models.Model
    guidance.models.Mock
    guidance.models.LlamaCpp
    guidance.models.Transformers
    guidance.models.Anthropic
    guidance.models.AzureOpenAI
    guidance.models.Cohere
    guidance.models.GoogleAI
    guidance.models.LiteLLM
    guidance.models.OpenAI
    guidance.models.VertexAI


================================================
FILE: docs/api_examples.rst
================================================
.. currentmodule:: guidance

.. _api_examples:

API Examples
------------

These examples parallel the namespace structure of Guidance. Each object or function in Guidance has a 
corresponding example notebook here that demonstrates its API usage. The source notebooks
are `available on GitHub <https://github.com/guidance-ai/guidance/tree/main/notebooks/api_examples>`_.


.. _functions_examples:

functions
=========
.. Examples for built-in guidance functions.

.. toctree::
    :glob:
    :maxdepth: 1

    example_notebooks/api_examples/library/*


.. _models_examples:

models
======
.. Examples for members of :ref:`guidance.models <models_api>`.

.. toctree::
    :glob:
    :maxdepth: 1

    example_notebooks/api_examples/models/*


================================================
FILE: docs/art_of_prompt_design.rst
================================================
.. currentmodule:: guidance

.. _art_of_prompt_design:

The Art of Prompt Design
------------------------

These notebooks demonstrate how to design effective prompts and guidance programs; they also cover common, useful
design patterns. The source notebooks are `available on GitHub <https://github.com/guidance-ai/guidance/tree/main/notebooks/art_of_prompt_design>`_.


.. toctree::
    :glob:
    :maxdepth: 1

    example_notebooks/art_of_prompt_design/use_clear_syntax.ipynb
    example_notebooks/art_of_prompt_design/prompt_boundaries_and_token_healing.ipynb
    example_notebooks/art_of_prompt_design/tool_use.ipynb
    example_notebooks/art_of_prompt_design/react.ipynb
    example_notebooks/art_of_prompt_design/rag.ipynb

================================================
FILE: docs/conf.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Guidance documentation build configuration file, created by
# sphinx-quickstart on Tue May 22 10:44:55 2018.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.


# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import shutil
import sys

import sphinx_rtd_theme

# NOTE(review): this prints the absolute path of "./guidance" relative to the
# current directory; nothing below uses the value, so it looks like leftover
# debug output -- confirm before removing.
print(os.path.abspath("./guidance"))
# Put the parent of the current directory first on sys.path so that modules
# living one level up can be imported while the docs are built.
sys.path.insert(0, os.path.abspath(".."))

# make copy of notebooks in docs folder, as they must be here for sphinx to
# pick them up properly.
NOTEBOOKS_DIR = os.path.abspath("example_notebooks")
if os.path.exists(NOTEBOOKS_DIR):
    # A copy from an earlier build is still around: warn and start from scratch,
    # because copytree below is called without dirs_exist_ok.
    import warnings

    warnings.warn("example_notebooks directory exists, replacing...")
    shutil.rmtree(NOTEBOOKS_DIR)
shutil.copytree(
    os.path.abspath("../notebooks"),
    NOTEBOOKS_DIR,
)
# Drop the local_scratch folder from the copy (if the source tree had one) so it
# is not picked up by the build.
if os.path.exists(NOTEBOOKS_DIR + "/local_scratch"):
    shutil.rmtree(NOTEBOOKS_DIR + "/local_scratch")


# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",  # API documentation pulled from docstrings
    "sphinx.ext.autosummary",  # summary tables for documented objects
    "sphinx_rtd_theme",  # Read the Docs theme (selected through html_theme below)
    "numpydoc",  # NumPy-style docstring support
    "nbsphinx",  # lets the toctrees include the .ipynb notebooks
]
# autodoc: document members, including inherited ones, unless a directive says otherwise.
autodoc_default_options = {"members": True, "inherited-members": True}
# autosummary: generate the stub pages for autosummary entries automatically.
autosummary_generate = True
# numpydoc: do not add its own member listing to class pages
# (presumably to avoid duplicating what autodoc already emits -- confirm).
numpydoc_show_class_members = False

# Don't try to execute the notebooks, they overwhelm RTD
nbsphinx_execute = "never"

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = ".rst"

# The encoding of source files.
#
# source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = "index"

# General information about the project.
project = "Guidance"
copyright = "2023, Guidance contributors"
author = "Scott Lundberg, Marco Tulio Ribeiro"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = "latest"
# The full version, including alpha/beta/rc tags.
release = "latest"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = "en"

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#
# today = ''
#
# Else, today_fmt is used as the format for a strftime call.
#
# today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# The reST default role (used for this markup: `text`) to use for all
# documents.
#
# default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#
# add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#
# add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#
# show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
# html_theme = 'alabaster'
html_theme = "sphinx_rtd_theme"

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
    #'canonical_url': '',
    "logo_only": True,
    "display_version": True,
    "prev_next_buttons_location": "bottom",
    "style_external_links": False,
    "style_nav_header_background": "#343131",
    # Toc options
    "collapse_navigation": True,
    "sticky_navigation": True,
    "navigation_depth": 4,
    "includehidden": True,
    "titles_only": False,
}
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
html_logo = "figures/guidance_logo_white_dark.svg"

# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []

# The name for this set of Sphinx documents.
# "<project> v<release> documentation" by default.
#
# html_title = 'Guidance'

# A shorter title for the navigation bar.  Default is the same as html_title.
#
# html_short_title = None

# The name of an image file (relative to this directory) to use as a favicon of
# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#
html_favicon = "figures/favicon.ico"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

html_css_files = ["css/styles.css"]

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#
# html_extra_path = []

# If not None, a 'Last updated on:' timestamp is inserted at every page
# bottom, using the given strftime format.
# The empty string is equivalent to '%b %d, %Y'.
#
# html_last_updated_fmt = None

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#
# html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#
# html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#
# html_additional_pages = {}

# If false, no module index is generated.
#
# html_domain_indices = True

# If false, no index is generated.
#
# html_use_index = True

# If true, the index is split into individual pages for each letter.
#
# html_split_index = False

# If true, links to the reST sources are added to the pages.
#
# html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#
# html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#
# html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it.  The value of this option must be the
# base URL from which the finished HTML is served.
#
# html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None

# Language to be used for generating the HTML full-text search index.
# Sphinx supports the following languages:
#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
#
html_search_language = "en"

# A dictionary with options for the search language support, empty by default.
# 'ja' uses this config value.
# 'zh' user can custom change `jieba` dictionary path.
#
# html_search_options = {'type': 'default'}

# The name of a javascript file (relative to the configuration directory) that
# implements a search results scorer. If empty, the default will be used.
#
# html_search_scorer = 'scorer.js'

# Output file base name for HTML help builder.
htmlhelp_basename = "Guidance_doc"

# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',
    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, "Guidance.tex", "Guidance Documentation", "Scott Lundberg", "manual"),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#
# latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#
# latex_use_parts = False

# If true, show page references after internal links.
#
# latex_show_pagerefs = False

# If true, show URL addresses after external links.
#
# latex_show_urls = False

# Documents to append as an appendix to all manuals.
#
# latex_appendices = []

# If false, will not define \strong, \code, \titleref, \crossref ... but only
# \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added
# packages.
#
# latex_keep_old_macro_names = True

# If false, no module index is generated.
#
# latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "guidance", "Guidance Documentation", [author], 1)]

# If true, show URL addresses after external links.
#
# man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (
        master_doc,
        "Guidance",
        "Guidance Documentation",
        author,
        "Guidance",
        "One line description of project.",
        "Miscellaneous",
    ),
]

# Documents to append as an appendix to all manuals.
#
# texinfo_appendices = []

# If false, no module index is generated.
#
# texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#
# texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
#
# texinfo_no_detailmenu = False


def setup(app):
    """Sphinx extension hook: register the post-build cleanup handler.

    Parameters
    ----------
    app
        The Sphinx application object; only its ``connect`` method is used.
    """
    # NOTE(review): the imported name is not used in this function; presumably
    # the import is here so that a broken ``guidance`` install fails the docs
    # build early -- confirm before removing.
    from guidance import guidance

    # Run build_finished once Sphinx emits its "build-finished" event.
    app.connect("build-finished", build_finished)


def build_finished(app, exception):
    """Remove the temporary notebook copy made when this config was loaded.

    Connected to the "build-finished" event in ``setup``. Both arguments are
    supplied by the event and are ignored here, so the directory is removed
    whether or not ``exception`` is set.
    """
    shutil.rmtree(NOTEBOOKS_DIR)


================================================
FILE: docs/index.rst
================================================

.. image:: figures/guidance_logo_blue.svg
   :width: 300px
   :align: center

|

**Guidance** enables you to control modern language models more effectively and efficiently than traditional prompting or chaining. Guidance programs allow you to interleave generation, prompting, and logical control into a single continuous flow matching how the language model actually processes the text.

Install
=======

Guidance can be installed from `PyPI <https://pypi.org/project/guidance>`_::

   pip install guidance


Contents
========

.. toctree::
   :maxdepth: 2

   Tutorials <tutorials>
   API reference <api>
   API examples <api_examples>
   The Art of Prompt Design <art_of_prompt_design>


================================================
FILE: docs/make.bat
================================================
@ECHO OFF

REM Command file for Sphinx documentation

REM Use sphinx-build unless the caller already set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
REM Output directory and the option strings shared by the build targets below.
REM The trailing dot makes the current directory the documentation source dir.
set BUILDDIR=_build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
set I18NSPHINXOPTS=%SPHINXOPTS% .
REM When PAPER is set, forward it to Sphinx as a latex_paper_size override.
if NOT "%PAPER%" == "" (
	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)

if "%1" == "" goto help

if "%1" == "help" (
	:help
	echo.Please use `make ^<target^>` where ^<target^> is one of
	echo.  html       to make standalone HTML files
	echo.  dirhtml    to make HTML files named index.html in directories
	echo.  singlehtml to make a single large HTML file
	echo.  pickle     to make pickle files
	echo.  json       to make JSON files
	echo.  htmlhelp   to make HTML files and a HTML help project
	echo.  qthelp     to make HTML files and a qthelp project
	echo.  devhelp    to make HTML files and a Devhelp project
	echo.  epub       to make an epub
	echo.  epub3      to make an epub3
	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
	echo.  text       to make text files
	echo.  man        to make manual pages
	echo.  texinfo    to make Texinfo files
	echo.  gettext    to make PO message catalogs
	echo.  changes    to make an overview over all changed/added/deprecated items
	echo.  xml        to make Docutils-native XML files
	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
	echo.  linkcheck  to check all external links for integrity
	echo.  doctest    to run all doctests embedded in the documentation if enabled
	echo.  coverage   to run coverage check of the documentation if enabled
	echo.  dummy      to check syntax errors of document sources
	goto end
)

if "%1" == "clean" (
	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
	del /q /s %BUILDDIR%\*
	goto end
)


REM Check if sphinx-build is available and fallback to Python version if any
REM Exit code 9009 is what cmd reports when the command could not be found.
%SPHINXBUILD% 1>NUL 2>NUL
if errorlevel 9009 goto sphinx_python
goto sphinx_ok

:sphinx_python

REM Fall back to running Sphinx as a module. "python -m sphinx" is the
REM supported module entry point; "python -m sphinx.__init__" only worked while
REM sphinx/__init__.py still carried its own main block, and on current Sphinx
REM releases it exits without building anything.
set SPHINXBUILD=python -m sphinx
%SPHINXBUILD% 2> nul
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

:sphinx_ok


if "%1" == "html" (
	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
	goto end
)

if "%1" == "dirhtml" (
	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
	goto end
)

if "%1" == "singlehtml" (
	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
	goto end
)

if "%1" == "pickle" (
	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the pickle files.
	goto end
)

if "%1" == "json" (
	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the JSON files.
	goto end
)

if "%1" == "htmlhelp" (
	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
	goto end
)

REM qthelp: build the Qt help project, then print the follow-up commands.
REM qcollectiongenerator turns the .qhcp project into a .qhc collection file,
REM so that is the file name to hand to the assistant.
if "%1" == "qthelp" (
	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\guidance.qhcp
	echo.To view the help file:
	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\guidance.qhc
	goto end
)

if "%1" == "devhelp" (
	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished.
	goto end
)

if "%1" == "epub" (
	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The epub file is in %BUILDDIR%/epub.
	goto end
)

if "%1" == "epub3" (
	%SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
	goto end
)

if "%1" == "latex" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
	goto end
)

REM latexpdf: generate the LaTeX sources, then build the PDFs with make.
REM As in the other targets, stop when sphinx-build fails instead of running
REM make on missing or stale LaTeX sources.
if "%1" == "latexpdf" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	if errorlevel 1 exit /b 1
	cd %BUILDDIR%/latex
	make all-pdf
	cd %~dp0
	echo.
	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
	goto end
)

REM latexpdfja: same as latexpdf but using the all-pdf-ja make target.
REM As in the other targets, stop when sphinx-build fails instead of running
REM make on missing or stale LaTeX sources.
if "%1" == "latexpdfja" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	if errorlevel 1 exit /b 1
	cd %BUILDDIR%/latex
	make all-pdf-ja
	cd %~dp0
	echo.
	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
	goto end
)

if "%1" == "text" (
	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The text files are in %BUILDDIR%/text.
	goto end
)

if "%1" == "man" (
	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The manual pages are in %BUILDDIR%/man.
	goto end
)

if "%1" == "texinfo" (
	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
	goto end
)

if "%1" == "gettext" (
	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
	goto end
)

if "%1" == "changes" (
	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
	if errorlevel 1 exit /b 1
	echo.
	echo.The overview file is in %BUILDDIR%/changes.
	goto end
)

if "%1" == "linkcheck" (
	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
	if errorlevel 1 exit /b 1
	echo.
	echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
	goto end
)

if "%1" == "doctest" (
	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
	if errorlevel 1 exit /b 1
	echo.
	echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
	goto end
)

if "%1" == "coverage" (
	%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
	if errorlevel 1 exit /b 1
	echo.
	echo.Testing of coverage in the sources finished, look at the ^
results in %BUILDDIR%/coverage/python.txt.
	goto end
)

if "%1" == "xml" (
	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The XML files are in %BUILDDIR%/xml.
	goto end
)

if "%1" == "pseudoxml" (
	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
	goto end
)

if "%1" == "dummy" (
	%SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. Dummy builder generates no files.
	goto end
)

:end

================================================
FILE: docs/tutorials.rst
================================================
.. currentmodule:: guidance

.. _tutorials:

Tutorials
----------------

These notebooks demonstrate various features of ``guidance``. The source notebooks
are `available on GitHub <https://github.com/guidance-ai/guidance/tree/main/notebooks/tutorials>`_.


.. toctree::
    :glob:
    :maxdepth: 1

    example_notebooks/tutorials/intro_to_guidance.ipynb
    example_notebooks/tutorials/token_healing.ipynb
    example_notebooks/tutorials/regex_constraints.ipynb
    example_notebooks/tutorials/guidance_acceleration.ipynb
    example_notebooks/tutorials/code_generation.ipynb
    example_notebooks/tutorials/chat.ipynb

================================================
FILE: guidance/__init__.py
================================================
__version__ = "0.3.2"

import sys
from types import ModuleType

from . import library, models
from ._guidance import guidance
from ._tools import Tool

# we expose all the library functions at the top level of the module
from .library import *  # noqa: F403

__all__ = [
    "guidance",
    "library",
    "models",
    "Tool",
    *library.__all__,
]


# This makes the guidance module callable
class _Guidance(ModuleType):
    """Module type whose instances can be called like the ``guidance`` decorator."""

    def __call__(self, f=None, *, stateless=False, cache=None, dedent=True, model=models.Model):
        """Forward the call, with all arguments unchanged, to ``guidance``."""
        return guidance(f, stateless=stateless, cache=cache, dedent=dedent, model=model)


sys.modules[__name__].__class__ = _Guidance


================================================
FILE: guidance/_ast.py
================================================
import copy
import json
import re
import textwrap
import warnings
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from functools import cached_property
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Iterator,
    Literal,
    Sequence,
    TypedDict,
    TypeVar,
    Union,
    cast,
)

from llguidance import LLMatcher
from pydantic import Base64Bytes
from typing_extensions import assert_never

from ._parser import ByteParser, ByteParserException
from ._tools import Tool
from .trace import OutputAttr

if TYPE_CHECKING:
    from .models._base import Interpreter, State

# to support the embedding of guidance functions inside Python f-strings we use tags with these delimiters
tag_start = "{{G|"  # start of a call tag
tag_end = "|G}}"  # end of a call tag
_tag_pool: dict[str, Union["Function", "GrammarNode"]] = {}  # the Functions or GrammarNodes associated with the tags
_tag_pattern = re.compile(re.escape(tag_start) + r"([^\|]+)" + re.escape(tag_end))  # the pattern for matching call tags


def _parse_tags(s: str) -> Union["GrammarNode", "Function"]:
    """Rebuild the object that a string with embedded call tags stands for.

    The string is split on the tag pattern, which yields literal text and tag
    ids in alternation (text first). Literal pieces become ``LiteralNode``s,
    tag ids are looked up in ``_tag_pool``, and everything is combined left to
    right with ``+``.
    """
    pieces = cast(list[str], _tag_pattern.split(s))
    result: GrammarNode = LiteralNode(pieces[0])
    for offset, piece in enumerate(pieces[1:]):
        if offset % 2 == 0:
            # even offsets after the leading text hold the captured tag ids
            result += _tag_pool[piece]
        else:
            result += LiteralNode(piece)
    return result


class Tagged:
    """Mixin that lets an object be embedded in a string through a call tag."""

    def __str__(self):
        """Return the tag for this object, registering the object on first use.

        The tag is ``tag_start`` + ``str(id(self))`` + ``tag_end``; the object
        is stored in ``_tag_pool`` under that id so the tag can be resolved
        again later.
        """
        key = str(id(self))
        # keep whatever is already registered under this id
        _tag_pool.setdefault(key, self)
        return f"{tag_start}{key}{tag_end}"


class Match:
    """Result of matching input against a grammar.

    Holds the captures, their log probabilities and a flag telling whether the
    match was only partial. Indexing, ``len`` and ``str`` operate on the
    captures; an instance is always truthy, even when there are no captures.
    """

    def __init__(self, captures, log_probs, partial):
        self.captures, self.log_probs, self.partial = captures, log_probs, partial

    def __getitem__(self, key):
        captures = self.captures
        return captures[key]

    def __len__(self):
        captures = self.captures
        return len(captures)

    def __bool__(self):
        # truthiness must not fall back to __len__
        return True

    def __str__(self):
        return f"{self.captures!s}"

    def __repr__(self):
        return f"<guidance.Match object; captures={self.captures!s}; partial={self.partial!s}>"


class StatefulException(Exception):
    """Raised when the state of a grammar object is used as if it were a live model.

    Supporting stateful parser/grammar constructs directly would be nice
    eventually, but such "parser combinators" cannot be run efficiently in
    Python. A traditional separation of parser and grammar is used instead,
    which is why this exception is needed.
    """


@dataclass
class Function(Tagged):
    """A deferred call ``f(model, *args, **kwargs)`` that can be chained with ``+``.

    Adding a string, a ``GrammarNode`` or another ``Function`` on either side
    yields a new ``Function`` that performs both steps in order once it is
    called with a model.
    """

    name: str = field(init=False)
    f: Callable
    args: tuple[Any, ...]
    kwargs: dict[str, Any]

    def __post_init__(self):
        # ``name`` always mirrors the wrapped callable's ``__name__``
        self.name = self.f.__name__

    def __call__(self, model):
        """Run the wrapped callable on ``model`` and return its (non-None) result.

        Raises
        ------
        Exception
            If the wrapped callable returned None.
        """
        result = self.f(model, *self.args, **self.kwargs)
        if result is not None:
            return result
        raise Exception(
            f"The guidance function `{self.f.__name__}` did not return a model object! You need to return an updated model object at the end of your guidance function."
        )

    def __add__(self, other):
        """``self + other``: run ``self`` first, then append ``other`` to the result."""
        if isinstance(other, str):
            # strings may carry call tags, so resolve them first
            other = _parse_tags(other)
        elif not isinstance(other, (GrammarNode, Function)):
            return NotImplemented

        if isinstance(other, GrammarNode) and other.is_null:
            # nothing to append
            return self

        # The closure keeps this exact name: it becomes ``name`` of the new Function.
        def __add__(model):
            return self(model) + other

        return Function(__add__, [], {})

    def __radd__(self, other):
        """``other + self``: append ``other`` to the model first, then run ``self``."""
        if isinstance(other, str):
            # strings may carry call tags, so resolve them first
            other = _parse_tags(other)
        elif not isinstance(other, (GrammarNode, Function)):
            return NotImplemented

        if isinstance(other, GrammarNode) and other.is_null:
            # nothing to prepend
            return self

        # The closure keeps this exact name: it becomes ``name`` of the new Function.
        def __radd__(model):
            return self(model + other)

        return Function(__radd__, [], {})


S = TypeVar("S", bound="State")


class ASTNode(ABC):
    """Abstract base class of all AST nodes."""

    @abstractmethod
    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Execute this node against ``interpreter``.

        Subclasses in this module implement it by calling the interpreter
        method that corresponds to their node type, passing the node itself and
        ``kwargs`` along.
        """
        pass

    def simplify(self) -> "ASTNode":
        """Return a simplified equivalent of this node; the default is the node itself."""
        return self


@dataclass
class RoleStart(ASTNode):
    """Node carrying the name of a role whose start is being marked."""

    role: str

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Delegate to ``interpreter._role_start`` with this node and ``kwargs``."""
        return interpreter._role_start(self, **kwargs)


@dataclass
class RoleEnd(ASTNode):
    """Node carrying the name of a role whose end is being marked."""

    role: str

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Delegate to ``interpreter._role_end`` with this node and ``kwargs``."""
        return interpreter._role_end(self, **kwargs)


@dataclass
class ImageBlob(ASTNode):
    """Node holding image data as pydantic ``Base64Bytes``."""

    data: Base64Bytes

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Delegate to ``interpreter.image_blob`` with this node and ``kwargs``."""
        return interpreter.image_blob(self, **kwargs)


@dataclass
class ImageUrl(ASTNode):
    """Node holding the URL of an image."""

    url: str

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Delegate to ``interpreter.image_url`` with this node and ``kwargs``."""
        return interpreter.image_url(self, **kwargs)


@dataclass
class AudioBlob(ASTNode):
    """Node holding audio data as pydantic ``Base64Bytes``."""

    data: Base64Bytes

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Delegate to ``interpreter.audio_blob`` with this node and ``kwargs``."""
        return interpreter.audio_blob(self, **kwargs)


class GenAudio(ASTNode):
    """Node requesting audio generation.

    The ``kwargs`` dict given at construction is stored unchanged on the node;
    how it is interpreted is up to ``interpreter.gen_audio`` (not visible here).
    """

    def __init__(self, kwargs: dict[str, Any]):
        self.kwargs = kwargs

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Delegate to ``interpreter.gen_audio`` with this node and the call's ``kwargs``."""
        return interpreter.gen_audio(self, **kwargs)


@dataclass(frozen=True)
class GrammarNode(Tagged, ASTNode):
    """AST node that describes (part of) a grammar.

    Grammar nodes compose with ``+`` (against strings and other grammar nodes,
    and against ``Function`` objects that come out of tagged strings), can be
    serialized with ``ll_grammar`` and matched against input with ``match``.
    """

    @property
    def is_null(self) -> bool:
        """True only if this node matches the empty string and nothing else.

        The base implementation always answers False.
        """
        return False

    @property
    def is_allowed_in_lark_terminal(self) -> bool:
        """True if this node will be compiled down to a regular expression.

        Such a node cannot be recursive. By default a node qualifies when every
        one of its children does (so a node without children qualifies).
        """
        for child in self.children():
            if not child.is_allowed_in_lark_terminal:
                return False
        return True

    @property
    def is_allowed_in_lark_rule_with_attrs(self) -> bool:
        """True if this node can be used as a Lark rule with attributes.

        Usually a node that is not allowed in a terminal is not allowed in a
        rule with attributes either, so this defaults to the terminal answer;
        subgrammars are the notable exception.
        """
        return self.is_allowed_in_lark_terminal

    def simplify(self) -> "GrammarNode":
        """Return a simplified equivalent of this node; the default is the node itself."""
        return self

    def children(self) -> Sequence["GrammarNode"]:
        """Return the direct child nodes; none by default."""
        return ()

    def __add__(self, other) -> "GrammarNode":
        """``self + other`` for strings and grammar nodes.

        Null operands are dropped, a ``Function`` produced by a tagged string
        takes over through its ``__radd__``, and anything else is joined in a
        ``JoinNode``. Unsupported operand types yield ``NotImplemented``.
        """
        if isinstance(other, str):
            # strings may carry call tags, so resolve them first
            other = _parse_tags(other)
        elif not isinstance(other, GrammarNode):
            return NotImplemented

        if self.is_null:
            return other
        if isinstance(other, Function):
            return other.__radd__(self)
        return self if other.is_null else JoinNode((self, other))

    def __radd__(self, other) -> "GrammarNode":
        """``other + self``; mirror image of ``__add__``."""
        if isinstance(other, str):
            # strings may carry call tags, so resolve them first
            other = _parse_tags(other)
        elif not isinstance(other, GrammarNode):
            return NotImplemented

        if self.is_null:
            return other
        if isinstance(other, Function):
            return other.__add__(self)
        return self if other.is_null else JoinNode((other, self))

    def __getitem__(self, key):
        """Always raises: a grammar node has no state to look up."""
        raise StatefulException("GrammarNodes can't access state!")

    def match(
        self,
        byte_string: str | bytes,
        allow_partial: bool = False,
        raise_exceptions: bool = False,
        enforce_max_tokens: bool = True,
    ) -> Match | None:
        """Match ``byte_string`` against this grammar.

        Parameters
        ----------
        byte_string
            Input to match; a ``str`` is encoded to bytes first.
        allow_partial
            If False, the parser is forced to finish and a non-matching input
            gives None. If True, an incomplete match is still returned, flagged
            as partial.
        raise_exceptions
            If True, a ``ByteParserException`` raised while parsing propagates
            instead of being turned into None.
        enforce_max_tokens
            Forwarded to ``ll_grammar`` when the grammar is serialized.

        Returns
        -------
        Match or None
            The captures wrapped in a ``Match``, or None if the input was
            rejected.
        """
        data = byte_string.encode() if isinstance(byte_string, str) else byte_string
        parser = ByteParser(self.ll_grammar(enforce_max_tokens=enforce_max_tokens))

        try:
            parser.consume_bytes(data)
            if not allow_partial:
                parser.force_done()
        except ByteParserException:
            if raise_exceptions:
                raise
            return None

        if not (allow_partial or parser.matched()):
            return None

        if parser.matched():
            parser.force_done()

        return Match(*parser.get_captures(), partial=not parser.matched())  # type: ignore[misc]

    def forced_prefix(self) -> str:
        """Return the ``bytes`` of a fresh parser for this grammar, decoded as UTF-8.

        Undecodable bytes are dropped. Presumably these are the bytes the
        grammar forces before any input is consumed -- confirm against
        ``ByteParser``.
        """
        prefix_bytes = ByteParser(self.ll_grammar()).bytes
        return prefix_bytes.decode("utf-8", errors="ignore")

    def ll_grammar(self, enforce_max_tokens: bool = True) -> str:
        """Serialize the simplified form of this node with ``LarkSerializer``."""
        serializer = LarkSerializer(enforce_max_tokens=enforce_max_tokens)
        return serializer.serialize(self.simplify())


@dataclass(frozen=True)
class LiteralNode(GrammarNode):
    """Grammar node for a fixed piece of text."""

    value: str

    @property
    def is_null(self) -> bool:
        """True when the literal is the empty string."""
        return self.value == ""

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Delegate to ``interpreter.text`` with this node and ``kwargs``."""
        return interpreter.text(self, **kwargs)


@dataclass(frozen=True)
class SpecialToken(GrammarNode):
    """Grammar node for a special token given by text, by id or by an id range.

    Exactly one of the three fields has to be set.
    """

    text: str | None = None
    id: int | None = None
    range: tuple[int, int] | None = None

    def __post_init__(self):
        # exactly one field set means exactly two of them are still None
        unset = [self.text, self.id, self.range].count(None)
        if unset != 2:
            raise ValueError("Exactly one of text, id, or range must be set")

    def format(self) -> str:
        """Render the token as ``<text>``, ``<[id]>`` or ``<[low-high]>``.

        Raises
        ------
        ValueError
            If none of the fields is set.
        """
        if self.text is not None:
            rendered = f"<{self.text}>"
        elif self.id is not None:
            rendered = f"<[{self.id}]>"
        elif self.range is not None:
            rendered = f"<[{self.range[0]}-{self.range[1]}]>"
        else:
            raise ValueError("SpecialToken must have either text, id, or range set")
        return rendered

    @property
    def is_allowed_in_lark_terminal(self) -> bool:
        """Special tokens are never allowed inside a Lark terminal."""
        return False

    @property
    def is_allowed_in_lark_rule_with_attrs(self) -> bool:
        """Special tokens may be used as a Lark rule with attributes."""
        return True

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        # Just use grammar -- I don't think we need a special case for this
        return interpreter.grammar(self, **kwargs)


@dataclass(frozen=True)
class RegexNode(GrammarNode):
    """Grammar node described by a regular expression."""

    # The pattern; None is accepted by the annotation, its meaning is decided by
    # whatever consumes the node (not visible here).
    regex: str | None

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Delegate to ``interpreter.regex`` with this node and ``kwargs``."""
        return interpreter.regex(self, **kwargs)


@dataclass(frozen=True)
class SelectNode(GrammarNode):
    """Grammar node that offers a choice between several alternatives."""

    alternatives: tuple[GrammarNode, ...]

    @property
    def is_null(self) -> bool:
        """True when every alternative is null (also when there are none)."""
        for alt in self.alternatives:
            if not alt.is_null:
                return False
        return True

    def simplify(self) -> "GrammarNode":
        """Simplify the alternatives and drop the null ones.

        A fully null selection becomes an empty ``LiteralNode``. A single
        remaining alternative replaces the selection. If any alternative was
        null, the result is wrapped in a zero-or-one ``RepeatNode`` so that
        matching nothing stays possible.
        """
        if self.is_null:
            return LiteralNode("")

        kept = tuple(alt.simplify() for alt in self.alternatives if not alt.is_null)
        node = kept[0] if len(kept) == 1 else SelectNode(kept)

        had_null_branch = any(alt.is_null for alt in self.alternatives)
        return RepeatNode(node, 0, 1) if had_null_branch else node

    def children(self) -> Sequence["GrammarNode"]:
        """The alternatives are the children."""
        return self.alternatives

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Delegate to ``interpreter.select`` with this node and ``kwargs``."""
        return interpreter.select(self, **kwargs)


@dataclass(frozen=True)
class JoinNode(GrammarNode):
    """Grammar node that matches a sequence of nodes one after another."""

    # Parts of the sequence, in match order.
    nodes: tuple[GrammarNode, ...]

    @property
    def is_null(self) -> bool:
        """``True`` when every part is null (vacuously so when there are none)."""
        for part in self.nodes:
            if not part.is_null:
                return False
        return True

    def simplify(self) -> "GrammarNode":
        """Return an equivalent node with null parts removed.

        An all-null sequence collapses to an empty ``LiteralNode``. Otherwise
        the non-null parts are simplified; a single survivor is returned
        directly, several are wrapped in a new ``JoinNode``.
        """
        if self.is_null:
            return LiteralNode("")

        survivors = []
        for part in self.nodes:
            if part.is_null:
                continue
            survivors.append(part.simplify())

        if len(survivors) == 1:
            return survivors[0]
        return JoinNode(tuple(survivors))

    def children(self) -> Sequence["GrammarNode"]:
        """Return the joined parts as this node's direct children."""
        return self.nodes

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Dispatch to ``interpreter.join``, forwarding ``kwargs`` unchanged."""
        return interpreter.join(self, **kwargs)


@dataclass(frozen=True)
class RepeatNode(GrammarNode):
    """Grammar node that repeats ``node`` between ``min`` and ``max`` times."""

    # The node being repeated.
    node: GrammarNode
    # Minimum repetition count; must be >= 0.
    min: int
    # Maximum repetition count; None is accepted and skips the upper-bound check.
    max: int | None

    @property
    def is_null(self) -> bool:
        """``True`` when the repeated node is null or both bounds are zero."""
        if self.node.is_null:
            return True
        return self.min == 0 and self.max == 0

    def __post_init__(self):
        """Validate the repetition bounds.

        Raises:
            ValueError: If ``min`` is negative, or ``max`` is given and is
                smaller than ``min``.
        """
        lower, upper = self.min, self.max
        if lower < 0:
            raise ValueError("min must be >= 0")
        if upper is None:
            return
        if upper < lower:
            raise ValueError("max must be >= min")

    def children(self) -> Sequence["GrammarNode"]:
        """Return the repeated node as the only child."""
        return (self.node,)

    def simplify(self) -> GrammarNode:
        """Return a ``RepeatNode`` with the same bounds around the simplified inner node."""
        inner = self.node.simplify()
        return RepeatNode(inner, self.min, self.max)

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Dispatch to ``interpreter.repeat``, forwarding ``kwargs`` unchanged."""
        return interpreter.repeat(self, **kwargs)


@dataclass(frozen=True)
class SubstringNode(GrammarNode):
    """Grammar node built from a tuple of string chunks and executed via ``interpreter.substring``."""

    # NOTE(review): presumably the pieces of the source text a match may be assembled from --
    # confirm against the substring library function / interpreter implementation.
    chunks: tuple[str, ...]

    @property
    def is_allowed_in_lark_terminal(self) -> bool:
        """Always ``True``: this node may appear inside a Lark terminal."""
        # this can be used as part of bigger regexes
        return True

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Dispatch to ``interpreter.substring``, forwarding ``kwargs`` unchanged."""
        return interpreter.substring(self, **kwargs)


# This creates a name for the given grammar node (value), which can be referenced
# via RuleRefNode (or directly).
# In Lark syntax this results in approx. "{name}: {value}"
# This can be either a Lark rule (non-terminal) or a terminal definition
# (meaning the name can be upper- or lowercase).
@dataclass(frozen=True)
class RuleNode(GrammarNode):
    """Named definition wrapping ``value``, optionally decorated with rule attributes.

    ``LarkSerializer.visit`` writes each attribute that is set into the
    ``name[attr, ...]: value`` header of the emitted rule.
    """

    name: str
    value: GrammarNode
    # Capture name; the serializer prefixes it with "__LIST_APPEND:" when list_append is true.
    capture: str | None = None
    list_append: bool = False
    temperature: float | None = None
    # Only emitted when the serializer was created with enforce_max_tokens=True.
    max_tokens: int | None = None
    stop: RegexNode | LiteralNode | None = None
    suffix: LiteralNode | None = None
    stop_capture: str | None = None
    lazy: bool = False

    def __post_init__(self) -> None:
        """Reject rule attributes on a value that does not support them.

        Raises:
            ValueError: If any of temperature, max_tokens, stop, suffix,
                stop_capture or lazy is set while
                ``value.is_allowed_in_lark_rule_with_attrs`` is false.
        """
        # Note: capture is very intentionally missing from this list, as it's not like the other attributes
        optional_attrs = (
            self.temperature,
            self.max_tokens,
            self.stop,
            self.suffix,
            self.stop_capture,
        )
        carries_attrs = any(attr is not None for attr in optional_attrs) or self.lazy
        if carries_attrs and not self.value.is_allowed_in_lark_rule_with_attrs:
            raise ValueError("RuleNode is not terminal, so it cannot have a temperature, max_tokens, or stop condition")

    @property
    def is_allowed_in_lark_terminal(self) -> bool:
        """``True`` only if no attribute (capture included) is set and the base-class check passes."""
        attr_values = (
            self.capture,
            self.temperature,
            self.max_tokens,
            self.stop,
            self.suffix,
            self.stop_capture,
        )
        for attr in attr_values:
            if attr is not None:
                return False
        if self.lazy:
            return False
        return super().is_allowed_in_lark_terminal

    def children(self) -> tuple[GrammarNode]:
        """Return the wrapped value as the only child."""
        return (self.value,)

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Dispatch to ``interpreter.rule``, forwarding ``kwargs`` unchanged."""
        return interpreter.rule(self, **kwargs)


@dataclass(frozen=True, eq=False)
class RuleRefNode(GrammarNode):
    """Late-bound reference to a ``RuleNode``, used to express recursive rule definitions.

    The node is created without a target and bound exactly once through
    ``set_target``. ``eq=False`` keeps the default identity-based equality and
    hashing, so two unbound references are never considered the same node.
    """

    # Bound once via set_target(); not an __init__ argument.
    target: RuleNode | None = field(default=None, init=False)

    def set_target(self, target: RuleNode) -> None:
        """Bind this reference to ``target``.

        Raises:
            ValueError: If a target has already been set.
        """
        if self.target is not None:
            raise ValueError("RuleRefNode target already set")
        # Side-step frozen=True to set target
        object.__setattr__(self, "target", target)

    @property
    def is_allowed_in_lark_terminal(self) -> bool:
        """Always ``False``: a rule reference is never reported as terminal-compatible."""
        # RuleRefNode should only ever be used to enable recursive rule definitions,
        # so it should never be terminal.
        return False

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Run the referenced rule through ``interpreter.rule``.

        Raises:
            ValueError: If no target has been bound yet.
        """
        if self.target is None:
            raise ValueError("RuleRefNode target not set")
        # Forward kwargs exactly like RuleNode._run (and every other node) does, so
        # options passed to the interpreter are not silently dropped for references.
        return interpreter.rule(self.target, **kwargs)


@dataclass(frozen=True)
class BaseSubgrammarNode(GrammarNode):
    """Common base for subgrammar-style nodes: never terminal, but allowed under a rule with attributes."""

    @property
    def is_allowed_in_lark_terminal(self) -> bool:
        """Always ``False``."""
        return False

    @property
    def is_allowed_in_lark_rule_with_attrs(self) -> bool:
        """Always ``True``, so ``RuleNode`` accepts attributes on a rule wrapping this node."""
        # Typically, not being allowed in terminal implies that a node is not allowed in a rule with attributes,
        # however this is notably false for subgrammars
        return True


@dataclass(frozen=True)
class SubgrammarNode(BaseSubgrammarNode):
    """Nested grammar; ``LarkSerializer.visit`` serializes ``body`` separately and embeds it in a ``%lark { ... }`` block."""

    # Root node of the nested grammar.
    body: GrammarNode
    # When set (and non-empty), the serializer appends "%ignore /<skip_regex>/" to the nested grammar.
    skip_regex: str | None = None

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Dispatch to ``interpreter.subgrammar``, forwarding ``kwargs`` unchanged."""
        return interpreter.subgrammar(self, **kwargs)


class LLGJsonCompileOptions(TypedDict):
    """JSON compile options that ``JsonNode._llguidance_json`` stores under the schema's ``"x-guidance"`` key.

    The TypedDict is total: every key is required, with ``None`` as an allowed value.
    """

    # defaults to ","
    item_separator: str | None
    # defaults to ":"
    key_separator: str | None
    # defaults to None - depends on whitespace_flexible
    whitespace_pattern: str | None
    # defaults to true (r"[\x20\x0A\x0D\x09]+"); if false, no whitespace is allowed
    whitespace_flexible: bool | None
    # defaults to false
    coerce_one_of: bool | None
    # ignore unimplemented keywords; defaults to false
    lenient: bool | None


@dataclass(frozen=True, eq=False)
class JsonNode(BaseSubgrammarNode):
    """Subgrammar node constraining output to a JSON schema."""

    # JSON schema; None is treated as {"type": "object"}.
    schema: dict[str, Any] | None = None
    # Compile options placed under the schema's "x-guidance" key when given.
    llg_options: LLGJsonCompileOptions | None = None

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Dispatch to ``interpreter.json``, forwarding ``kwargs`` unchanged."""
        return interpreter.json(self, **kwargs)

    @cached_property
    def _llguidance_json(self) -> dict[str, Any]:
        """Build (once) the schema dict handed to llguidance.

        Returns:
            A shallow copy of ``schema`` (or ``{"type": "object"}`` when no
            schema was given), with ``llg_options`` stored under
            ``"x-guidance"`` if options were supplied.
        """
        # Without a user schema, assume an object is wanted
        # (this should match the behavior of most remote providers).
        # Otherwise a shallow copy is enough: only a top-level key is added below.
        payload = {"type": "object"} if self.schema is None else copy.copy(self.schema)

        options = self.llg_options
        if options is not None:
            # Maybe TODO: let LLGJsonCompileOptions be non-total
            # and update the schema with any present options
            # (in case x-guidance was already set with some options)
            payload["x-guidance"] = options
        return payload

    def _llguidance_validate(self) -> None:
        """Validate the JSON schema with `llguidance` and warn about any issues.

        Raises:
            ValueError: With the first reported message when validation fails.
        """
        grammar = LLMatcher.grammar_from_json_schema(self._llguidance_json)
        failed, notes = LLMatcher.validate_grammar_with_warnings(grammar)
        if failed:
            raise ValueError(notes[0])
        # this will warn about oneOf coercion, and any other unsupported features if lenient is enabled
        for note in notes:
            warnings.warn(note, stacklevel=2)


@dataclass(frozen=True, eq=False)
class LarkNode(BaseSubgrammarNode):
    """Subgrammar node carrying raw Lark grammar text; the serializer embeds it in a ``%lark { ... }`` block."""

    # Grammar source text, embedded by the serializer after indenting and stripping.
    lark_grammar: str

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Dispatch to ``interpreter.lark``, forwarding ``kwargs`` unchanged."""
        return interpreter.lark(self, **kwargs)


@dataclass(frozen=True)
class ToolCallNode(ASTNode):
    """AST node describing a set of tools that may be called."""

    # Tool definitions keyed by tool name; must not be empty.
    tools: dict[str, Tool]
    tool_choice: Literal["auto", "required"] = "auto"
    parallel_tool_calls: bool = False
    plaintext_regex: str | None = None

    @classmethod
    def from_tools(
        cls,
        tools: list[Callable | Tool],
        tool_choice: Literal["auto", "required"] = "auto",
        parallel_tool_calls: bool = False,
        plaintext_regex: str | None = None,
    ) -> "ToolCallNode":
        """Build a node from a list of ``Tool`` objects and/or plain callables.

        Callables are converted with ``Tool.from_callable``. The remaining
        arguments are passed through to the constructor unchanged.

        Raises:
            ValueError: If an entry is neither a ``Tool`` nor callable, if two
                tools share a name, or (from ``__post_init__``) if ``tools``
                is empty.
        """
        by_name: dict[str, Tool] = {}
        for entry in tools:
            if not isinstance(entry, Tool):
                if not callable(entry):
                    raise ValueError(f"Unsupported tool type: {type(entry)}")
                entry = Tool.from_callable(entry)
            if entry.name in by_name:
                raise ValueError(f"Duplicate tool name: {entry.name}")
            by_name[entry.name] = entry
        return cls(
            tools=by_name,
            tool_choice=tool_choice,
            parallel_tool_calls=parallel_tool_calls,
            plaintext_regex=plaintext_regex,
        )

    def __post_init__(self):
        """Reject a node constructed without any tools.

        Raises:
            ValueError: If ``tools`` is empty.
        """
        if self.tools:
            return
        raise ValueError("ToolCallNode must have at least one tool")

    def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[OutputAttr]:
        """Dispatch to ``interpreter.tool_call``, forwarding ``kwargs`` unchanged."""
        return interpreter.tool_call(self, **kwargs)


class LarkSerializer:
    def __init__(self, enforce_max_tokens: bool = True):
        """Create an empty serializer.

        Args:
            enforce_max_tokens: When false, ``visit`` leaves ``max_tokens``
                attributes out of the emitted rules.
        """
        self.enforce_max_tokens = enforce_max_tokens
        # Emitted rule text, keyed by the final (de-duplicated) rule name.
        self.rules: dict[str, str] = {}
        # Final name assigned to each visited RuleNode; its order drives the output order in serialize().
        self.names: dict[RuleNode, str] = {}

    def serialize(self, node: GrammarNode) -> str:
        """Serialize ``node`` into grammar text beginning with a ``%llguidance {}`` header.

        ``node`` is visited directly when it is already a ``RuleNode`` named
        "start"; otherwise it is first wrapped in one. Rules are then written
        in the order in which they were registered in ``self.names``, with a
        blank line separating multi-line rules from their neighbours.

        Returns:
            The complete grammar text.
        """
        already_start = isinstance(node, RuleNode) and node.name == "start"
        self.visit(node if already_start else RuleNode("start", node))

        pieces = ["%llguidance {}\n\n"]
        if "start" not in self.rules:
            # The start rule was registered as "START" instead (presumably a terminal-style
            # name from normalize_name -- confirm), so alias it to the "start" entry point.
            assert "START" in self.rules
            pieces.append("start: START\n")

        previous_multiline = True
        for rule_name in self.names.values():
            rule_text = self.rules[rule_name]
            multiline = "\n" in rule_text
            if multiline and not previous_multiline:
                # Blank line before a multi-line rule that follows a single-line one.
                pieces.append("\n")
            pieces.append(rule_text + "\n")
            if multiline:
                # Blank line after every multi-line rule.
                pieces.append("\n")
            previous_multiline = multiline

        return "".join(pieces)

    def visit(self, node: GrammarNode, top=False) -> str:
        if isinstance(node, RuleNode):
            if node in self.names:
                return self.names[node]

            name = self.normalize_name(node.name, node.is_allowed_in_lark_terminal)
            names = set(self.names.values())
            if name in names:
                i = 1
                while f"{name}_{i}" in names:
                    i += 1
                name = f"{name}_{i}"
            self.names[node] = name

            res = name
            attrs = []
            if node.capture is not None:
                capture_name = node.capture
                if node.list_append:
                    capture_name = f"__LIST_APPEND:{capture_name}"
                if capture_name != name:
                    attrs.append(f"capture={json.dumps(capture_name)}")
                else:
                    attrs.append("capture")
            if node.temperature is not None:
                attrs.append(f"temperature={node.temperature}")
            if self.enforce_max_tokens and node.max_tokens is not None:
                attrs.append(f"max_tokens={node.max_tokens}")
            if node.stop:
                attrs.append(f"stop={self.visit(node.stop)}")
            if node.suffix:
                attrs.append(f"suffix={self.visit(node.suffix)}")
            if node.stop_capture:
                attrs.append(f"stop_capture={json.dumps(node.stop_capture)}")
            if node.lazy:
                attrs.append("lazy")
            if attrs:
                res += f"[{', '.join(attrs)}]"

            res += ": "
            target = node.value
            if isinstance(target, JsonNode):
                res += "%json " + json.dumps(target._llguidance_json, indent=2)
            elif isinstance(target, LarkNode):
                # TODO: we can't decide whether or not to enforce max tokens here easily.
                # We could in principle parse the grammar and/or use a regex?
                res += f"%lark {{\n{textwrap.indent(target.lark_grammar, '  ').strip()}\n}}"
            elif isinstance(target, SubgrammarNode):
                lark_grammar = LarkSerializer(enforce_max_tokens=self.enforce_max_tokens).serialize(target.body)
                if target.skip_regex:
                    lark_grammar += f"\n%ignore /{target.skip_regex}/"
                res += f"%lark {{\n{textwrap.indent(lark_grammar, '  ').strip()}\n}}"
            elif isinstance(target, GrammarNode):
                if (
                    not isinstance(target, RuleNode)
                    and target.is_allowed_in_lark_terminal
                    and not node.is_allowed_in_lark_terminal
                ):
                    """
                    If the RHS could be written as a terminal, but the presence of attributes on the LHS
                    prevents it, we wrap the RHS in a new rule like so:
                    ```
                    rule[attr]: TERMINAL | TERMINAL | TERMINAL
                    ```
                    gets rewritten as:
                    ```
                    rule[attr]: RULE
                    RULE: TERMINAL | TERMINAL | TERMINAL
                    ```
                    In particular, this lets us ensure that large alternations are handled as single lexemes
                    rather than a choice between multiple lexemes. Keeping the number of individual lexemes
                    to a minimum is important for performance.
                    Indeed, llguidance imposes a limit to maintain performance: see issue #1320
                    """
                    target = RuleNode(
                        name=node.name,
                        value=target,
                    )
                res += self.visit(target.simplify(), top=True)
            else:
                if TYPE_CHECKING:
                    assert_never(target)
                raise TypeError(f"Unknown rule value type: {target}")
            self.rules[name] = res
            return name
        if node.is_null:
            return '""'

        if isinstance(node, LiteralNode):
            return json.dumps(node.value)

        if isinstance(node, SpecialToken):
            return node.format()

        if isinstance(node, RegexNode):
            rx = node.regex
            if rx is None:
                rx = "(?s:.*)"
            return self.regex(rx)

        if isinstance(node, SelectNode):
            if top:
                return "\n     | ".join(self.visit(alt) for alt in node.alternatives)
            else:
                return "(" + " | ".join(self.visit(alt) for alt in node.alternatives) + ")"

        if isinstance(node, JoinNode):
            return " ".join(self.visit(n) for n in node.nodes if not n.is_null)

        if isinstance(node, RepeatNode):
            inner = self.visit(node.node)
 
Download .txt
gitextract_c7vutqob/

├── .git-blame-ignore-revs
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   └── bug_report.md
│   └── workflows/
│       ├── call_cpu_tests.yml
│       ├── call_gpu_tests.yml
│       ├── ci_credentials.yml
│       ├── ci_docs.yml
│       ├── ci_linux.yml
│       ├── ci_macos.yml
│       ├── ci_windows.yml
│       ├── code_quality.yml
│       ├── notebook_tests.yml
│       ├── pull_request.yml
│       ├── pypi_upload.yml
│       └── widget_build.yml
├── .gitignore
├── CONTRIBUTING.md
├── GOVERNANCE.md
├── LICENSE.md
├── MAINTAINERS.md
├── README.md
├── client/
│   └── graphpaper-inline/
│       ├── .gitignore
│       ├── build-to-guidance.sh
│       ├── dist/
│       │   └── .gitignore
│       ├── package.json
│       ├── postcss.config.js
│       ├── rollup.config.mjs
│       ├── src/
│       │   ├── App.svelte
│       │   ├── CustomAudio.svelte
│       │   ├── CustomVideo.svelte
│       │   ├── MetricRecord.svelte
│       │   ├── ResizeListener.svelte
│       │   ├── Select.svelte
│       │   ├── Sparkline.svelte
│       │   ├── StitchHandler.svelte
│       │   ├── TokenGrid.svelte
│       │   ├── TokenGridItem.svelte
│       │   ├── clickoutside.ts
│       │   ├── interfaces.ts
│       │   ├── longhover.ts
│       │   ├── main.css
│       │   ├── main.js
│       │   ├── metrics.ts
│       │   ├── mocks.ts
│       │   ├── stitch.ts
│       │   └── template.html
│       └── tsconfig.json
├── docs/
│   ├── .readthedocs.yaml
│   ├── Makefile
│   ├── _static/
│   │   └── css/
│   │       └── styles.css
│   ├── api.rst
│   ├── api_examples.rst
│   ├── art_of_prompt_design.rst
│   ├── conf.py
│   ├── index.rst
│   ├── make.bat
│   └── tutorials.rst
├── guidance/
│   ├── __init__.py
│   ├── _ast.py
│   ├── _bg/
│   │   └── __init__.py
│   ├── _grammar.py
│   ├── _guidance.py
│   ├── _guidance.pyi
│   ├── _parser.py
│   ├── _schema.py
│   ├── _tools.py
│   ├── _topics.py
│   ├── _utils.py
│   ├── chat.py
│   ├── debug.py
│   ├── library/
│   │   ├── __init__.py
│   │   ├── _audio.py
│   │   ├── _block.py
│   │   ├── _capture.py
│   │   ├── _ebnf.py
│   │   ├── _gen.py
│   │   ├── _image.py
│   │   ├── _json.py
│   │   ├── _optional.py
│   │   ├── _pydantic.py
│   │   ├── _role.py
│   │   ├── _sequences.py
│   │   ├── _subgrammar.py
│   │   ├── _substring.py
│   │   └── _video.py
│   ├── metrics/
│   │   ├── __init__.py
│   │   └── _metrics.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── _azureai.py
│   │   ├── _base/
│   │   │   ├── __init__.py
│   │   │   ├── _interpreter.py
│   │   │   ├── _model.py
│   │   │   └── _state.py
│   │   ├── _byte_tokenizer.py
│   │   ├── _engine/
│   │   │   ├── __init__.py
│   │   │   ├── _engine.py
│   │   │   ├── _interpreter.py
│   │   │   ├── _state.py
│   │   │   └── _tokenizer.py
│   │   ├── _llama_cpp.py
│   │   ├── _mock.py
│   │   ├── _onnxruntime.py
│   │   ├── _openai.py
│   │   ├── _openai_base.py
│   │   ├── _transformers.py
│   │   ├── broken_models/
│   │   │   ├── README.MD
│   │   │   ├── _Gemini.py
│   │   │   ├── _anthropic.py
│   │   │   ├── _azure_openai.py
│   │   │   ├── _azureai_studio.py
│   │   │   ├── _cohere.py
│   │   │   ├── _googleai.py
│   │   │   ├── _lite_llm.py
│   │   │   ├── _togetherai.py
│   │   │   └── _vertexai.py
│   │   └── experimental/
│   │       ├── __init__.py
│   │       ├── _litellm.py
│   │       ├── _sglang.py
│   │       └── _vllm.py
│   ├── py.typed
│   ├── registry/
│   │   ├── __init__.py
│   │   └── _registry.py
│   ├── resources/
│   │   └── graphpaper-inline.html
│   ├── trace/
│   │   ├── __init__.py
│   │   └── _trace.py
│   └── visual/
│       ├── __init__.py
│       ├── _environment.py
│       ├── _exchange.py
│       ├── _jupyter.py
│       ├── _message.py
│       ├── _renderer.py
│       └── _trace.py
├── notebooks/
│   ├── anachronism.ipynb
│   ├── api_examples/
│   │   ├── library/
│   │   │   └── gen.ipynb
│   │   └── models/
│   │       ├── AzureOpenAI.ipynb
│   │       ├── OpenAI.ipynb
│   │       └── TogetherAI.ipynb
│   ├── art_of_prompt_design/
│   │   ├── prompt_boundaries_and_token_healing.ipynb
│   │   ├── rag.ipynb
│   │   ├── react.ipynb
│   │   ├── tool_use.ipynb
│   │   └── use_clear_syntax.ipynb
│   ├── chatgpt_vs_open_source_on_harder_tasks.ipynb
│   ├── engine_chat_completion.ipynb
│   ├── guaranteeing_valid_syntax.ipynb
│   ├── proverb.ipynb
│   ├── testing_lms.ipynb
│   ├── tutorials/
│   │   ├── adding_new_models.ipynb
│   │   ├── chat.ipynb
│   │   ├── code_generation.ipynb
│   │   ├── guidance_acceleration.ipynb
│   │   ├── intro_to_guidance.ipynb
│   │   ├── litellm_models.ipynb
│   │   ├── onnxruntime_models.ipynb
│   │   ├── regex_constraints.ipynb
│   │   ├── token_healing.ipynb
│   │   └── tool_calling.ipynb
│   └── unstable/
│       └── .gitignore
├── packages/
│   └── python/
│       └── stitch/
│           ├── .coveragerc
│           ├── .eslintignore
│           ├── .eslintrc.js
│           ├── .github/
│           │   └── workflows/
│           │       └── build.yml
│           ├── .gitignore
│           ├── .npmignore
│           ├── .prettierignore
│           ├── .prettierrc
│           ├── .yarnrc.yml
│           ├── LICENSE.txt
│           ├── MANIFEST.in
│           ├── README.md
│           ├── babel.config.js
│           ├── codecov.yml
│           ├── css/
│           │   └── widget.css
│           ├── docs/
│           │   ├── Makefile
│           │   ├── environment.yml
│           │   ├── make.bat
│           │   └── source/
│           │       ├── _static/
│           │       │   └── helper.js
│           │       ├── conf.py
│           │       ├── develop-install.rst
│           │       ├── examples/
│           │       │   ├── index.rst
│           │       │   └── introduction.nblink
│           │       ├── index.rst
│           │       ├── installing.rst
│           │       └── introduction.rst
│           ├── examples/
│           │   └── introduction.ipynb
│           ├── install.json
│           ├── jest.config.js
│           ├── package.json
│           ├── pyproject.toml
│           ├── pytest.ini
│           ├── readthedocs.yml
│           ├── setup.py
│           ├── src/
│           │   ├── __tests__/
│           │   │   ├── index.spec.ts
│           │   │   └── utils.ts
│           │   ├── extension.ts
│           │   ├── index.ts
│           │   ├── plugin.ts
│           │   ├── version.ts
│           │   └── widget.ts
│           ├── stitch/
│           │   ├── __init__.py
│           │   ├── _frontend.py
│           │   ├── _version.py
│           │   ├── nbextension/
│           │   │   └── extension.js
│           │   ├── stitch.py
│           │   └── tests/
│           │       ├── __init__.py
│           │       ├── conftest.py
│           │       ├── test_example.py
│           │       └── test_nbextension_path.py
│           ├── stitch.json
│           ├── tsconfig.eslint.json
│           ├── tsconfig.json
│           └── webpack.config.js
├── pyproject.toml
├── ruff.toml
├── scripts/
│   └── extract_python_from_readme.py
└── tests/
    ├── ReadMe.md
    ├── __init__.py
    ├── conftest.py
    ├── model_integration/
    │   ├── __init__.py
    │   ├── library/
    │   │   ├── test_gen.py
    │   │   ├── test_json.py
    │   │   ├── test_subgrammar.py
    │   │   └── test_substring.py
    │   ├── test_engine.py
    │   ├── test_grammar.py
    │   ├── test_model.py
    │   └── test_tokenizers.py
    ├── model_specific/
    │   ├── __init__.py
    │   ├── common_chat_testing.py
    │   ├── llama_cpp_tests/
    │   │   ├── __init__.py
    │   │   ├── test_chat_templates.py
    │   │   ├── test_llama_cpp.py
    │   │   └── test_think.py
    │   ├── test_onnxruntime_genai.py
    │   ├── test_transformers.py
    │   └── test_visual.py
    ├── need_credentials/
    │   ├── __init__.py
    │   ├── test_anthropic.py
    │   ├── test_azureai_openai.py
    │   ├── test_azureai_studio.py
    │   ├── test_chat_templates.py
    │   ├── test_cohere.py
    │   ├── test_googleai.py
    │   ├── test_lite_llm.py
    │   ├── test_openai.py
    │   ├── test_togetherai.py
    │   ├── test_tokenizers.py
    │   └── test_vertexai.py
    ├── notebooks/
    │   ├── __init__.py
    │   ├── nb_test_utils.py
    │   ├── test_notebooks.py
    │   └── test_notebooks_azure.py
    ├── tokenizer_common.py
    ├── unit/
    │   ├── __init__.py
    │   ├── library/
    │   │   ├── __init__.py
    │   │   ├── json/
    │   │   │   ├── __init__.py
    │   │   │   ├── test_allOf.py
    │   │   │   ├── test_json.py
    │   │   │   ├── test_refs.py
    │   │   │   ├── test_string_format.py
    │   │   │   └── utils.py
    │   │   ├── test_block.py
    │   │   ├── test_capture.py
    │   │   ├── test_gen.py
    │   │   ├── test_image.py
    │   │   ├── test_one_or_more.py
    │   │   ├── test_pydantic.py
    │   │   ├── test_regex.py
    │   │   ├── test_sequences.py
    │   │   ├── test_special.py
    │   │   ├── test_subgrammar.py
    │   │   └── test_substring.py
    │   ├── test_ast.py
    │   ├── test_decorator.py
    │   ├── test_grammar.py
    │   ├── test_ll.py
    │   ├── test_model.py
    │   ├── test_parser.py
    │   ├── test_trace.py
    │   └── test_visual.py
    └── utils.py
Download .txt
SYMBOL INDEX (790 symbols across 59 files)

FILE: client/graphpaper-inline/src/clickoutside.ts
  function clickOutside (line 3) | function clickOutside(node: HTMLElement) {

FILE: client/graphpaper-inline/src/interfaces.ts
  type MetricDef (line 5) | interface MetricDef {
  type MetricVal (line 13) | type MetricVal = string | number | Array<number | string>;
  type FlatToken (line 15) | interface FlatToken {
  type TokenCallback (line 27) | type TokenCallback = (token: FlatToken) => string;
  type MediaNodeContext (line 29) | interface MediaNodeContext {
  type MediaType (line 34) | type MediaType = "audio" | "video" | "image";
  type MediaNode (line 36) | interface MediaNode {
  type MultimodalNode (line 43) | type MultimodalNode =

FILE: client/graphpaper-inline/src/longhover.ts
  function longhover (line 3) | function longhover(node: HTMLElement, duration: number) {

FILE: client/graphpaper-inline/src/stitch.ts
  type NodeAttr (line 6) | interface NodeAttr {
  type TextOutput (line 10) | interface TextOutput extends NodeAttr {
  type TokenOutput (line 19) | interface TokenOutput extends TextOutput {
  type Token (line 25) | interface Token {
  type Backtrack (line 32) | interface Backtrack extends NodeAttr {
  type ImageOutput (line 38) | interface ImageOutput extends NodeAttr {
  type AudioOutput (line 45) | interface AudioOutput extends NodeAttr {
  type VideoOutput (line 52) | interface VideoOutput extends NodeAttr {
  type RoleOpenerInput (line 59) | interface RoleOpenerInput extends NodeAttr {
  type RoleCloserInput (line 66) | interface RoleCloserInput extends NodeAttr {
  type GuidanceMessage (line 72) | interface GuidanceMessage {
  type TraceMessage (line 77) | interface TraceMessage extends GuidanceMessage {
  type ResetDisplayMessage (line 84) | interface ResetDisplayMessage extends GuidanceMessage {
  type ExecutionStartedMessage (line 88) | interface ExecutionStartedMessage extends GuidanceMessage {
  type ExecutionCompletedMessage (line 92) | interface ExecutionCompletedMessage extends GuidanceMessage {
  type ClientReadyMessage (line 97) | interface ClientReadyMessage extends GuidanceMessage {
  type ClientReadyAckMessage (line 101) | interface ClientReadyAckMessage extends GuidanceMessage {
  type OutputRequestMessage (line 105) | interface OutputRequestMessage extends GuidanceMessage {
  type OutputRequestAckMessage (line 110) | interface OutputRequestAckMessage extends GuidanceMessage {
  type MetricMessage (line 114) | interface MetricMessage extends GuidanceMessage {
  type StitchMessage (line 121) | interface StitchMessage {
  function isGuidanceMessage (line 126) | function isGuidanceMessage(o: GuidanceMessage | undefined | null): o is ...
  function isTraceMessage (line 131) | function isTraceMessage(o: GuidanceMessage | undefined | null): o is Tra...
  function isBacktrack (line 136) | function isBacktrack(o: NodeAttr | undefined | null): o is Backtrack {
  function isRoleOpenerInput (line 141) | function isRoleOpenerInput(o: NodeAttr | undefined | null): o is RoleOpe...
  function isRoleCloserInput (line 146) | function isRoleCloserInput(o: NodeAttr | undefined | null): o is RoleClo...
  function isTextOutput (line 151) | function isTextOutput(o: NodeAttr | undefined | null): o is TextOutput {
  function isTokenOutput (line 156) | function isTokenOutput(o: NodeAttr | undefined | null): o is TokenOutput {
  function isImageOutput (line 161) | function isImageOutput(o: NodeAttr | undefined | null): o is ImageOutput {
  function isAudioOutput (line 166) | function isAudioOutput(o: NodeAttr | undefined | null): o is AudioOutput {
  function isVideoOutput (line 171) | function isVideoOutput(o: NodeAttr | undefined | null): o is VideoOutput {
  function isResetDisplayMessage (line 176) | function isResetDisplayMessage(o: GuidanceMessage | undefined | null): o...
  function isMetricMessage (line 181) | function isMetricMessage(o: GuidanceMessage | undefined | null): o is Me...
  function isClientReadyAckMessage (line 186) | function isClientReadyAckMessage(o: GuidanceMessage | undefined | null):...
  function isOutputRequestAckMessage (line 191) | function isOutputRequestAckMessage(o: GuidanceMessage | undefined | null...
  function isExecutionCompletedMessage (line 196) | function isExecutionCompletedMessage(o: GuidanceMessage | undefined | nu...
  function isExecutionStartedMessage (line 201) | function isExecutionStartedMessage(o: GuidanceMessage | undefined | null...
  type Status (line 210) | enum Status {

FILE: docs/conf.py
  function setup (line 389) | def setup(app):
  function build_finished (line 395) | def build_finished(app, exception):

FILE: guidance/__init__.py
  class _Guidance (line 23) | class _Guidance(ModuleType):
    method __call__ (line 24) | def __call__(self, f=None, *, stateless=False, cache=None, dedent=True...

FILE: guidance/_ast.py
  function _parse_tags (line 40) | def _parse_tags(s: str) -> Union["GrammarNode", "Function"]:
  class Tagged (line 53) | class Tagged:
    method __str__ (line 54) | def __str__(self):
  class Match (line 66) | class Match:
    method __init__ (line 67) | def __init__(self, captures, log_probs, partial):
    method __getitem__ (line 72) | def __getitem__(self, key):
    method __len__ (line 75) | def __len__(self):
    method __bool__ (line 78) | def __bool__(self):
    method __str__ (line 81) | def __str__(self):
    method __repr__ (line 84) | def __repr__(self):
  class StatefulException (line 88) | class StatefulException(Exception):
  class Function (line 99) | class Function(Tagged):
    method __post_init__ (line 105) | def __post_init__(self):
    method __call__ (line 108) | def __call__(self, model):
    method __add__ (line 116) | def __add__(self, other):
    method __radd__ (line 131) | def __radd__(self, other):
  class ASTNode (line 150) | class ASTNode(ABC):
    method _run (line 152) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
    method simplify (line 155) | def simplify(self) -> "ASTNode":
  class RoleStart (line 160) | class RoleStart(ASTNode):
    method _run (line 163) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class RoleEnd (line 168) | class RoleEnd(ASTNode):
    method _run (line 171) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class ImageBlob (line 176) | class ImageBlob(ASTNode):
    method _run (line 179) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class ImageUrl (line 184) | class ImageUrl(ASTNode):
    method _run (line 187) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class AudioBlob (line 192) | class AudioBlob(ASTNode):
    method _run (line 195) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class GenAudio (line 199) | class GenAudio(ASTNode):
    method __init__ (line 200) | def __init__(self, kwargs: dict[str, Any]):
    method _run (line 203) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class GrammarNode (line 208) | class GrammarNode(Tagged, ASTNode):
    method is_null (line 210) | def is_null(self) -> bool:
    method is_allowed_in_lark_terminal (line 217) | def is_allowed_in_lark_terminal(self) -> bool:
    method is_allowed_in_lark_rule_with_attrs (line 225) | def is_allowed_in_lark_rule_with_attrs(self) -> bool:
    method simplify (line 233) | def simplify(self) -> "GrammarNode":
    method children (line 236) | def children(self) -> Sequence["GrammarNode"]:
    method __add__ (line 239) | def __add__(self, other) -> "GrammarNode":
    method __radd__ (line 257) | def __radd__(self, other) -> "GrammarNode":
    method __getitem__ (line 275) | def __getitem__(self, key):
    method match (line 278) | def match(
    method forced_prefix (line 307) | def forced_prefix(self) -> str:
    method ll_grammar (line 311) | def ll_grammar(self, enforce_max_tokens: bool = True) -> str:
  class LiteralNode (line 317) | class LiteralNode(GrammarNode):
    method is_null (line 321) | def is_null(self) -> bool:
    method _run (line 324) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class SpecialToken (line 329) | class SpecialToken(GrammarNode):
    method __post_init__ (line 334) | def __post_init__(self):
    method format (line 338) | def format(self) -> str:
    method is_allowed_in_lark_terminal (line 348) | def is_allowed_in_lark_terminal(self) -> bool:
    method is_allowed_in_lark_rule_with_attrs (line 352) | def is_allowed_in_lark_rule_with_attrs(self) -> bool:
    method _run (line 355) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class RegexNode (line 361) | class RegexNode(GrammarNode):
    method _run (line 364) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class SelectNode (line 369) | class SelectNode(GrammarNode):
    method is_null (line 373) | def is_null(self) -> bool:
    method simplify (line 376) | def simplify(self) -> "GrammarNode":
    method children (line 389) | def children(self) -> Sequence["GrammarNode"]:
    method _run (line 392) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class JoinNode (line 397) | class JoinNode(GrammarNode):
    method is_null (line 401) | def is_null(self) -> bool:
    method simplify (line 404) | def simplify(self) -> "GrammarNode":
    method children (line 412) | def children(self) -> Sequence["GrammarNode"]:
    method _run (line 415) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class RepeatNode (line 420) | class RepeatNode(GrammarNode):
    method is_null (line 426) | def is_null(self) -> bool:
    method __post_init__ (line 429) | def __post_init__(self):
    method children (line 435) | def children(self) -> Sequence["GrammarNode"]:
    method simplify (line 438) | def simplify(self) -> GrammarNode:
    method _run (line 441) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class SubstringNode (line 446) | class SubstringNode(GrammarNode):
    method is_allowed_in_lark_terminal (line 450) | def is_allowed_in_lark_terminal(self) -> bool:
    method _run (line 454) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class RuleNode (line 464) | class RuleNode(GrammarNode):
    method __post_init__ (line 476) | def __post_init__(self) -> None:
    method is_allowed_in_lark_terminal (line 489) | def is_allowed_in_lark_terminal(self) -> bool:
    method children (line 501) | def children(self) -> tuple[GrammarNode]:
    method _run (line 504) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class RuleRefNode (line 509) | class RuleRefNode(GrammarNode):
    method set_target (line 512) | def set_target(self, target: RuleNode) -> None:
    method is_allowed_in_lark_terminal (line 519) | def is_allowed_in_lark_terminal(self) -> bool:
    method _run (line 524) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class BaseSubgrammarNode (line 531) | class BaseSubgrammarNode(GrammarNode):
    method is_allowed_in_lark_terminal (line 533) | def is_allowed_in_lark_terminal(self) -> bool:
    method is_allowed_in_lark_rule_with_attrs (line 537) | def is_allowed_in_lark_rule_with_attrs(self) -> bool:
  class SubgrammarNode (line 544) | class SubgrammarNode(BaseSubgrammarNode):
    method _run (line 548) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class LLGJsonCompileOptions (line 552) | class LLGJsonCompileOptions(TypedDict):
  class JsonNode (line 568) | class JsonNode(BaseSubgrammarNode):
    method _run (line 572) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
    method _llguidance_json (line 576) | def _llguidance_json(self) -> dict[str, Any]:
    method _llguidance_validate (line 592) | def _llguidance_validate(self) -> None:
  class LarkNode (line 605) | class LarkNode(BaseSubgrammarNode):
    method _run (line 608) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class ToolCallNode (line 613) | class ToolCallNode(ASTNode):
    method from_tools (line 620) | def from_tools(
    method __post_init__ (line 645) | def __post_init__(self):
    method _run (line 649) | def _run(self, interpreter: "Interpreter[S]", **kwargs) -> Iterator[Ou...
  class LarkSerializer (line 653) | class LarkSerializer:
    method __init__ (line 654) | def __init__(self, enforce_max_tokens: bool = True):
    method serialize (line 659) | def serialize(self, node: GrammarNode) -> str:
    method visit (line 682) | def visit(self, node: GrammarNode, top=False) -> str:
    method normalize_name (line 815) | def normalize_name(self, name: str, terminal: bool) -> str:
    method regex (line 825) | def regex(self, pattern: str) -> str:

FILE: guidance/_bg/__init__.py
  function _start_asyncio_loop (line 15) | def _start_asyncio_loop(loop: AbstractEventLoop):
  function _asyncio_background_thread (line 20) | def _asyncio_background_thread() -> tuple[threading.Thread, AbstractEven...
  class BackgroundAsync (line 27) | class BackgroundAsync:
    method __init__ (line 30) | def __init__(self):
    method _thread_and_loop (line 35) | def _thread_and_loop(self) -> tuple[threading.Thread, AbstractEventLoop]:
    method call_soon_threadsafe (line 41) | def call_soon_threadsafe(self, cb, *args, context=None):
    method run_async_coroutine (line 47) | def run_async_coroutine(self, coroutine: Coroutine[Any, Any, T]) -> Fu...
    method async_task (line 61) | async def async_task(coroutine: Coroutine[Any, Any, T]) -> Task[T]:
    method print_all_tasks (line 74) | async def print_all_tasks():  # pragma: no cover

FILE: guidance/_grammar.py
  function string (line 19) | def string(s: str) -> LiteralNode:
  function regex (line 23) | def regex(pattern: str) -> RegexNode:
  function gen (line 27) | def gen(
  function select (line 61) | def select(
  function repeat (line 117) | def repeat(value: str | int | float | GrammarNode, min: int, max: int | ...
  function token_limit (line 141) | def token_limit(value: GrammarNode, max_tokens: int) -> RuleNode:
  function with_temperature (line 156) | def with_temperature(value: GrammarNode, temperature: float) -> RuleNode:
  function capture (line 175) | def capture(value: GrammarNode, name: str, list_append: bool = False) ->...
  function subgrammar (line 182) | def subgrammar(
  function special_token (line 201) | def special_token(token: str) -> SpecialToken:
  function quote_regex (line 211) | def quote_regex(value: str) -> str:

FILE: guidance/_guidance.py
  function guidance (line 17) | def guidance(
  class GuidanceFunction (line 44) | class GuidanceFunction:
    method __init__ (line 45) | def __init__(
    method __call__ (line 65) | def __call__(self, *args, **kwargs):
    method __get__ (line 68) | def __get__(self, instance, owner=None, /):
    method __repr__ (line 76) | def __repr__(self):
  class GuidanceMethod (line 80) | class GuidanceMethod:
    method __init__ (line 83) | def __init__(self, impl, instance):
    method from_guidance_function (line 95) | def from_guidance_function(cls, guidance_function: GuidanceFunction, i...
    method __call__ (line 119) | def __call__(self, *args, **kwargs):
    method __repr__ (line 122) | def __repr__(self):
  function _decorator (line 129) | def _decorator(f, *, stateless, cache, model):

FILE: guidance/_guidance.pyi
  function guidance (line 30) | def guidance(
  function guidance (line 39) | def guidance(
  function guidance (line 48) | def guidance(
  function guidance (line 57) | def guidance(
  function guidance (line 66) | def guidance(
  function guidance (line 75) | def guidance(

FILE: guidance/_parser.py
  class TokenParserException (line 16) | class TokenParserException(Exception):
  class InvalidTokenException (line 20) | class InvalidTokenException(TokenParserException):
    method __init__ (line 21) | def __init__(self, token: int, valid_tokens: list[int]):
  class TokenParser (line 27) | class TokenParser:
    method __init__ (line 28) | def __init__(
    method is_accepting (line 49) | def is_accepting(self) -> bool:
    method done (line 52) | def done(self) -> bool:
    method advance (line 55) | def advance(
    method has_pending_stop (line 67) | def has_pending_stop(self) -> bool:
    method process_prompt (line 70) | def process_prompt(
    method compute_mask (line 113) | def compute_mask(self) -> tuple[bytes | None, LLInterpreterResponse, f...
    method _parse (line 119) | def _parse(
    method cleanup (line 171) | def cleanup(self):
  class ByteParserException (line 189) | class ByteParserException(Exception):
    method __init__ (line 190) | def __init__(self, *args, **kwargs):
  class ByteParser (line 197) | class ByteParser:
    method __init__ (line 198) | def __init__(
    method matched (line 215) | def matched(self) -> bool:
    method valid_next_bytes (line 220) | def valid_next_bytes(self) -> set[bytes]:
    method next_byte_mask (line 227) | def next_byte_mask(self) -> NDArray[np.uint8]:
    method _advance (line 233) | def _advance(self, token_id: int | None) -> None:
    method consume_bytes (line 264) | def consume_bytes(self, bts: bytes) -> None:
    method force_done (line 311) | def force_done(self):
    method get_captures (line 321) | def get_captures(self):
    method _update_capture (line 324) | def _update_capture(self, response: LegacyEngineCallResponse):

FILE: guidance/_schema.py
  class TokenUsage (line 9) | class TokenUsage(BaseModel):
    method output_tokens (line 48) | def output_tokens(self) -> NonNegativeInt:
    method token_savings (line 57) | def token_savings(self) -> Annotated[float, Ge(0), Le(1)] | None:
    method avg_latency_ms (line 67) | def avg_latency_ms(self) -> float:
    method __add__ (line 73) | def __add__(self, other: "TokenUsage") -> "TokenUsage":
  class EngineResponse (line 93) | class EngineResponse(BaseModel):
  class LegacyEngineCallResponse (line 103) | class LegacyEngineCallResponse(BaseModel):
  class GenToken (line 118) | class GenToken(BaseModel):
  class GenTokenExtra (line 130) | class GenTokenExtra(GenToken):
  class GenData (line 134) | class GenData(BaseModel):
    method valid_next_tokens (line 141) | def valid_next_tokens(self) -> list[int]:
  class LLProgressCapture (line 145) | class LLProgressCapture(BaseModel):
    method strip_list_append_prefix (line 153) | def strip_list_append_prefix(cls, values):
  class LLProgressText (line 162) | class LLProgressText(BaseModel):
  class LLProgressFinalText (line 170) | class LLProgressFinalText(BaseModel):
  class LLProgress (line 181) | class LLProgress(RootModel):
    method to_engine_call_response (line 184) | def to_engine_call_response(self) -> LegacyEngineCallResponse:
  class LLInterpreterResponse (line 227) | class LLInterpreterResponse(BaseModel):
  class SamplingParams (line 233) | class SamplingParams(TypedDict):
  class StepContext (line 240) | class StepContext(TypedDict):
  class StepFeedback (line 249) | class StepFeedback(TypedDict, total=False):
  class StepConfig (line 255) | class StepConfig(TypedDict, total=False):

FILE: guidance/_tools.py
  class GrammarFormat (line 14) | class GrammarFormat(BaseModel):
  class CustomTool (line 24) | class CustomTool(BaseModel):
  class FunctionTool (line 29) | class FunctionTool(BaseModel):
    method from_callable (line 34) | def from_callable(cls, callable: Callable) -> "FunctionTool":
    method get_schema (line 56) | def get_schema(self) -> dict[str, Any]:
    method serialize_parameters (line 65) | def serialize_parameters(self, parameters: builtins.type[BaseModel] | ...
  class Tool (line 77) | class Tool(BaseModel):
    method call (line 84) | def call(self, *args, **kwargs) -> Any:
    method from_callable (line 96) | def from_callable(
    method from_regex (line 117) | def from_regex(
    method from_lark (line 138) | def from_lark(
    method from_grammar (line 159) | def from_grammar(
    method to_openai_style (line 176) | def to_openai_style(self) -> dict[str, Any]:
    method with_name (line 205) | def with_name(self, name: str) -> "Tool":

FILE: guidance/_utils.py
  function bytes_from (line 26) | def bytes_from(src: str | pathlib.Path | bytes, allow_local: bool) -> by...
  class _Rewrite (line 47) | class _Rewrite(ast.NodeTransformer):
    method __init__ (line 48) | def __init__(self, source_lines):
    method visit_JoinedStr (line 52) | def visit_JoinedStr(self, node):
    method visit_Constant (line 58) | def visit_Constant(self, node):
    method _dedent_constant (line 63) | def _dedent_constant(self, node, lineno):
  class normalize_notebook_stdout_stderr (line 78) | class normalize_notebook_stdout_stderr:
    method __enter__ (line 84) | def __enter__(self):
    method __exit__ (line 97) | def __exit__(self, exc_type, exc_value, traceback):
  function strip_multiline_string_indents (line 104) | def strip_multiline_string_indents(f):
  function make_weak_bound_method (line 155) | def make_weak_bound_method(f, instance):
  function signature_pop (line 172) | def signature_pop(signature, index):
  class JupyterComm (line 178) | class JupyterComm:
    method __init__ (line 179) | def __init__(self, target_id, ipython_handle, callback=None, on_open=N...
    method clear_send_queue (line 214) | def clear_send_queue(self):
    method _fire_callback (line 219) | def _fire_callback(self, msg):
    method send (line 222) | def send(self, data):
    method _send_loop (line 225) | async def _send_loop(self):
  function is_interactive (line 247) | def is_interactive():
  function log_softmax (line 253) | def log_softmax(array: np.ndarray, axis: int = -1) -> np.ndarray:
  function softmax (line 269) | def softmax(array: np.ndarray, axis: int = -1) -> np.ndarray:
  function pydantic_no_default_repr (line 276) | def pydantic_no_default_repr(obj: pydantic.BaseModel, target_fields=None):
  function pydantic_no_default_str (line 293) | def pydantic_no_default_str(obj: pydantic.BaseModel, target_fields=None):
  function log_init (line 310) | def log_init(s: str):
  function log_copy (line 315) | def log_copy(s: str):
  function log_cleanup (line 320) | def log_cleanup(s: str):
  function to_utf8_or_bytes_string (line 325) | def to_utf8_or_bytes_string(_bytes: bytes) -> str:
  function apply_repetition_penalty (line 347) | def apply_repetition_penalty(input_ids: list[int], logits: np.ndarray, s...
  function apply_top_k_only (line 373) | def apply_top_k_only(logits: np.ndarray, k: int) -> np.ndarray:
  function apply_min_p_filter (line 382) | def apply_min_p_filter(logits: np.ndarray, sampling_params: Optional["Sa...
  function apply_top_k_and_top_p_filter (line 399) | def apply_top_k_and_top_p_filter(logits: np.ndarray, sampling_params: Op...

FILE: guidance/chat.py
  class ChatTemplate (line 5) | class ChatTemplate:
    method get_role_start (line 8) | def get_role_start(self, role_name: str, **kwargs):
    method get_role_end (line 11) | def get_role_end(self, role_name: str | None = None):
  class ChatTemplateCache (line 15) | class ChatTemplateCache:
    method __init__ (line 16) | def __init__(self) -> None:
    method __getitem__ (line 19) | def __getitem__(self, key: str) -> ChatTemplate:
    method __setitem__ (line 23) | def __setitem__(self, key: str, value):
    method __contains__ (line 27) | def __contains__(self, key: str):
  class UnsupportedRoleException (line 37) | class UnsupportedRoleException(Exception):
    method __init__ (line 38) | def __init__(self, role_name, instance):
    method _format_message (line 43) | def _format_message(self):
  function load_template_class (line 47) | def load_template_class(chat_template=None):
  function _template_class_from_string (line 83) | def _template_class_from_string(template_str):
  class ChatMLTemplate (line 98) | class ChatMLTemplate(ChatTemplate):
    method get_role_start (line 101) | def get_role_start(self, role_name):
    method get_role_end (line 104) | def get_role_end(self, role_name=None):  # noqa ARG002
  class Llama2ChatTemplate (line 118) | class Llama2ChatTemplate(ChatTemplate):
    method get_role_start (line 122) | def get_role_start(self, role_name):
    method get_role_end (line 132) | def get_role_end(self, role_name=None):
  class Llama3ChatTemplate (line 153) | class Llama3ChatTemplate(ChatTemplate):
    method get_role_start (line 157) | def get_role_start(self, role_name):
    method get_role_end (line 167) | def get_role_end(self, role_name=None):  # noqa ARG002
  class Phi3MiniChatTemplate (line 181) | class Phi3MiniChatTemplate(ChatTemplate):
    method get_role_start (line 185) | def get_role_start(self, role_name):
    method get_role_end (line 195) | def get_role_end(self, role_name=None):  # noqa ARG002
  class Phi3SmallMediumChatTemplate (line 211) | class Phi3SmallMediumChatTemplate(ChatTemplate):
    method get_role_start (line 215) | def get_role_start(self, role_name):
    method get_role_end (line 223) | def get_role_end(self, role_name=None):  # noqa ARG002
  class Phi4MiniChatTemplate (line 238) | class Phi4MiniChatTemplate(ChatTemplate):
    method get_role_start (line 242) | def get_role_start(self, role_name):
    method get_role_end (line 245) | def get_role_end(self, role_name=None):  # noqa ARG002
  class Mistral7BInstructChatTemplate (line 259) | class Mistral7BInstructChatTemplate(ChatTemplate):
    method get_role_start (line 263) | def get_role_start(self, role_name):
    method get_role_end (line 273) | def get_role_end(self, role_name=None):
  class Gemma29BInstructChatTemplate (line 292) | class Gemma29BInstructChatTemplate(ChatTemplate):
    method get_role_start (line 296) | def get_role_start(self, role_name):
    method get_role_end (line 306) | def get_role_end(self, role_name=None):
  class Qwen2dot5ChatTemplate (line 327) | class Qwen2dot5ChatTemplate(ChatTemplate):
    method get_role_start (line 331) | def get_role_start(self, role_name):
    method get_role_end (line 337) | def get_role_end(self, role_name=None):  # noqa ARG002
  class Qwen3ChatTemplate (line 354) | class Qwen3ChatTemplate(ChatTemplate):
    method get_role_start (line 358) | def get_role_start(self, role_name):
    method get_role_end (line 364) | def get_role_end(self, role_name=None):  # noqa ARG002
  class Llama3dot2ChatTemplate (line 381) | class Llama3dot2ChatTemplate(ChatTemplate):
    method get_role_start (line 385) | def get_role_start(self, role_name):
    method get_role_end (line 396) | def get_role_end(self, role_name=None):  # noqa ARG002

FILE: guidance/debug.py
  function enable_widget_debug (line 11) | def enable_widget_debug() -> None:
  function dump_widget_debug (line 36) | def dump_widget_debug() -> str | None:
  function clear_widget_debug (line 64) | def clear_widget_debug() -> None:
  function widget_debug_info (line 90) | def widget_debug_info() -> None:

FILE: guidance/library/_audio.py
  function audio (line 10) | def audio(lm, src: str | pathlib.Path | bytes, allow_local: bool = True):
  function gen_audio (line 18) | def gen_audio(lm, **kwargs):

FILE: guidance/library/_block.py
  class Block (line 8) | class Block:
    method __init__ (line 9) | def __init__(self, name: str | None, opener: str | Function | ASTNode,...
  function block (line 16) | def block(name=None, opener=None, closer=None):

FILE: guidance/library/_capture.py
  function capture (line 8) | def capture(lm, value, name):

FILE: guidance/library/_ebnf.py
  function lark (line 7) | def lark(
  function gbnf_to_lark (line 31) | def gbnf_to_lark(gbnf_grammar: str) -> str:

FILE: guidance/library/_gen.py
  function gen (line 13) | def gen(
  function regex (line 155) | def regex(lm, pattern, *, name=None):

FILE: guidance/library/_image.py
  function image (line 13) | def image(lm, src: str | pathlib.Path | bytes, allow_local: bool = True):
  function gen_image (line 24) | def gen_image(lm):

FILE: guidance/library/_json.py
  function json (line 13) | def json(

FILE: guidance/library/_optional.py
  function optional (line 6) | def optional(lm, value):

FILE: guidance/library/_pydantic.py
  class GenerateJsonSchemaSafe (line 7) | class GenerateJsonSchemaSafe(pydantic.json_schema.GenerateJsonSchema):
    method generate_inner (line 18) | def generate_inner(self, schema):
  function pydantic_to_json_schema (line 26) | def pydantic_to_json_schema(schema: Union[type["pydantic.BaseModel"], "p...

FILE: guidance/library/_role.py
  function role (line 8) | def role(role: str) -> AbstractContextManager:
  function system (line 16) | def system() -> AbstractContextManager:
  function user (line 32) | def user() -> AbstractContextManager:
  function assistant (line 48) | def assistant() -> AbstractContextManager:

FILE: guidance/library/_sequences.py
  function exactly_n_repeats (line 6) | def exactly_n_repeats(model, value, n_repeats: int):
  function at_most_n_repeats (line 11) | def at_most_n_repeats(model, value, n_repeats: int):
  function sequence (line 16) | def sequence(model, value, min_length: int = 0, max_length: int | None =...
  function one_or_more (line 22) | def one_or_more(model, value):
  function zero_or_more (line 27) | def zero_or_more(model, value):

FILE: guidance/library/_subgrammar.py
  function as_regular_grammar (line 7) | def as_regular_grammar(node: GrammarNode, lexeme=False):
  function lexeme (line 17) | def lexeme(body_regex: str, json_string: bool = False):

FILE: guidance/library/_substring.py
  function chunk_on_word (line 7) | def chunk_on_word(text: str) -> list[str]:
  function substring (line 11) | def substring(

FILE: guidance/library/_video.py
  function video (line 13) | def video(lm, src: str | pathlib.Path | bytes, allow_local: bool = True):
  function gen_video (line 23) | def gen_video(lm):

FILE: guidance/metrics/_metrics.py
  class PeriodicMetricsGenerator (line 19) | class PeriodicMetricsGenerator:
    method __init__ (line 20) | def __init__(self, monitor: "Monitor", sleep_sec=0.5):
    method start (line 27) | def start(self):
    method stop (line 33) | def stop(self):
    method pause (line 38) | def pause(self):
    method resume (line 46) | def resume(self):
    method _emit (line 54) | async def _emit(self):
  class MonitoringMetric (line 100) | class MonitoringMetric(str, Enum):
  class Monitor (line 108) | class Monitor:
    method __init__ (line 111) | def __init__(self, interval_ms: int = 1000, **kwargs):
    method _monitor_fn (line 126) | async def _monitor_fn(self):
    method start (line 181) | def start(self):
    method stop (line 189) | def stop(self):
    method reset (line 198) | def reset(self):
    method get_metrics (line 203) | def get_metrics(
    method get_metric (line 217) | def get_metric(self, metric: MonitoringMetric) -> Any:
  function emit_usage (line 221) | def emit_usage(usage: TokenUsage) -> None:

FILE: guidance/models/_azureai.py
  class AzureOpenAIInterpreter (line 30) | class AzureOpenAIInterpreter(OpenAIRuleMixin, OpenAIJSONMixin, OpenAIReg...
    method __init__ (line 33) | def __init__(
  class AzureOpenAIAudioInterpreter (line 64) | class AzureOpenAIAudioInterpreter(OpenAIAudioMixin, AzureOpenAIInterpret...
  class AzureOpenAIImageInterpreter (line 70) | class AzureOpenAIImageInterpreter(OpenAIImageMixin, AzureOpenAIInterpret...
  function create_azure_openai_model (line 76) | def create_azure_openai_model(
  class AzureAIClientWrapper (line 158) | class AzureAIClientWrapper(BaseOpenAIClientWrapper):
    method __init__ (line 159) | def __init__(self, client: "azure.ai.inference.ChatCompletionsClient"):
    method streaming_chat_completions (line 162) | def streaming_chat_completions(
  class AzureInferenceInterpreter (line 188) | class AzureInferenceInterpreter(OpenAIRuleMixin, OpenAIJSONMixin, OpenAI...
    method __init__ (line 189) | def __init__(
    method json (line 209) | def json(self, node: JsonNode, **kwargs) -> Iterator[OutputAttr]:
  function create_azure_aifoundry_model (line 220) | def create_azure_aifoundry_model(

FILE: guidance/models/_base/_interpreter.py
  class Interpreter (line 32) | class Interpreter(Generic[S]):
    method __init__ (line 33) | def __init__(self, state: S):
    method run (line 36) | def run(self, node: ASTNode, **kwargs) -> Iterator[OutputAttr]:
    method _role_start (line 39) | def _role_start(self, node: RoleStart, **kwargs) -> Iterator[OutputAttr]:
    method role_start (line 44) | def role_start(self, node: RoleStart, **kwargs) -> Iterator[OutputAttr...
    method _role_end (line 47) | def _role_end(self, node: RoleEnd, **kwargs) -> Iterator[OutputAttr]:
    method role_end (line 54) | def role_end(self, node: RoleEnd, **kwargs) -> Iterator[OutputAttr]:  ...
    method text (line 57) | def text(self, node: LiteralNode, **kwargs) -> Iterator[OutputAttr]:  ...
    method image_blob (line 60) | def image_blob(self, node: ImageBlob, **kwargs) -> Iterator[OutputAttr...
    method image_url (line 63) | def image_url(self, node: ImageUrl, **kwargs) -> Iterator[OutputAttr]:
    method grammar (line 67) | def grammar(self, node: GrammarNode, **kwargs) -> Iterator[OutputAttr]...
    method regex (line 70) | def regex(self, node: RegexNode, **kwargs) -> Iterator[OutputAttr]:
    method select (line 73) | def select(self, node: SelectNode, **kwargs) -> Iterator[OutputAttr]:
    method join (line 76) | def join(self, node: JoinNode, **kwargs) -> Iterator[OutputAttr]:
    method repeat (line 79) | def repeat(self, node: RepeatNode, **kwargs) -> Iterator[OutputAttr]:
    method substring (line 82) | def substring(self, node: SubstringNode, **kwargs) -> Iterator[OutputA...
    method rule (line 85) | def rule(self, node: RuleNode, **kwargs) -> Iterator[OutputAttr]:
    method subgrammar (line 88) | def subgrammar(self, node: SubgrammarNode, **kwargs) -> Iterator[Outpu...
    method json (line 91) | def json(self, node: JsonNode, **kwargs) -> Iterator[OutputAttr]:
    method lark (line 94) | def lark(self, node: LarkNode, **kwargs) -> Iterator[OutputAttr]:
    method audio_blob (line 97) | def audio_blob(self, node: AudioBlob, **kwargs) -> Iterator[OutputAttr...
    method gen_audio (line 100) | def gen_audio(self, node: GenAudio, **kwargs) -> Iterator[OutputAttr]:...
    method tool_call (line 103) | def tool_call(self, node: ToolCallNode, **kwargs) -> Iterator[OutputAt...
  class UnsupportedNodeError (line 107) | class UnsupportedNodeError(ValueError):
    method __init__ (line 108) | def __init__(self, interpreter: Interpreter, node: ASTNode):

FILE: guidance/models/_base/_model.py
  function _gen_id (line 45) | def _gen_id():
  class Model (line 58) | class Model:
    method __init__ (line 59) | def __init__(
    method _update_trace_node (line 81) | def _update_trace_node(
    method __add__ (line 100) | def __add__(self, other: str | Function | ASTNode) -> Self:
    method _apply_node (line 115) | def _apply_node(self, node: ASTNode) -> Self:
    method _send_to_event_queue (line 152) | def _send_to_event_queue(self) -> None:
    method stream (line 157) | def stream(self) -> "ModelStream":
    method _apply_blocks (line 161) | def _apply_blocks(self) -> Self:
    method _update_open_block_captures (line 192) | def _update_open_block_captures(self) -> Self:
    method copy (line 199) | def copy(self) -> Self:
    method __str__ (line 212) | def __str__(self) -> str:
    method __len__ (line 215) | def __len__(self):
    method __setitem__ (line 218) | def __setitem__(self, key, value):
    method __getitem__ (line 223) | def __getitem__(self, key: str) -> Any:
    method __contains__ (line 233) | def __contains__(self, key: str) -> bool:
    method get (line 236) | def get(self, key: str, default: D | None = None) -> str | list[str] |...
    method set (line 251) | def set(self, key: str, value: str | list[str]) -> Self:
    method remove (line 268) | def remove(self, key: str) -> Self:
    method log_prob (line 280) | def log_prob(self, key: str, default: D | None = None) -> float | list...
    method with_sampling_params (line 299) | def with_sampling_params(self, sampling_params: SamplingParams) -> Self:
    method with_step_config (line 305) | def with_step_config(self, step_config: StepConfig) -> Self:
    method __getattribute__ (line 315) | def __getattribute__(self, name):
    method _get_usage (line 321) | def _get_usage(self) -> TokenUsage:
    method _reset_usage (line 326) | def _reset_usage(self) -> None:
  class ModelStream (line 330) | class ModelStream:
    method __init__ (line 331) | def __init__(
    method __add__ (line 345) | def __add__(self, grammar: str | ASTNode) -> Self:
    method _inner_run (line 352) | def _inner_run(self, model):
    method __iter__ (line 361) | def __iter__(self) -> Iterator[Model]:

FILE: guidance/models/_base/_state.py
  class CaptureVar (line 9) | class CaptureVar(TypedDict):
  class State (line 14) | class State(ABC):
    method __init__ (line 15) | def __init__(self, token_usage: TokenUsage | None = None) -> None:
    method add_usage (line 20) | def add_usage(self, usage: TokenUsage) -> None:
    method get_usage (line 26) | def get_usage(self) -> TokenUsage:
    method reset_usage (line 30) | def reset_usage(self) -> None:
    method __str__ (line 35) | def __str__(self) -> str:
    method apply_capture (line 38) | def apply_capture(

FILE: guidance/models/_byte_tokenizer.py
  class ByteTokenizer (line 7) | class ByteTokenizer(Tokenizer):
    method __init__ (line 8) | def __init__(self, chat_template=None):
    method encode (line 28) | def encode(self, byte_string: bytes, *, parse_special: bool = True) ->...

FILE: guidance/models/_engine/_engine.py
  class LogitsOutput (line 32) | class LogitsOutput(TypedDict):
  class Engine (line 38) | class Engine(ABC):
    method __init__ (line 47) | def __init__(
    method enable_backtrack (line 78) | def enable_backtrack(self):
    method enable_ff_tokens (line 82) | def enable_ff_tokens(self):
    method enable_monitoring (line 86) | def enable_monitoring(self):
    method get_chat_template (line 89) | def get_chat_template(
    method __call__ (line 94) | def __call__(
    method get_next_token_with_top_k (line 542) | def get_next_token_with_top_k(
    method chat_completion_streaming (line 659) | def chat_completion_streaming(
    method chat_completion (line 691) | def chat_completion(
    method get_logits (line 710) | def get_logits(self, token_ids: list[int], include_all_uncached_tokens...
  function get_top_k (line 723) | def get_top_k(_probs: NDArray, _k: int = 5) -> list[int]:
  function apply_temp_and_sampling_params (line 732) | def apply_temp_and_sampling_params(

FILE: guidance/models/_engine/_interpreter.py
  class EngineInterpreter (line 16) | class EngineInterpreter(Interpreter[EngineState]):
    method __init__ (line 17) | def __init__(self, engine: Engine):
    method __deepcopy__ (line 23) | def __deepcopy__(self, memo):
    method get_role_start (line 36) | def get_role_start(self, role: str) -> str:
    method get_role_end (line 41) | def get_role_end(self, role: str) -> str:
    method role_start (line 46) | def role_start(self, node: RoleStart, **kwargs) -> Iterator[OutputAttr]:
    method role_end (line 54) | def role_end(self, node: RoleEnd, **kwargs) -> Iterator[OutputAttr]:
    method text (line 62) | def text(self, node: LiteralNode, **kwargs) -> Iterator[OutputAttr]:
    method grammar (line 65) | def grammar(self, node: GrammarNode, **kwargs) -> Iterator[OutputAttr]:
    method tool_call (line 154) | def tool_call(self, node: ToolCallNode, **kwargs) -> Iterator[OutputAt...
  class Llama3VisionInterpreter (line 158) | class Llama3VisionInterpreter(EngineInterpreter):
    method image_blob (line 159) | def image_blob(self, node: ImageBlob, **kwargs) -> Iterator[OutputAttr]:
  class Phi3VisionInterpreter (line 175) | class Phi3VisionInterpreter(EngineInterpreter):
    method image_blob (line 176) | def image_blob(self, node: ImageBlob, **kwargs) -> Iterator[OutputAttr]:
  function partial_decode (line 197) | def partial_decode(data: bytes) -> tuple[str, bytes]:
  function recode_special_tokens (line 209) | def recode_special_tokens(tokenizer: Tokenizer, data: bytes) -> bytes:
  function text_to_grammar (line 214) | def text_to_grammar(tokenizer: Tokenizer, text: str) -> GrammarNode:

FILE: guidance/models/_engine/_state.py
  class EngineState (line 7) | class EngineState(State):
    method __init__ (line 8) | def __init__(self) -> None:
    method __str__ (line 17) | def __str__(self) -> str:

FILE: guidance/models/_engine/_tokenizer.py
  class TokenizerWrappable (line 11) | class TokenizerWrappable:
    method __call__ (line 18) | def __call__(self, byte_string: bytes) -> list[int]:
    method as_ll_tokenizer (line 21) | def as_ll_tokenizer(self) -> "llguidance.LLTokenizer":
  class Tokenizer (line 26) | class Tokenizer:
    method __init__ (line 33) | def __init__(
    method is_special_token (line 45) | def is_special_token(self, token_id: int) -> bool:
    method bos_token_id (line 50) | def bos_token_id(self) -> int | None:
    method eos_token_id (line 56) | def eos_token_id(self) -> int:
    method bos_token (line 60) | def bos_token(self) -> bytes | None:
    method eos_token (line 66) | def eos_token(self) -> bytes:
    method chat_template (line 70) | def chat_template(self) -> Any | None:
    method __call__ (line 73) | def __call__(self, byte_string: bytes):
    method encode (line 76) | def encode(self, byte_string: bytes, *, parse_special: bool = True) ->...
    method decode (line 80) | def decode(self, tokens: Sequence[int]) -> bytes:
    method recode (line 84) | def recode(self, tokens: Sequence[int]) -> list[int]:

FILE: guidance/models/_llama_cpp.py
  function set_shutdown_flag (line 35) | def set_shutdown_flag():
  class _LlamaBatchContext (line 44) | class _LlamaBatchContext:
    method __init__ (line 45) | def __init__(self, n_batch, n_ctx):
    method __del__ (line 51) | def __del__(self):
  class LlamaCppTokenizer (line 61) | class LlamaCppTokenizer(Tokenizer):
    method __init__ (line 62) | def __init__(self, model_obj: "Llama", chat_template: str | ChatTempla...
  class LlamaCppEngine (line 78) | class LlamaCppEngine(Engine):
    method __init__ (line 81) | def __init__(
    method get_logits (line 153) | def get_logits(self, token_ids: list[int], include_all_uncached_tokens...
  class LlamaCpp (line 239) | class LlamaCpp(Model):
    method __init__ (line 240) | def __init__(

FILE: guidance/models/_mock.py
  class MockTokenizer (line 14) | class MockTokenizer(Tokenizer):
    method __init__ (line 15) | def __init__(self, tokens: Sequence[bytes], special_token_ids: list[in...
    method encode (line 34) | def encode(self, byte_string: bytes, *, parse_special: bool = True) ->...
    method recode (line 61) | def recode(self, tokens: Sequence[int]) -> list[int]:
  class MockEngine (line 66) | class MockEngine(Engine):
    method __init__ (line 67) | def __init__(self, tokenizer, byte_patterns, force):
    method get_next_token_with_top_k (line 94) | def get_next_token_with_top_k(
    method get_logits (line 117) | def get_logits(self, token_ids: list[int], include_all_uncached_tokens...
    method _get_next_tokens (line 150) | def _get_next_tokens(self, byte_string):
  class Mock (line 165) | class Mock(Model):
    method __init__ (line 166) | def __init__(
  class ByteTrie (line 192) | class ByteTrie:
    method __init__ (line 195) | def __init__(self, byte_strings=None, values=None, parent=None):
    method keys (line 212) | def keys(self):
    method has_child (line 215) | def has_child(self, byte):
    method child (line 218) | def child(self, byte):
    method parent (line 221) | def parent(self):
    method size (line 224) | def size(self):
    method __len__ (line 227) | def __len__(self):
    method insert (line 230) | def insert(self, s, value, pos=0):
    method compute_probs (line 240) | def compute_probs(self, probs):

FILE: guidance/models/_onnxruntime.py
  class OnnxRuntimeGenAIEngine (line 28) | class OnnxRuntimeGenAIEngine(Engine):
    method __init__ (line 31) | def __init__(
    method get_logits (line 90) | def get_logits(self, token_ids: list[int], include_all_uncached_tokens...
  class OnnxRuntimeGenAI (line 148) | class OnnxRuntimeGenAI(Model):
    method __init__ (line 149) | def __init__(

FILE: guidance/models/_openai.py
  class OpenAIInterpreter (line 15) | class OpenAIInterpreter(OpenAIRuleMixin, OpenAIJSONMixin, OpenAIRegexMix...
    method __init__ (line 16) | def __init__(
  class OpenAI (line 34) | class OpenAI(Model):
    method __init__ (line 35) | def __init__(

FILE: guidance/models/_openai_base.py
  function get_role_start (line 38) | def get_role_start(role: str) -> str:
  function get_role_end (line 43) | def get_role_end(role: str) -> str:
  class AssistantAudio (line 48) | class AssistantAudio(BaseModel):
  class AssistantAudioMessage (line 55) | class AssistantAudioMessage(BaseModel):
  class TextContent (line 60) | class TextContent(BaseModel):
  class InputAudio (line 65) | class InputAudio(BaseModel):
  class AudioContent (line 70) | class AudioContent(BaseModel):
  class ImageUrlContentInner (line 75) | class ImageUrlContentInner(BaseModel):
  class ImageUrlContent (line 79) | class ImageUrlContent(BaseModel):
  class ContentMessage (line 87) | class ContentMessage(BaseModel):
  class Function (line 92) | class Function(BaseModel):
  class Custom (line 97) | class Custom(BaseModel):
  class FunctionCall (line 102) | class FunctionCall(BaseModel):
  class CustomCall (line 108) | class CustomCall(BaseModel):
  class ToolCallMessage (line 117) | class ToolCallMessage(BaseModel):
  class ToolCallResult (line 122) | class ToolCallResult(BaseModel):
  class OpenAIState (line 131) | class OpenAIState(State):
    method __init__ (line 132) | def __init__(self) -> None:
    method apply_text (line 138) | def apply_text(self, text: str) -> None:
    method get_active_message (line 144) | def get_active_message(self) -> Message | None:
    method __str__ (line 168) | def __str__(self) -> str:
  class BaseOpenAIClientWrapper (line 215) | class BaseOpenAIClientWrapper(ABC):
    method streaming_chat_completions (line 217) | def streaming_chat_completions(
  class OpenAIClientWrapper (line 228) | class OpenAIClientWrapper(BaseOpenAIClientWrapper):
    method __init__ (line 229) | def __init__(self, client: "openai.OpenAI"):
    method streaming_chat_completions (line 232) | def streaming_chat_completions(
  class BaseOpenAIInterpreter (line 251) | class BaseOpenAIInterpreter(Interpreter[OpenAIState]):
    method __init__ (line 258) | def __init__(self, model: str, client: BaseOpenAIClientWrapper, *, rea...
    method run (line 268) | def run(self, node: ASTNode, **kwargs) -> Iterator[OutputAttr]:
    method role_start (line 273) | def role_start(self, node: RoleStart, **kwargs) -> Iterator[OutputAttr]:
    method role_end (line 281) | def role_end(self, node: RoleEnd, **kwargs) -> Iterator[OutputAttr]:
    method text (line 290) | def text(self, node: LiteralNode, **kwargs) -> Iterator[OutputAttr]:
    method _run (line 294) | def _run(self, tools: dict[str, Tool] | None = None, **kwargs) -> Iter...
    method _handle_stream (line 336) | def _handle_stream(
    method tool_call (line 537) | def tool_call(self, node: ToolCallNode, **kwargs) -> Iterator[OutputAt...
    method __deepcopy__ (line 545) | def __deepcopy__(self, memo):
  class OpenAIRuleMixin (line 559) | class OpenAIRuleMixin(BaseOpenAIInterpreter):
    method rule (line 560) | def rule(self, node: RuleNode, **kwargs) -> Iterator[OutputAttr]:
  class OpenAIRegexMixin (line 593) | class OpenAIRegexMixin(BaseOpenAIInterpreter):
    method regex (line 594) | def regex(self, node: RegexNode, **kwargs) -> Iterator[OutputAttr]:
  class OpenAIJSONMixin (line 601) | class OpenAIJSONMixin(BaseOpenAIInterpreter):
    method json (line 602) | def json(self, node: JsonNode, **kwargs) -> Iterator[OutputAttr]:
  class OpenAIImageMixin (line 620) | class OpenAIImageMixin(BaseOpenAIInterpreter):
    method image_blob (line 621) | def image_blob(self, node: ImageBlob, **kwargs) -> Iterator[OutputAttr]:
    method image_url (line 646) | def image_url(self, node: ImageUrl, **kwargs) -> Iterator[OutputAttr]:
  class OpenAIAudioMixin (line 652) | class OpenAIAudioMixin(BaseOpenAIInterpreter):
    method audio_blob (line 656) | def audio_blob(self, node: AudioBlob, **kwargs) -> Iterator[OutputAttr]:
    method gen_audio (line 669) | def gen_audio(self, node: GenAudio, **kwargs) -> Iterator[OutputAttr]:

FILE: guidance/models/_transformers.py
  class ByteDecoderError (line 49) | class ByteDecoderError(Exception):
  class ByteTokensError (line 53) | class ByteTokensError(Exception):
  class TransformersTokenizer (line 57) | class TransformersTokenizer(Tokenizer):
    method __init__ (line 58) | def __init__(
    method from_pretrained (line 95) | def from_pretrained(
    method recode (line 110) | def recode(self, tokens: list[int]) -> list[int]:
    method _byte_tokens (line 139) | def _byte_tokens(
    method _byte_tokens_from_byte_decoder (line 179) | def _byte_tokens_from_byte_decoder(
    method _byte_tokens_from_sp_model (line 194) | def _byte_tokens_from_sp_model(
    method _byte_tokens_by_encoding_token_strings (line 217) | def _byte_tokens_by_encoding_token_strings(
    method _fallback_byte_decoder (line 255) | def _fallback_byte_decoder(cls) -> dict[str, int]:
    method _check_byte_decoder (line 270) | def _check_byte_decoder(
    method _bytes_to_unicode (line 328) | def _bytes_to_unicode(cls):
  class TransformersEngine (line 345) | class TransformersEngine(Engine):
    method __init__ (line 346) | def __init__(
    method _model (line 433) | def _model(self, model, **kwargs) -> "PreTrainedModel":
    method get_logits (line 444) | def get_logits(self, token_ids: list[int], include_all_uncached_tokens...
  class Transformers (line 581) | class Transformers(Model):
    method __init__ (line 582) | def __init__(

FILE: guidance/models/broken_models/_vertexai.py
  class VertexAIEngine (line 14) | class VertexAIEngine(GrammarlessEngine):
    method __init__ (line 15) | def __init__(self, tokenizer, max_streaming_tokens, timeout, compute_l...
  class VertexAI (line 20) | class VertexAI(Grammarless):
    method __init__ (line 21) | def __init__(
  class VertexAICompletion (line 113) | class VertexAICompletion(VertexAI):
  class VertexAICompletionEngine (line 117) | class VertexAICompletionEngine(VertexAIEngine):
    method _generator (line 118) | def _generator(self, prompt, temperature):
  class VertexAIInstruct (line 139) | class VertexAIInstruct(VertexAI, Instruct):
    method get_role_start (line 140) | def get_role_start(self, name):
    method get_role_end (line 143) | def get_role_end(self, name):
  class VertexAIInstructEngine (line 150) | class VertexAIInstructEngine(VertexAIEngine):
    method _generator (line 151) | def _generator(self, prompt, temperature):
  class VertexAIChat (line 169) | class VertexAIChat(VertexAI, Chat):
  class VertexAIChatEngine (line 173) | class VertexAIChatEngine(VertexAIEngine):
    method _generator (line 174) | def _generator(self, prompt, temperature):
    method _start_generator (line 247) | def _start_generator(self, system_text, messages, temperature):

FILE: guidance/models/experimental/_litellm.py
  class LiteLLMOpenAIClientWrapper (line 15) | class LiteLLMOpenAIClientWrapper(BaseOpenAIClientWrapper):
    method __init__ (line 16) | def __init__(self, router):
    method _wrapped_completion (line 20) | def _wrapped_completion(
    method streaming_chat_completions (line 46) | def streaming_chat_completions(
  class LiteLLMInterpreter (line 62) | class LiteLLMInterpreter(BaseOpenAIInterpreter):
    method __init__ (line 75) | def __init__(self, model_description: dict, **kwargs):
    method _check_model (line 95) | def _check_model(self, model_desc: dict) -> str:
    method rule (line 107) | def rule(self, node: RuleNode, **kwargs) -> Iterator[OutputAttr]:
    method regex (line 147) | def regex(self, node: RegexNode, **kwargs) -> Iterator[OutputAttr]:
    method _regex_vllm (line 158) | def _regex_vllm(self, node: RegexNode, **kwargs):
    method json (line 171) | def json(self, node: JsonNode, **kwargs) -> Iterator[OutputAttr]:
    method grammar (line 202) | def grammar(self, node: GrammarNode, **kwargs) -> Iterator[OutputAttr]:
    method _grammar_vllm (line 209) | def _grammar_vllm(self, node: GrammarNode, **kwargs) -> Iterator[Outpu...
    method _process_kwargs (line 242) | def _process_kwargs(self, **kwargs):
  class LiteLLM (line 265) | class LiteLLM(Model):
    method __init__ (line 266) | def __init__(

FILE: guidance/models/experimental/_sglang.py
  class SglangInterpreter (line 11) | class SglangInterpreter(BaseOpenAIInterpreter):
    method __init__ (line 12) | def __init__(
    method rule (line 29) | def rule(self, node: RuleNode, **kwargs) -> Iterator[OutputAttr]:
    method regex (line 64) | def regex(self, node: RegexNode, **kwargs) -> Iterator[OutputAttr]:
    method json (line 78) | def json(self, node: JsonNode, **kwargs) -> Iterator[OutputAttr]:
    method grammar (line 99) | def grammar(self, node: GrammarNode, **kwargs) -> Iterator[OutputAttr]:
    method _process_kwargs (line 131) | def _process_kwargs(self, **kwargs):
  class SglangModel (line 158) | class SglangModel(Model):
    method __init__ (line 159) | def __init__(self, model: str, sampling_params: SamplingParams | None ...

FILE: guidance/models/experimental/_vllm.py
  class VLLMInterpreter (line 11) | class VLLMInterpreter(BaseOpenAIInterpreter):
    method __init__ (line 12) | def __init__(
    method grammar (line 29) | def grammar(self, node: GrammarNode, **kwargs) -> Iterator[OutputAttr]:
    method _process_kwargs (line 70) | def _process_kwargs(self, **kwargs):
  class VLLMModel (line 96) | class VLLMModel(Model):
    method __init__ (line 97) | def __init__(self, model: str, sampling_params: SamplingParams | None ...

FILE: guidance/registry/_registry.py
  function get_monitor (line 27) | def get_monitor() -> Monitor:
  function get_bg_async (line 41) | def get_bg_async() -> BackgroundAsync:
  function get_exchange (line 51) | def get_exchange() -> TopicExchange:
  function get_trace_handler (line 61) | def get_trace_handler() -> TraceHandler:
  function get_renderer (line 71) | def get_renderer() -> Renderer:
  function set_renderer (line 82) | def set_renderer(renderer: Renderer) -> None:

FILE: guidance/trace/_trace.py
  class NodeAttr (line 15) | class NodeAttr(BaseModel):
    method __init_subclass__ (line 20) | def __init_subclass__(cls, **kwargs):
    method class_name (line 26) | def class_name(self) -> str:
    method validate_class_name (line 31) | def validate_class_name(cls, data):
    method as_discriminated_union (line 38) | def as_discriminated_union(cls) -> type["NodeAttr"]:
    method __repr__ (line 46) | def __repr__(self):
    method __str__ (line 49) | def __str__(self):
  class InputAttr (line 53) | class InputAttr(NodeAttr):
  class OutputAttr (line 59) | class OutputAttr(NodeAttr):
  class StatelessGuidanceInput (line 65) | class StatelessGuidanceInput(InputAttr):
    method __repr__ (line 71) | def __repr__(self):
  class StatefulGuidanceInput (line 75) | class StatefulGuidanceInput(InputAttr):
    method __repr__ (line 81) | def __repr__(self):
  class LiteralInput (line 85) | class LiteralInput(InputAttr):
  class ImageInput (line 91) | class ImageInput(InputAttr):
  class AudioInput (line 98) | class AudioInput(InputAttr):
  class VideoInput (line 105) | class VideoInput(InputAttr):
  class EmbeddedInput (line 112) | class EmbeddedInput(InputAttr):
  class RoleOpenerInput (line 118) | class RoleOpenerInput(InputAttr):
  class RoleCloserInput (line 129) | class RoleCloserInput(InputAttr):
  class AudioOutput (line 139) | class AudioOutput(OutputAttr):
  class VideoOutput (line 147) | class VideoOutput(OutputAttr):
  class ImageOutput (line 155) | class ImageOutput(OutputAttr):
  class TextOutput (line 163) | class TextOutput(OutputAttr):
    method __str__ (line 172) | def __str__(self):
  class Token (line 176) | class Token(BaseModel):
  class TokenOutput (line 183) | class TokenOutput(TextOutput):
  class Backtrack (line 188) | class Backtrack(OutputAttr):
  class CaptureOutput (line 193) | class CaptureOutput(OutputAttr):
    method __str__ (line 204) | def __str__(self):
  class WeakRefList (line 208) | class WeakRefList(list):
    method append (line 214) | def append(self, item):
    method __getitem__ (line 217) | def __getitem__(self, index):
    method __iter__ (line 224) | def __iter__(self):
    method remove (line 227) | def remove(self, item):
  function _cleanup (line 238) | def _cleanup(log_msg: str):
  class TraceNode (line 242) | class TraceNode(BaseModel):
    method __init__ (line 251) | def __init__(self, **kwargs):
    method add_child (line 255) | def add_child(self, child: "TraceNode") -> None:
    method remove_child (line 264) | def remove_child(self, child: "TraceNode") -> None:
    method ancestors (line 273) | def ancestors(self) -> Generator["TraceNode", None, None]:
    method path (line 284) | def path(self) -> Generator["TraceNode", None, None]:
    method root (line 293) | def root(self) -> "TraceNode":
    method traverse (line 305) | def traverse(self, bfs: bool = True):
    method __repr__ (line 325) | def __repr__(self):
    method __hash__ (line 328) | def __hash__(self):
  class TraceHandler (line 332) | class TraceHandler(BaseModel):
    method __init__ (line 347) | def __init__(self, **kwargs):
    method __getitem__ (line 351) | def __getitem__(self, item):
    method __hash__ (line 354) | def __hash__(self):
    method update_node (line 357) | def update_node(self, identifier: int, parent_id: int | None, node_att...
    method root (line 397) | def root(self) -> TraceNode:

FILE: guidance/visual/_environment.py
  class EnvFlags (line 14) | class EnvFlags(BaseModel):
  class Environment (line 21) | class Environment:
    method __init__ (line 24) | def __init__(self):
    method detected_envs (line 42) | def detected_envs(self) -> list[str]:
    method is_notebook (line 50) | def is_notebook(self) -> bool:
    method is_cloud (line 58) | def is_cloud(self) -> bool:
    method is_terminal (line 66) | def is_terminal(self) -> bool:
  function _detect_vscode (line 75) | def _detect_vscode(flags: EnvFlags) -> bool:
  function _detect_ipython (line 89) | def _detect_ipython(flags: EnvFlags) -> bool:
  function _detect_ipython_zmq (line 110) | def _detect_ipython_zmq(flags: EnvFlags) -> bool:

FILE: guidance/visual/_exchange.py
  class TopicExchange (line 16) | class TopicExchange:
    method __init__ (line 23) | def __init__(self):
    method subscribe (line 27) | def subscribe(self, callback: Callable[[GuidanceMessage], None], topic...
    method unsubscribe (line 38) | def unsubscribe(self, callback: Callable[[GuidanceMessage], None], top...
    method publish (line 56) | def publish(self, message: GuidanceMessage, topic: str = DEFAULT_TOPIC):

FILE: guidance/visual/_jupyter.py
  function ipy_handle_event_once (line 17) | def ipy_handle_event_once(cb: IPythonCallback, event_name: str) -> tuple...

FILE: guidance/visual/_message.py
  class GuidanceMessage (line 14) | class GuidanceMessage(BaseModel):
    method __init_subclass__ (line 21) | def __init_subclass__(cls, **kwargs):
    method class_name (line 27) | def class_name(self) -> str:
    method validate_class_name (line 32) | def validate_class_name(cls, data):
    method as_discriminated_union (line 39) | def as_discriminated_union(cls) -> type["GuidanceMessage"]:
  class TraceMessage (line 48) | class TraceMessage(GuidanceMessage):
  class MetricMessage (line 56) | class MetricMessage(GuidanceMessage):
  class ExecutionStartedMessage (line 64) | class ExecutionStartedMessage(GuidanceMessage):
  class ExecutionCompletedMessage (line 68) | class ExecutionCompletedMessage(GuidanceMessage):
  class ResetDisplayMessage (line 78) | class ResetDisplayMessage(GuidanceMessage):
  class ClientReadyMessage (line 84) | class ClientReadyMessage(GuidanceMessage):
  class ClientReadyAckMessage (line 90) | class ClientReadyAckMessage(GuidanceMessage):
  class OutputRequestMessage (line 96) | class OutputRequestMessage(GuidanceMessage):
  class OutputRequestAckMessage (line 102) | class OutputRequestAckMessage(GuidanceMessage):
  function serialize_message (line 108) | def serialize_message(message: GuidanceMessage) -> str:
  function deserialize_message (line 120) | def deserialize_message(data: str) -> GuidanceMessage:

FILE: guidance/visual/_renderer.py
  class Renderer (line 68) | class Renderer:
    method __init__ (line 71) | def __init__(self):
    method update (line 74) | def update(self, message: GuidanceMessage, topic: str = DEFAULT_TOPIC)...
  function _get_src_doc_template (line 85) | def _get_src_doc_template() -> str:
  function _create_stitch_widget (line 96) | def _create_stitch_widget() -> "StitchWidget":
  function _put_nowait_queue (line 108) | def _put_nowait_queue(queue: Queue, val: object) -> None:
  function _cleanup (line 114) | def _cleanup(recv_queue: Queue | None, send_queue: Queue | None, log_msg...
  function _create_queue (line 126) | async def _create_queue() -> Queue:
  function _on_stitch_clientmsg (line 131) | def _on_stitch_clientmsg(recv_queue_weakref: ReferenceType["Queue"], ide...
  function _on_cell_completion (line 138) | def _on_cell_completion(renderer_weakref: ReferenceType["JupyterWidgetRe...
  function _handle_recv_messages (line 157) | async def _handle_recv_messages(
  function _handle_send_messages (line 199) | async def _handle_send_messages(
  function _trace_path_to_messages (line 241) | def _trace_path_to_messages(trace_id: int) -> list["TraceMessage"]:
  class JupyterWidgetRenderer (line 276) | class JupyterWidgetRenderer(Renderer):
    method __init__ (line 279) | def __init__(self, trace_handler: TraceHandler) -> None:
    method _on_exchange (line 318) | def _on_exchange(self, message: GuidanceMessage) -> None:
    method has_divergence (line 329) | def has_divergence(self, message: GuidanceMessage) -> tuple[bool, int]:
    method _replay (line 398) | def _replay(self, message: OutputRequestMessage, topic=DEFAULT_TOPIC) ...
    method update (line 419) | def update(self, message: GuidanceMessage, topic=DEFAULT_TOPIC) -> None:
    method enable_debug (line 513) | def enable_debug(self) -> None:
    method clear_debug_data (line 519) | def clear_debug_data(self) -> None:
    method get_debug_data (line 524) | def get_debug_data(self) -> str | None:
  class DoNothingRenderer (line 570) | class DoNothingRenderer(Renderer):
    method __init__ (line 573) | def __init__(self, trace_handler: TraceHandler) -> None:
    method update (line 582) | def update(self, message: GuidanceMessage, topic: str = DEFAULT_TOPIC)...
  class AutoRenderer (line 586) | class AutoRenderer(Renderer):
    method __init__ (line 589) | def __init__(self, trace_handler: TraceHandler):
    method update (line 615) | def update(self, message: GuidanceMessage, topic: str = DEFAULT_TOPIC)...
    method renderer_type (line 618) | def renderer_type(self) -> type:

FILE: guidance/visual/_trace.py
  function trace_node_to_html (line 17) | def trace_node_to_html(node: TraceNode, prettify_roles=False) -> str:
  function trace_node_to_str (line 111) | def trace_node_to_str(node: TraceNode) -> str:
  function display_trace_tree (line 129) | def display_trace_tree(trace_handler: TraceHandler) -> None:
Copy disabled (too large) Download .json
Condensed preview — 280 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (20,302K chars).
[
  {
    "path": ".git-blame-ignore-revs",
    "chars": 395,
    "preview": "# .git-blame-ignore-revs\n# Ran black on major files to standardize codebase\n57da386795bc94a34275b333da586f171f96d7c8\n# R"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.md",
    "chars": 550,
    "preview": "---\nname: Bug report\nabout: Create a report to help us improve\ntitle: ''\nlabels: ''\nassignees: ''\n\n---\n\n**The bug**\nA cl"
  },
  {
    "path": ".github/workflows/call_cpu_tests.yml",
    "chars": 2823,
    "preview": "name: call_cpu_tests\n\non:\n  workflow_call:\n    inputs:\n      os:\n        required: true\n        type: string\n      pytho"
  },
  {
    "path": ".github/workflows/call_gpu_tests.yml",
    "chars": 3160,
    "preview": "name: call_gpu_tests\n\non:\n  workflow_call:\n    inputs:\n      os:\n        required: true\n        type: string\n      pytho"
  },
  {
    "path": ".github/workflows/ci_credentials.yml",
    "chars": 2276,
    "preview": "# These access secrets, so should only be run on local branches.\n\nname: CI Tests - Credentialed\npermissions:\n  contents:"
  },
  {
    "path": ".github/workflows/ci_docs.yml",
    "chars": 1406,
    "preview": "# These access secrets, so should only be run on local branches.\n\nname: CI Tests - Docs\npermissions:\n  contents: read\n\n\n"
  },
  {
    "path": ".github/workflows/ci_linux.yml",
    "chars": 2758,
    "preview": "# CI Tests which run on Linux machines\n\n# These access secrets, so should only be run on local branches.\n\n# Ideally, the"
  },
  {
    "path": ".github/workflows/ci_macos.yml",
    "chars": 1272,
    "preview": "# CI Tests which run on MacOS machines\n\n# These access secrets, so should only be run on local branches.\n\n# Ideally, the"
  },
  {
    "path": ".github/workflows/ci_windows.yml",
    "chars": 1987,
    "preview": "# CI Tests which run on Windows machines\n\n# These access secrets, so should only be run on local branches.\n\n# Ideally, t"
  },
  {
    "path": ".github/workflows/code_quality.yml",
    "chars": 4526,
    "preview": "name: Code Quality\n\nenv:\n  PYTHON_VERSION: \"3.12\"\n\non:\n  pull_request:\n  workflow_dispatch:\n    inputs:\n      commit_id:"
  },
  {
    "path": ".github/workflows/notebook_tests.yml",
    "chars": 2112,
    "preview": "# These access secrets, so should only be run on local branches.\n\n# Not part of the regular CI run, since notebook tests"
  },
  {
    "path": ".github/workflows/pull_request.yml",
    "chars": 2571,
    "preview": "name: Pull Request\n\non:\n  pull_request:\n  workflow_dispatch:\n    inputs:\n      commit_id:\n        description: 'Branch o"
  },
  {
    "path": ".github/workflows/pypi_upload.yml",
    "chars": 3765,
    "preview": "name: Build wheels\n\non:\n  release:\n    types: [published]\n  workflow_dispatch:  # Enable manual run\n    inputs:\n      co"
  },
  {
    "path": ".github/workflows/widget_build.yml",
    "chars": 933,
    "preview": "name: Widget Build\npermissions:\n  contents: read\n\non:\n  pull_request:\n  workflow_dispatch:\n    inputs:\n      commit_id:\n"
  },
  {
    "path": ".gitignore",
    "chars": 394,
    "preview": "notebooks/local_scratch\n__pycache__/\n.vscode\n.vs\n.idea/\n/build\n/dist\n*.egg-info\n*.diskcache\n.ipynb_checkpoints\nnode_modu"
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 4024,
    "preview": "# Contributing\n\nThis Project welcomes contributions, suggestions, and feedback. All contributions, suggestions, and feed"
  },
  {
    "path": "GOVERNANCE.md",
    "chars": 3670,
    "preview": "# Governance Policy\n\nThis document provides the governance policy for the Project. Maintainers agree to this policy and "
  },
  {
    "path": "LICENSE.md",
    "chars": 1077,
    "preview": "MIT License\n\nCopyright (c) The Guidance Contributors\n\nPermission is hereby granted, free of charge, to any person obtain"
  },
  {
    "path": "MAINTAINERS.md",
    "chars": 1243,
    "preview": "# Maintainers\n\nThis document lists the Maintainers of the Project. Maintainers may be added once approved by the existin"
  },
  {
    "path": "README.md",
    "chars": 12617,
    "preview": "<div align=\"right\">\n  <a href=\"https://discord.gg/cjPfAK43dz\"><img src=\"https://img.shields.io/badge/Discord-Join%20Us-5"
  },
  {
    "path": "client/graphpaper-inline/.gitignore",
    "chars": 63,
    "preview": "node_modules/\nbuild/\n.DS_Store\ntest-results/\nplaywright-report/"
  },
  {
    "path": "client/graphpaper-inline/build-to-guidance.sh",
    "chars": 101,
    "preview": "#!/bin/bash\nset -x\n\nnpm run build\ncp dist/index.html ../../guidance/resources/graphpaper-inline.html\n"
  },
  {
    "path": "client/graphpaper-inline/dist/.gitignore",
    "chars": 13,
    "preview": "*\n!.gitignore"
  },
  {
    "path": "client/graphpaper-inline/package.json",
    "chars": 1409,
    "preview": "{\n    \"name\": \"graphpaper\",\n    \"version\": \"0.0.1\",\n    \"scripts\": {\n        \"build\": \"rollup -c\",\n        \"dev\": \"rollu"
  },
  {
    "path": "client/graphpaper-inline/postcss.config.js",
    "chars": 142,
    "preview": "module.exports = {\n    plugins: {\n        '@tailwindcss/postcss': {},\n        autoprefixer: {},\n        cssnano: { prese"
  },
  {
    "path": "client/graphpaper-inline/rollup.config.mjs",
    "chars": 1464,
    "preview": "import svelte from 'rollup-plugin-svelte';\nimport { sveltePreprocess } from 'svelte-preprocess';\nimport resolve from '@r"
  },
  {
    "path": "client/graphpaper-inline/src/App.svelte",
    "chars": 9391,
    "preview": "<!-- Main app that handles token rendering and metrics.\n\nThis has bidirectional communication between the guidance serve"
  },
  {
    "path": "client/graphpaper-inline/src/CustomAudio.svelte",
    "chars": 13506,
    "preview": "<script lang=\"ts\">\n  import { onMount } from \"svelte\";\n  import type { MediaNode } from \"./interfaces\";\n\n  // Add proper"
  },
  {
    "path": "client/graphpaper-inline/src/CustomVideo.svelte",
    "chars": 1376,
    "preview": "<script lang=\"ts\">\n  import \"video.js/dist/video-js.css\";\n  import videojs from \"video.js\";\n  import { onMount, onDestro"
  },
  {
    "path": "client/graphpaper-inline/src/MetricRecord.svelte",
    "chars": 1785,
    "preview": "<!-- Each metric is displayed as a card. -->\n<script lang=\"ts\">\n  import { type MetricDef, type MetricVal } from './inte"
  },
  {
    "path": "client/graphpaper-inline/src/ResizeListener.svelte",
    "chars": 1085,
    "preview": "<!-- Handles resizing of content, especially important for jupyter notebooks. -->\n<script lang=\"ts\">\n    import { onMoun"
  },
  {
    "path": "client/graphpaper-inline/src/Select.svelte",
    "chars": 1983,
    "preview": "<!-- Custom select dropdown -->\n<script lang=\"ts\">\n    import { clickOutside } from \"./clickoutside\";\n    import { creat"
  },
  {
    "path": "client/graphpaper-inline/src/Sparkline.svelte",
    "chars": 1073,
    "preview": "<!-- Sparkline for tracking distributions or values over time scaled from 0 to 1. -->\n\n<script lang=\"ts\">\n  import { sca"
  },
  {
    "path": "client/graphpaper-inline/src/StitchHandler.svelte",
    "chars": 1701,
    "preview": "<!-- Handles stitch client and kernel messages. -->\n<script lang=\"ts\">\n    import type { Unsubscriber } from 'svelte/sto"
  },
  {
    "path": "client/graphpaper-inline/src/TokenGrid.svelte",
    "chars": 24322,
    "preview": "<!-- Token grid that exposes each token and hover info. -->\n<script lang=\"ts\">\n  import {\n    isRoleOpenerInput,\n    isR"
  },
  {
    "path": "client/graphpaper-inline/src/TokenGridItem.svelte",
    "chars": 2166,
    "preview": "<!-- Token(s) within token grid -->\n<script lang=\"ts\">\n    import {type FlatToken} from \"./interfaces\";\n\n    export let "
  },
  {
    "path": "client/graphpaper-inline/src/clickoutside.ts",
    "chars": 428,
    "preview": "// Action for clicking outside an element.\n\nexport function clickOutside(node: HTMLElement) {\n\tconst handleClick = (even"
  },
  {
    "path": "client/graphpaper-inline/src/interfaces.ts",
    "chars": 1052,
    "preview": "// Interfaces used within the client. This is separate to messaging interfaces.\n\nimport type { RoleOpenerInput} from \"./"
  },
  {
    "path": "client/graphpaper-inline/src/longhover.ts",
    "chars": 869,
    "preview": "// Action for long mouse hovers.\n\nexport function longhover(node: HTMLElement, duration: number) {\n    let timer: any;\n\n"
  },
  {
    "path": "client/graphpaper-inline/src/main.css",
    "chars": 473,
    "preview": "/* Custom CSS for web app. */\n@import \"tailwindcss\";\n@variant dark (&:where(.dark, .dark *));\n\n/* Note - Tailwind v4 use"
  },
  {
    "path": "client/graphpaper-inline/src/main.js",
    "chars": 131,
    "preview": "// Entrypoint for web app.\n\nimport App from './App.svelte';\n\nconst app = new App({\n\ttarget: document.body,\n});\n\nexport d"
  },
  {
    "path": "client/graphpaper-inline/src/metrics.ts",
    "chars": 1517,
    "preview": "// Metrics and their definitions.\n\nimport type { MetricDef } from './interfaces';\n\nexport const metricDefs: Record<strin"
  },
  {
    "path": "client/graphpaper-inline/src/mocks.ts",
    "chars": 6217,
    "preview": "// Mocks for interactive testing\n\nimport {type TextOutput, type RoleOpenerInput, type RoleCloserInput } from './stitch';"
  },
  {
    "path": "client/graphpaper-inline/src/stitch.ts",
    "chars": 6348,
    "preview": "// Interfaces for working with guidance messages and stitch.\n\nimport { writable } from 'svelte/store';\n\n\nexport interfac"
  },
  {
    "path": "client/graphpaper-inline/src/template.html",
    "chars": 423,
    "preview": "<!doctype html>\n<html lang=\"en\">\n<head>\n    <meta charset=\"utf-8\">\n\n    <link rel=\"preconnect\" href=\"https://fonts.googl"
  },
  {
    "path": "client/graphpaper-inline/tsconfig.json",
    "chars": 321,
    "preview": "{\n\t\"compilerOptions\": {\n\t\t\"allowJs\": true,\n\t\t\"checkJs\": true,\n\t\t\"esModuleInterop\": true,\n\t\t\"forceConsistentCasingInFileN"
  },
  {
    "path": "docs/.readthedocs.yaml",
    "chars": 1169,
    "preview": "# version: 2\n\n# Read the Docs configuration file for Sphinx projects\n# See https://docs.readthedocs.io/en/stable/config-"
  },
  {
    "path": "docs/Makefile",
    "chars": 7613,
    "preview": "# Makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line.\nSPHINXOPTS    =\nSPHINXBUILD "
  },
  {
    "path": "docs/_static/css/styles.css",
    "chars": 1245,
    "preview": ".wy-side-nav-search > a img.logo, .wy-side-nav-search .wy-dropdown > a img.logo {\n    width: 250px;\n    margin-top: 20px"
  },
  {
    "path": "docs/api.rst",
    "chars": 851,
    "preview": ".. currentmodule:: guidance\n\nAPI Reference\n=============\nThis page contains the API reference for public objects and fun"
  },
  {
    "path": "docs/api_examples.rst",
    "chars": 743,
    "preview": ".. currentmodule:: guidance\n\n.. _api_examples:\n\nAPI Examples\n------------\n\nThese examples parallel the namespace structu"
  },
  {
    "path": "docs/art_of_prompt_design.rst",
    "chars": 731,
    "preview": ".. currentmodule:: guidance\n\n.. _art_of_prompt_design:\n\nThe Art of Prompt Design\n------------------------\n\nThese noteboo"
  },
  {
    "path": "docs/conf.py",
    "chars": 11397,
    "preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n#\n# Guidance documentation build configuration file, created by\n# sphinx-"
  },
  {
    "path": "docs/index.rst",
    "chars": 690,
    "preview": "\n.. image:: figures/guidance_logo_blue.svg\n   :width: 300px\n   :align: center\n|\n\n**Guidance** enables you to control mod"
  },
  {
    "path": "docs/make.bat",
    "chars": 7454,
    "preview": "@ECHO OFF\n\nREM Command file for Sphinx documentation\n\nif \"%SPHINXBUILD%\" == \"\" (\n\tset SPHINXBUILD=sphinx-build\n)\nset BUI"
  },
  {
    "path": "docs/tutorials.rst",
    "chars": 624,
    "preview": ".. currentmodule:: guidance\n\n.. _tutorials:\n\nTutorials\n----------------\n\nThese notebooks demonstrate various features of"
  },
  {
    "path": "guidance/__init__.py",
    "chars": 657,
    "preview": "__version__ = \"0.3.2\"\n\nimport sys\nfrom types import ModuleType\n\nfrom . import library, models\nfrom ._guidance import gui"
  },
  {
    "path": "guidance/_ast.py",
    "chars": 27223,
    "preview": "import copy\nimport json\nimport re\nimport textwrap\nimport warnings\nfrom abc import ABC, abstractmethod\nfrom dataclasses i"
  },
  {
    "path": "guidance/_bg/__init__.py",
    "chars": 2285,
    "preview": "\"\"\"Background thread for asyncio handling.\n\nThis is currently being used for messaging, visualization and metrics.\n\"\"\"\n\n"
  },
  {
    "path": "guidance/_grammar.py",
    "chars": 7021,
    "preview": "import dataclasses\nimport re\nfrom typing import Sequence\n\nfrom ._ast import (\n    Function,\n    GrammarNode,\n    Literal"
  },
  {
    "path": "guidance/_guidance.py",
    "chars": 7494,
    "preview": "import dataclasses\nimport functools\nimport inspect\nimport threading\nimport weakref\nfrom contextvars import ContextVar\nfr"
  },
  {
    "path": "guidance/_guidance.pyi",
    "chars": 2086,
    "preview": "import sys\nfrom contextvars import ContextVar\nfrom typing import (\n    Any,\n    Callable,\n    Literal,\n    TypeVar,\n    "
  },
  {
    "path": "guidance/_parser.py",
    "chars": 13777,
    "preview": "import os\nimport time\nfrom concurrent.futures import Future, ThreadPoolExecutor\nfrom typing import TYPE_CHECKING, Any, G"
  },
  {
    "path": "guidance/_schema.py",
    "chars": 9273,
    "preview": "from functools import cached_property\nfrom typing import Any, Callable, Literal, Set, TypedDict\n\nfrom annotated_types im"
  },
  {
    "path": "guidance/_tools.py",
    "chars": 7052,
    "preview": "import builtins\nimport inspect\nimport textwrap\nimport traceback\nfrom types import TracebackType\nfrom typing import TYPE_"
  },
  {
    "path": "guidance/_topics.py",
    "chars": 493,
    "preview": "\"\"\"Exchange topic constants for guidance message routing.\n\nThis module centralizes all topic constants used throughout t"
  },
  {
    "path": "guidance/_utils.py",
    "chars": 15223,
    "preview": "import ast\nimport asyncio\nimport functools\nimport http\nimport inspect\nimport json\nimport logging\nimport pathlib\nimport r"
  },
  {
    "path": "guidance/chat.py",
    "chars": 38243,
    "preview": "import inspect\nimport warnings\n\n\nclass ChatTemplate:\n    \"\"\"Contains template for all chat and instruct tuned models.\"\"\""
  },
  {
    "path": "guidance/debug.py",
    "chars": 5421,
    "preview": "\"\"\"Debug utilities for the guidance widget.\"\"\"\n\nimport logging\n\nfrom .registry import get_renderer\nfrom .visual._rendere"
  },
  {
    "path": "guidance/library/__init__.py",
    "chars": 1162,
    "preview": "# import functions that can be called directly\n# core grammar functions\nfrom .._grammar import select, special_token, st"
  },
  {
    "path": "guidance/library/_audio.py",
    "chars": 460,
    "preview": "import base64\nimport pathlib\n\nfrom .._ast import AudioBlob, GenAudio\nfrom .._guidance import guidance\nfrom .._utils impo"
  },
  {
    "path": "guidance/library/_block.py",
    "chars": 803,
    "preview": "from contextlib import contextmanager\n\nfrom .._ast import ASTNode, Function\nfrom .._guidance import _in_stateless_contex"
  },
  {
    "path": "guidance/library/_capture.py",
    "chars": 420,
    "preview": "from .._grammar import GrammarNode\nfrom .._grammar import capture as grammar_capture\nfrom .._guidance import guidance\nfr"
  },
  {
    "path": "guidance/library/_ebnf.py",
    "chars": 1415,
    "preview": "from llguidance.gbnf_to_lark import gbnf_to_lark as _gbnf_to_lark\n\nfrom .._ast import GrammarNode, LarkNode, RuleNode\nfr"
  },
  {
    "path": "guidance/library/_gen.py",
    "chars": 6545,
    "preview": "import logging\nfrom typing import Literal\n\nfrom .._ast import ToolCallNode\nfrom .._grammar import capture, quote_regex\nf"
  },
  {
    "path": "guidance/library/_image.py",
    "chars": 936,
    "preview": "import base64\nimport importlib.resources\nimport pathlib\nimport re\n\nfrom .._ast import ImageBlob, ImageUrl\nfrom .._guidan"
  },
  {
    "path": "guidance/library/_json.py",
    "chars": 4834,
    "preview": "from json import loads as json_loads\nfrom typing import Any, Mapping, TypeAlias\n\nimport pydantic\n\nfrom .._ast import Jso"
  },
  {
    "path": "guidance/library/_optional.py",
    "chars": 152,
    "preview": "from .._grammar import repeat\nfrom .._guidance import guidance\n\n\n@guidance(stateless=True)\ndef optional(lm, value):\n    "
  },
  {
    "path": "guidance/library/_pydantic.py",
    "chars": 1323,
    "preview": "import inspect\nfrom typing import Any, Union\n\nimport pydantic\n\n\nclass GenerateJsonSchemaSafe(pydantic.json_schema.Genera"
  },
  {
    "path": "guidance/library/_role.py",
    "chars": 1631,
    "preview": "from contextlib import AbstractContextManager\n\nfrom .._ast import RoleEnd, RoleStart\nfrom ._block import block\n\n\n# TODO "
  },
  {
    "path": "guidance/library/_sequences.py",
    "chars": 771,
    "preview": "from .._grammar import repeat\nfrom .._guidance import guidance\n\n\n@guidance(stateless=True)\ndef exactly_n_repeats(model, "
  },
  {
    "path": "guidance/library/_subgrammar.py",
    "chars": 593,
    "preview": "from .._ast import GrammarNode, RuleNode\nfrom .._grammar import regex, subgrammar\n\n__all__ = [\"as_regular_grammar\", \"lex"
  },
  {
    "path": "guidance/library/_substring.py",
    "chars": 1051,
    "preview": "import re\nfrom typing import Callable, Iterable, Literal\n\nfrom .._ast import RuleNode, SubstringNode\n\n\ndef chunk_on_word"
  },
  {
    "path": "guidance/library/_video.py",
    "chars": 933,
    "preview": "import base64\nimport importlib.resources\nimport pathlib\n\nfrom .._guidance import guidance\nfrom .._utils import bytes_fro"
  },
  {
    "path": "guidance/metrics/__init__.py",
    "chars": 231,
    "preview": "\"\"\"Metrics that arise from both language models and its execution environment.\"\"\"\n\nfrom ._metrics import Monitor, Period"
  },
  {
    "path": "guidance/metrics/_metrics.py",
    "chars": 8213,
    "preview": "import asyncio\nimport logging\nimport time\nfrom asyncio import CancelledError\nfrom enum import Enum\nfrom typing import An"
  },
  {
    "path": "guidance/models/__init__.py",
    "chars": 501,
    "preview": "from . import experimental\nfrom ._azureai import create_azure_aifoundry_model, create_azure_openai_model\nfrom ._base imp"
  },
  {
    "path": "guidance/models/_azureai.py",
    "chars": 9735,
    "preview": "import logging\nfrom typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterator, Optional, Union, cast\n\nfrom gu"
  },
  {
    "path": "guidance/models/_base/__init__.py",
    "chars": 232,
    "preview": "from ._interpreter import Interpreter\nfrom ._model import Model\nfrom ._state import State\n\n__all__ = [\n    \"ASTNode\",\n  "
  },
  {
    "path": "guidance/models/_base/_interpreter.py",
    "chars": 4170,
    "preview": "import base64\nfrom typing import Generic, Iterator, TypeVar\n\nfrom ..._ast import (\n    ASTNode,\n    AudioBlob,\n    GenAu"
  },
  {
    "path": "guidance/models/_base/_model.py",
    "chars": 14693,
    "preview": "# TODO(nopdive): This module requires a memory review.\n\nimport queue\nimport threading\nfrom contextvars import ContextVar"
  },
  {
    "path": "guidance/models/_base/_state.py",
    "chars": 1847,
    "preview": "from abc import ABC, abstractmethod\nfrom typing import TypedDict\n\nfrom ..._schema import TokenUsage\nfrom ...metrics impo"
  },
  {
    "path": "guidance/models/_byte_tokenizer.py",
    "chars": 1400,
    "preview": "import numpy as np\n\nfrom ._engine import Tokenizer\nfrom ._engine._tokenizer import TokenizerWrappable\n\n\nclass ByteTokeni"
  },
  {
    "path": "guidance/models/_engine/__init__.py",
    "chars": 384,
    "preview": "from ._tokenizer import Tokenizer  # isort:skip\nfrom ._engine import Engine, LogitsOutput\nfrom ._interpreter import Engi"
  },
  {
    "path": "guidance/models/_engine/_engine.py",
    "chars": 34952,
    "preview": "# TODO(nopdive): This module requires a memory review.\n\nimport logging\nimport time\nfrom abc import ABC, abstractmethod\nf"
  },
  {
    "path": "guidance/models/_engine/_interpreter.py",
    "chars": 9811,
    "preview": "import re\nfrom base64 import b64decode, b64encode\nfrom copy import deepcopy\nfrom io import BytesIO\nfrom typing import It"
  },
  {
    "path": "guidance/models/_engine/_state.py",
    "chars": 516,
    "preview": "from typing import Any\n\nfrom ..._schema import TokenUsage\nfrom .._base import State\n\n\nclass EngineState(State):\n    def "
  },
  {
    "path": "guidance/models/_engine/_tokenizer.py",
    "chars": 3600,
    "preview": "from dataclasses import dataclass\nfrom functools import cached_property\nfrom typing import Any, Callable, Sequence\n\nimpo"
  },
  {
    "path": "guidance/models/_llama_cpp.py",
    "chars": 10021,
    "preview": "import atexit\nimport logging\nimport operator\nimport os\nimport sys\nfrom itertools import takewhile\nfrom pathlib import Pa"
  },
  {
    "path": "guidance/models/_mock.py",
    "chars": 8504,
    "preview": "import logging\nfrom typing import Sequence\n\nimport numpy as np\n\nfrom .._schema import GenTokenExtra, SamplingParams\nfrom"
  },
  {
    "path": "guidance/models/_onnxruntime.py",
    "chars": 5905,
    "preview": "import operator\nfrom itertools import takewhile\nfrom typing import TYPE_CHECKING, Union\n\ntry:\n    from ._transformers im"
  },
  {
    "path": "guidance/models/_openai.py",
    "chars": 2549,
    "preview": "from guidance._schema import SamplingParams\n\nfrom ._base import Model\nfrom ._openai_base import (\n    BaseOpenAIInterpre"
  },
  {
    "path": "guidance/models/_openai_base.py",
    "chars": 26552,
    "preview": "import base64\nimport json\nimport time\nimport wave\nfrom abc import ABC, abstractmethod\nfrom copy import deepcopy\nfrom io "
  },
  {
    "path": "guidance/models/_transformers.py",
    "chars": 25227,
    "preview": "import operator\nimport os\nimport re\nimport textwrap\nimport warnings\nfrom itertools import takewhile\nfrom typing import T"
  },
  {
    "path": "guidance/models/broken_models/README.MD",
    "chars": 265,
    "preview": "These model files use an older version of guidance's internal API design, and need to be updated. They're kept here comm"
  },
  {
    "path": "guidance/models/broken_models/_Gemini.py",
    "chars": 5468,
    "preview": "# import os\n# from pathlib import Path\n# import multiprocessing\n# from itertools import takewhile\n# import operator\n# im"
  },
  {
    "path": "guidance/models/broken_models/_anthropic.py",
    "chars": 5478,
    "preview": "# import os\n# import tiktoken\n\n# from .._engine._engine import Chat, Instruct\n# from .._grammarless import GrammarlessEn"
  },
  {
    "path": "guidance/models/broken_models/_azure_openai.py",
    "chars": 3580,
    "preview": "# import pathlib\n\n# from typing import Union\n# from urllib.parse import parse_qs, urlparse\n\n# import tiktoken\n\n# from .."
  },
  {
    "path": "guidance/models/broken_models/_azureai_studio.py",
    "chars": 8957,
    "preview": "# import hashlib\n# import pathlib\n# import urllib.parse\n\n# import diskcache as dc\n# import platformdirs\n# import request"
  },
  {
    "path": "guidance/models/broken_models/_cohere.py",
    "chars": 1398,
    "preview": "# from ._lite_llm import LiteLLMEngine, LiteLLM, LiteLLMCompletion, LiteLLMInstruct\n\n\n# class Cohere(LiteLLM):\n#     def"
  },
  {
    "path": "guidance/models/broken_models/_googleai.py",
    "chars": 8700,
    "preview": "# import re\n# from .._engine._engine import Chat, Instruct\n# from .._grammarless import Grammarless, GrammarlessEngine\n#"
  },
  {
    "path": "guidance/models/broken_models/_lite_llm.py",
    "chars": 7775,
    "preview": "# import tiktoken\n\n# from .._engine._engine import Chat, Instruct\n# from .._grammarless import GrammarlessTokenizer, Gra"
  },
  {
    "path": "guidance/models/broken_models/_togetherai.py",
    "chars": 2966,
    "preview": "# import os\n# from .._engine._engine import Chat, Instruct\n# from .._openai import (\n#     OpenAI,\n#     OpenAIEngine,\n#"
  },
  {
    "path": "guidance/models/broken_models/_vertexai.py",
    "chars": 9627,
    "preview": "import re\n\nfrom .._engine._engine import Chat, Instruct\nfrom .._grammarless import Grammarless, GrammarlessEngine\n\ntry:\n"
  },
  {
    "path": "guidance/models/experimental/__init__.py",
    "chars": 143,
    "preview": "from ._litellm import LiteLLM\nfrom ._sglang import SglangModel\nfrom ._vllm import VLLMModel\n\n__all__ = [\"LiteLLM\", \"Sgla"
  },
  {
    "path": "guidance/models/experimental/_litellm.py",
    "chars": 10469,
    "preview": "from typing import TYPE_CHECKING, Any, ContextManager, Iterator\n\nfrom guidance._schema import SamplingParams\n\nfrom ..._a"
  },
  {
    "path": "guidance/models/experimental/_sglang.py",
    "chars": 5936,
    "preview": "from typing import Iterator\n\nfrom guidance._schema import SamplingParams\n\nfrom ..._ast import GrammarNode, JsonNode, Reg"
  },
  {
    "path": "guidance/models/experimental/_vllm.py",
    "chars": 3828,
    "preview": "from typing import Iterator\n\nfrom guidance._schema import SamplingParams\n\nfrom ..._ast import GrammarNode\nfrom ...trace "
  },
  {
    "path": "guidance/py.typed",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "guidance/registry/__init__.py",
    "chars": 298,
    "preview": "\"\"\"Registry module that contains singletons.\"\"\"\n\nfrom ._registry import get_bg_async, get_exchange, get_monitor, get_ren"
  },
  {
    "path": "guidance/registry/_registry.py",
    "chars": 1986,
    "preview": "# NOTE(nopdive): Consider moving singleton factories to registry static class.\n\nimport threading\n\nfrom .._bg import Back"
  },
  {
    "path": "guidance/resources/graphpaper-inline.html",
    "chars": 846958,
    "preview": "<!doctype html>\n<html lang=\"en\">\n<head>\n    <meta charset=\"utf-8\">\n\n    <link rel=\"preconnect\" href=\"https://fonts.googl"
  },
  {
    "path": "guidance/trace/__init__.py",
    "chars": 1082,
    "preview": "\"\"\"Trace tree of inputs & outputs generated from a guidance program.\n\nThe first implementation aims for simplicity.\nOnce"
  },
  {
    "path": "guidance/trace/_trace.py",
    "chars": 11556,
    "preview": "# TODO(nopdive): Consider integrating token operations into trace nodes (handles token healing cleaner).\n# TODO(nopdive)"
  },
  {
    "path": "guidance/visual/__init__.py",
    "chars": 1053,
    "preview": "\"\"\"UI and other visual UX considerations.\n\nUsers should have few reasons to be accessing this module.\n\"\"\"\n\nfrom ._exchan"
  },
  {
    "path": "guidance/visual/_environment.py",
    "chars": 3822,
    "preview": "\"\"\"Rendering environment detection.\n\nDetection logic is inspired from both plotly and interpretml environment detection."
  },
  {
    "path": "guidance/visual/_exchange.py",
    "chars": 2413,
    "preview": "\"\"\"Poor man's exchanges for routing messages.\"\"\"\n\nimport logging\nimport re\nfrom collections import defaultdict\nfrom typi"
  },
  {
    "path": "guidance/visual/_jupyter.py",
    "chars": 655,
    "preview": "\"\"\"Jupyter specific utilities.\"\"\"\n\nimport logging\nfrom typing import Any, Callable\nfrom uuid import uuid4\n\ntry:\n    from"
  },
  {
    "path": "guidance/visual/_message.py",
    "chars": 3473,
    "preview": "\"\"\"Messages that used between server (usually Jupyter Python kernel) and client.\n\nMessages are required to be added to t"
  },
  {
    "path": "guidance/visual/_renderer.py",
    "chars": 23478,
    "preview": "\"\"\"Renderer is responsible for displaying output.\n\nOur main focus is on jupyter notebooks and later terminal.\n\"\"\"\n# NOTE"
  },
  {
    "path": "guidance/visual/_trace.py",
    "chars": 6953,
    "preview": "\"\"\"Visualization related to trace.\"\"\"\n\nimport html\nimport json\n\nfrom ..trace import (\n    ImageOutput,\n    RoleCloserInp"
  },
  {
    "path": "notebooks/anachronism.ipynb",
    "chars": 2823980,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Anachronism"
  },
  {
    "path": "notebooks/api_examples/library/gen.ipynb",
    "chars": 35389,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# `gen` API E"
  },
  {
    "path": "notebooks/api_examples/models/AzureOpenAI.ipynb",
    "chars": 7375,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"882fb634-0efc-42bf-8cc5-79d6a7312e26\",\n   \"metadata\": {},\n   \"so"
  },
  {
    "path": "notebooks/api_examples/models/OpenAI.ipynb",
    "chars": 2143,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# `OpenAI` AP"
  },
  {
    "path": "notebooks/api_examples/models/TogetherAI.ipynb",
    "chars": 9968,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# `TogetherAI"
  },
  {
    "path": "notebooks/art_of_prompt_design/prompt_boundaries_and_token_healing.ipynb",
    "chars": 30888,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"`The Art of P"
  },
  {
    "path": "notebooks/art_of_prompt_design/rag.ipynb",
    "chars": 206070,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"`The Art of Prompt Design`\"\n   ]\n  "
  },
  {
    "path": "notebooks/art_of_prompt_design/react.ipynb",
    "chars": 9308,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"`The Art of Prompt Design`\"\n   ]\n  "
  },
  {
    "path": "notebooks/art_of_prompt_design/tool_use.ipynb",
    "chars": 85125,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Getting lan"
  },
  {
    "path": "notebooks/art_of_prompt_design/use_clear_syntax.ipynb",
    "chars": 9214598,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Use clear syntax\\n\",\n    \"\\n\",\n  "
  },
  {
    "path": "notebooks/chatgpt_vs_open_source_on_harder_tasks.ipynb",
    "chars": 633674,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exploring C"
  },
  {
    "path": "notebooks/engine_chat_completion.ipynb",
    "chars": 6483,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"a2b6b7d0-dd86-4275-be30-c83df4b925ea\",\n   \"metadata\": {},\n   \"so"
  },
  {
    "path": "notebooks/guaranteeing_valid_syntax.ipynb",
    "chars": 10719,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Guaranteein"
  },
  {
    "path": "notebooks/proverb.ipynb",
    "chars": 5821,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# GPT Proverb"
  },
  {
    "path": "notebooks/tutorials/adding_new_models.ipynb",
    "chars": 17230,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Adding support for a new models\\n"
  },
  {
    "path": "notebooks/tutorials/chat.ipynb",
    "chars": 13819,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Chat dialog"
  },
  {
    "path": "notebooks/tutorials/code_generation.ipynb",
    "chars": 52869,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Example: generating and running "
  },
  {
    "path": "notebooks/tutorials/guidance_acceleration.ipynb",
    "chars": 70101,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Guidance ac"
  },
  {
    "path": "notebooks/tutorials/litellm_models.ipynb",
    "chars": 5224,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"9881451c\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Example: Us"
  },
  {
    "path": "notebooks/tutorials/onnxruntime_models.ipynb",
    "chars": 5457,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"1eed4a21\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Example: Us"
  },
  {
    "path": "notebooks/tutorials/regex_constraints.ipynb",
    "chars": 70539,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Regular exp"
  },
  {
    "path": "notebooks/tutorials/token_healing.ipynb",
    "chars": 880819,
    "preview": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Token heali"
  },
  {
    "path": "notebooks/tutorials/tool_calling.ipynb",
    "chars": 3579717,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"213de727\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Defining Too"
  },
  {
    "path": "notebooks/unstable/.gitignore",
    "chars": 13,
    "preview": "*\n!.gitignore"
  },
  {
    "path": "packages/python/stitch/.eslintignore",
    "chars": 42,
    "preview": "node_modules\ndist\ncoverage\n**/*.d.ts\ntests"
  },
  {
    "path": "packages/python/stitch/.eslintrc.js",
    "chars": 807,
    "preview": "module.exports = {\n  extends: [\n    'eslint:recommended',\n    'plugin:@typescript-eslint/eslint-recommended',\n    'plugi"
  },
  {
    "path": "packages/python/stitch/.github/workflows/build.yml",
    "chars": 1070,
    "preview": "name: Build\n\non:\n  push:\n    branches: main\n  pull_request:\n    branches: \"*\"\n\njobs:\n  build:\n    runs-on: ${{ matrix.os"
  },
  {
    "path": "packages/python/stitch/.gitignore",
    "chars": 1960,
    "preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
  },
  {
    "path": "packages/python/stitch/.npmignore",
    "chars": 107,
    "preview": ".DS_Store\nnode_modules/\ntests/\n.jshintrc\n# Ignore any build output from python:\ndist/*.tar.gz\ndist/*.wheel\n"
  },
  {
    "path": "packages/python/stitch/MANIFEST.in",
    "chars": 655,
    "preview": "include LICENSE.txt\ninclude README.md\n\ninclude setup.py\ninclude pyproject.toml\ninclude pytest.ini\ninclude .coverage.rc\n\n"
  },
  {
    "path": "packages/python/stitch/stitch/tests/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/model_integration/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/model_specific/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/need_credentials/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/notebooks/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/unit/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/unit/library/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/unit/library/json/__init__.py",
    "chars": 0,
    "preview": ""
  }
]

// ... and 110 more files (download for full content)

About this extraction

This page contains the full source code of the guidance-ai/guidance GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 280 files (29.2 MB), approximately 4.8M tokens, and a symbol index with 790 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!