main e2e3a3f102ab cached
366 files
2.5 MB
681.3k tokens
2672 symbols
1 requests
Download .txt
Showing preview only (2,718K chars total). Download the full file or copy to clipboard to get everything.
Repository: thinking-machines-lab/tinker-cookbook
Branch: main
Commit: e2e3a3f102ab
Files: 366
Total size: 2.5 MB

Directory structure:
gitextract_vc456gw1/

├── .claude/
│   └── skills/
│       ├── checkpoints/
│       │   └── SKILL.md
│       ├── ci/
│       │   └── SKILL.md
│       ├── completers/
│       │   └── SKILL.md
│       ├── contributing/
│       │   └── SKILL.md
│       ├── datasets/
│       │   └── SKILL.md
│       ├── distillation/
│       │   └── SKILL.md
│       ├── dpo/
│       │   └── SKILL.md
│       ├── environments/
│       │   └── SKILL.md
│       ├── evals/
│       │   └── SKILL.md
│       ├── grpo/
│       │   └── SKILL.md
│       ├── hyperparams/
│       │   └── SKILL.md
│       ├── logging/
│       │   └── SKILL.md
│       ├── manage-skills/
│       │   └── SKILL.md
│       ├── models/
│       │   └── SKILL.md
│       ├── multiturn-rl/
│       │   └── SKILL.md
│       ├── new-recipe/
│       │   └── SKILL.md
│       ├── renderers/
│       │   └── SKILL.md
│       ├── rlhf/
│       │   └── SKILL.md
│       ├── setup/
│       │   └── SKILL.md
│       ├── sft/
│       │   └── SKILL.md
│       ├── tinker-cli/
│       │   └── SKILL.md
│       ├── tinker-sdk/
│       │   └── SKILL.md
│       ├── tinker-types/
│       │   └── SKILL.md
│       └── weights/
│           └── SKILL.md
├── .github/
│   └── workflows/
│       ├── claude-review.yml
│       ├── downstream-compat.yaml
│       ├── nightly.yaml
│       ├── pre-commit.yaml
│       ├── publish-pypi.yaml
│       ├── pyright.yaml
│       ├── pytest.yaml
│       ├── smoke-test-evals.yaml
│       └── smoke-test-recipes.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .sync_state
├── AGENTS.md
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── docs/
│   ├── api-reference/
│   │   ├── apifuture.md
│   │   ├── exceptions.md
│   │   ├── restclient.md
│   │   ├── samplingclient.md
│   │   ├── serviceclient.md
│   │   ├── trainingclient.md
│   │   └── types.md
│   ├── async.mdx
│   ├── compatible-apis/
│   │   └── openai.mdx
│   ├── completers.mdx
│   ├── dev-tips.mdx
│   ├── docs-outline.mdx
│   ├── download-weights.mdx
│   ├── evals.mdx
│   ├── index.mdx
│   ├── install.mdx
│   ├── lora-primer.mdx
│   ├── losses.mdx
│   ├── model-lineup.mdx
│   ├── overview-building.mdx
│   ├── preferences/
│   │   ├── dpo-guide.mdx
│   │   └── rlhf-example.mdx
│   ├── preferences.mdx
│   ├── publish-weights.mdx
│   ├── rendering.mdx
│   ├── rl/
│   │   ├── rl-basic.mdx
│   │   ├── rl-envs.mdx
│   │   ├── rl-hyperparams.mdx
│   │   ├── rl-logging.mdx
│   │   ├── rl-loops.mdx
│   │   └── sequence-extension.mdx
│   ├── rl.mdx
│   ├── save-load.mdx
│   ├── supervised-learning/
│   │   ├── prompt-distillation.mdx
│   │   ├── sl-basic.mdx
│   │   ├── sl-hyperparams.mdx
│   │   ├── sl-loop.mdx
│   │   └── sweep-case-study.mdx
│   ├── supervised-learning.mdx
│   ├── support.mdx
│   ├── training-sampling.mdx
│   └── under-the-hood.mdx
├── pyproject.toml
├── tests/
│   ├── __init__.py
│   ├── compare_sampling_training_logprobs.py
│   ├── conftest.py
│   ├── downstream_compat/
│   │   ├── __init__.py
│   │   ├── conftest.py
│   │   ├── sig_helpers.py
│   │   ├── test_checkpoint_utils.py
│   │   ├── test_cli_and_hyperparam.py
│   │   ├── test_completers.py
│   │   ├── test_model_info.py
│   │   ├── test_recipes.py
│   │   ├── test_renderers.py
│   │   ├── test_rl_train.py
│   │   ├── test_rl_types.py
│   │   ├── test_supervised.py
│   │   ├── test_tokenizer_utils.py
│   │   └── test_utils.py
│   ├── helpers.py
│   ├── recipes/
│   │   ├── __init__.py
│   │   ├── test_recipe_chat_sl.py
│   │   ├── test_recipe_dpo.py
│   │   ├── test_recipe_guess_number.py
│   │   ├── test_recipe_math_rl.py
│   │   ├── test_recipe_off_policy_reasoning.py
│   │   ├── test_recipe_on_policy_distillation.py
│   │   ├── test_recipe_on_policy_multi_teacher.py
│   │   ├── test_recipe_rlhf_pipeline.py
│   │   ├── test_recipe_shorter.py
│   │   ├── test_recipe_text_arena.py
│   │   ├── test_recipe_twenty_questions.py
│   │   └── test_recipe_vlm_classifier.py
│   ├── test_inspect_eval.py
│   ├── test_modal_sandbox.py
│   ├── third_party/
│   │   ├── __init__.py
│   │   └── test_litellm.py
│   ├── validate_temperature_logprobs.py
│   └── weights/
│       ├── __init__.py
│       ├── test_download.py
│       ├── test_export.py
│       ├── test_lifecycle.py
│       ├── test_publish.py
│       ├── test_quantized.py
│       ├── test_quantized_equivalence.py
│       └── test_strategy_consistency.py
└── tinker_cookbook/
    ├── __init__.py
    ├── chat_app/
    │   ├── README.md
    │   └── tinker_chat_cli.py
    ├── checkpoint_utils.py
    ├── checkpoint_utils_test.py
    ├── cli_utils.py
    ├── cli_utils_test.py
    ├── completers.py
    ├── display.py
    ├── distillation/
    │   ├── __init__.py
    │   ├── datasets.py
    │   └── train_on_policy.py
    ├── eval/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── custom_evaluators.py
    │   ├── custom_inspect_task.py
    │   ├── evaluators.py
    │   ├── inspect_evaluators.py
    │   ├── inspect_utils.py
    │   ├── inspect_utils_test.py
    │   └── run_inspect_evals.py
    ├── example_data/
    │   ├── conversations.jsonl
    │   └── multilingual.txt
    ├── exceptions.py
    ├── exceptions_test.py
    ├── hyperparam_utils.py
    ├── image_processing_utils.py
    ├── image_processing_utils_test.py
    ├── model_info.py
    ├── model_info_test.py
    ├── preference/
    │   ├── __init__.py
    │   ├── comparison_policy_evaluator.py
    │   ├── dpo_datasets.py
    │   ├── preference_datasets.py
    │   ├── train_dpo.py
    │   └── types.py
    ├── py.typed
    ├── recipes/
    │   ├── README.md
    │   ├── chat_sl/
    │   │   ├── README.md
    │   │   ├── chat_datasets.py
    │   │   └── train.py
    │   ├── code_rl/
    │   │   ├── README.md
    │   │   ├── code_env.py
    │   │   ├── code_grading.py
    │   │   ├── deepcoder_tool.py
    │   │   ├── lcb_utils.py
    │   │   ├── sandbox_config/
    │   │   │   └── local.yaml
    │   │   └── train.py
    │   ├── distillation/
    │   │   ├── README.md
    │   │   ├── harbor_multiturn.py
    │   │   ├── harbor_multiturn_test.py
    │   │   ├── off_policy_reasoning.py
    │   │   ├── on_policy_distillation.py
    │   │   ├── on_policy_distillation_harbor_multi_turn.py
    │   │   └── on_policy_multi_teacher.py
    │   ├── harbor_rl/
    │   │   ├── README.md
    │   │   ├── eval.py
    │   │   ├── harbor_env.py
    │   │   ├── harbor_tools.py
    │   │   ├── harbor_tools_test.py
    │   │   ├── scripts/
    │   │   │   ├── .gitignore
    │   │   │   ├── eval_terminal_bench.py
    │   │   │   └── train_terminal_bench.py
    │   │   └── train.py
    │   ├── math_rl/
    │   │   ├── README.md
    │   │   ├── arithmetic_env.py
    │   │   ├── math_env.py
    │   │   ├── math_env_test.py
    │   │   ├── math_grading.py
    │   │   └── train.py
    │   ├── multiplayer_rl/
    │   │   ├── README.md
    │   │   ├── guess_number/
    │   │   │   ├── README.md
    │   │   │   ├── env.py
    │   │   │   └── train.py
    │   │   ├── text_arena/
    │   │   │   ├── README.md
    │   │   │   ├── env.py
    │   │   │   └── train.py
    │   │   └── twenty_questions/
    │   │       ├── README.md
    │   │       ├── common_english_nouns.txt
    │   │       ├── env.py
    │   │       └── train.py
    │   ├── preference/
    │   │   ├── README.md
    │   │   ├── datasets.py
    │   │   ├── dpo/
    │   │   │   ├── README.md
    │   │   │   └── train.py
    │   │   ├── rlhf/
    │   │   │   ├── README.md
    │   │   │   └── rlhf_pipeline.py
    │   │   └── shorter/
    │   │       ├── README.md
    │   │       ├── env.py
    │   │       └── train.py
    │   ├── prompt_distillation/
    │   │   ├── README.md
    │   │   ├── create_data.py
    │   │   └── train.py
    │   ├── rl_basic.py
    │   ├── rl_loop.py
    │   ├── rubric/
    │   │   ├── README.md
    │   │   ├── data.py
    │   │   ├── debug_env.py
    │   │   ├── env.py
    │   │   ├── generate_data.py
    │   │   ├── prometheus_experimental.py
    │   │   └── train.py
    │   ├── search_tool/
    │   │   ├── README.md
    │   │   ├── chroma_pickle_test.py
    │   │   ├── embedding.py
    │   │   ├── offline_eval.py
    │   │   ├── search_env.py
    │   │   ├── tools.py
    │   │   └── train.py
    │   ├── sl_basic.py
    │   ├── sl_loop.py
    │   ├── verifiers_rl/
    │   │   ├── README.md
    │   │   ├── evaluate.py
    │   │   ├── tinker_openai.py
    │   │   ├── train.py
    │   │   ├── verifiers_env.py
    │   │   └── verifiers_pickle_test.py
    │   └── vlm_classifier/
    │       ├── README.md
    │       ├── data.py
    │       ├── eval.py
    │       ├── eval_sweep.py
    │       ├── sweep.py
    │       └── train.py
    ├── renderers/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── base.py
    │   ├── deepseek_v3.py
    │   ├── deepseek_v3_test.py
    │   ├── gpt_oss.py
    │   ├── gpt_oss_test.py
    │   ├── kimi_k2.py
    │   ├── kimi_k25.py
    │   ├── kimi_k25_test.py
    │   ├── kimi_k2_5_tool_declaration_ts.py
    │   ├── kimi_k2_test.py
    │   ├── kimi_k2_tool_declaration_test.py
    │   ├── llama3.py
    │   ├── nemotron3.py
    │   ├── nemotron3_test.py
    │   ├── parsing_test.py
    │   ├── qwen3.py
    │   ├── qwen3_5.py
    │   ├── qwen3_test.py
    │   ├── qwen3_tool_declaration_test.py
    │   ├── renderer_pickle_test.py
    │   ├── renderers_test.py
    │   ├── role_colon.py
    │   ├── testing_utils.py
    │   └── tool_calling_test.py
    ├── rl/
    │   ├── __init__.py
    │   ├── builder_pickle_test.py
    │   ├── data_processing.py
    │   ├── message_env.py
    │   ├── message_env_test.py
    │   ├── metric_util.py
    │   ├── metrics.py
    │   ├── multiturn_weight_assignment_test.py
    │   ├── play_w_env.py
    │   ├── preference_envs.py
    │   ├── problem_env.py
    │   ├── rollout_error_resilience_test.py
    │   ├── rollout_logging.py
    │   ├── rollout_logging_test.py
    │   ├── rollout_strategy.py
    │   ├── rollouts.py
    │   ├── shutdown_test.py
    │   ├── train.py
    │   └── types.py
    ├── sandbox/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── modal_sandbox.py
    │   ├── sandbox_interface.py
    │   └── sandboxfusion.py
    ├── scripts/
    │   ├── merge_tinker_adapter_to_hf_model.py
    │   └── test_tool_calling_e2e.py
    ├── supervised/
    │   ├── __init__.py
    │   ├── common.py
    │   ├── data.py
    │   ├── nll_evaluator.py
    │   ├── resume_test.py
    │   ├── train.py
    │   ├── types.py
    │   └── viz_sft_dataset.py
    ├── third_party/
    │   ├── __init__.py
    │   ├── litellm/
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── provider.py
    │   │   └── provider_test.py
    │   ├── openai_compat.py
    │   └── openai_compat_test.py
    ├── tokenizer_utils.py
    ├── tokenizer_utils_test.py
    ├── tool_use/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── agent_tool_message_env.py
    │   ├── agent_tool_message_env_test.py
    │   ├── tools.py
    │   └── types.py
    ├── utils/
    │   ├── __init__.py
    │   ├── code_state.py
    │   ├── deprecation.py
    │   ├── deprecation_test.py
    │   ├── file_utils.py
    │   ├── format_colorized.py
    │   ├── logtree.py
    │   ├── logtree_formatters.py
    │   ├── logtree_test.py
    │   ├── lr_scheduling.py
    │   ├── misc_utils.py
    │   ├── ml_log.py
    │   ├── ml_log_test.py
    │   ├── trace.py
    │   └── trace_test.py
    ├── weights/
    │   ├── __init__.py
    │   ├── _artifacts.py
    │   ├── _download.py
    │   ├── _export/
    │   │   ├── __init__.py
    │   │   ├── _full.py
    │   │   ├── _quantized.py
    │   │   └── _shard.py
    │   ├── _merge.py
    │   ├── _publish.py
    │   ├── artifacts_test.py
    │   ├── download_test.py
    │   ├── export_test.py
    │   ├── merge_test.py
    │   ├── publish_test.py
    │   ├── quantized_test.py
    │   └── stress_test.py
    └── xmux/
        ├── README.md
        ├── __init__.py
        ├── control.py
        ├── core.py
        ├── examples/
        │   ├── async_rl_sweep.py
        │   ├── fake_train.py
        │   └── ml_sweep.py
        ├── run_job.py
        └── utils.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .claude/skills/checkpoints/SKILL.md
================================================
---
name: checkpoints
description: Guide for checkpointing — saving, loading, and resuming training with CheckpointRecord. Use when the user asks about saving weights, resuming training, checkpoint management, or the checkpoint lifecycle.
---

# Checkpointing

Tinker supports two types of checkpoints and provides utilities for managing them during training.

## Reference

Read these for details:
- `tinker_cookbook/checkpoint_utils.py` — CheckpointRecord, save/load helpers
- `docs/save-load.mdx` — Checkpointing guide (save_weights_for_sampler vs save_state)

## Two checkpoint types

| Type | Method | Purpose | Contains |
|------|--------|---------|----------|
| **State** | `save_state()` | Resume training | Weights + optimizer state |
| **Sampler** | `save_weights_for_sampler()` | Sampling / export | Weights only |

```python
# Save full state (for resumption)
tc.save_state(name="step_100", ttl_seconds=None)

# Save sampler weights (for sampling/export)
tc.save_weights_for_sampler(name="step_100_sampler", ttl_seconds=None)

# Save both + get a SamplingClient
sc = tc.save_weights_and_get_sampling_client(name="step_100")
```

`ttl_seconds=None` means indefinite retention. Set a TTL for intermediate checkpoints to avoid storage bloat.

## CheckpointRecord

Typed dataclass for checkpoint bookkeeping:

```python
from tinker_cookbook.checkpoint_utils import CheckpointRecord

record = CheckpointRecord(
    name="step_100",
    batch=100,
    epoch=1,
    final=False,
    state_path="tinker://...",
    sampler_path="tinker://...",
    extra={"eval_loss": 0.5},  # User metadata
)

# Serialize
d = record.to_dict()

# Deserialize
record = CheckpointRecord.from_dict(d)

# Check if a field is set
record.has("state_path")  # True
```

## Save/load helpers

```python
from tinker_cookbook import checkpoint_utils

# Save checkpoint (async)
paths = await checkpoint_utils.save_checkpoint_async(
    training_client=tc,
    name="step_100",
    log_path="/tmp/my_run",
    loop_state={"batch": 100, "epoch": 1},
    kind="both",           # "state", "sampler", or "both"
    ttl_seconds=None,
)
# paths = {"state_path": "tinker://...", "sampler_path": "tinker://..."}

# Load checkpoint list
records = checkpoint_utils.load_checkpoints_file("/tmp/my_run")

# Get last checkpoint
record = checkpoint_utils.get_last_checkpoint(
    "/tmp/my_run",
    required_key="state_path",  # Only return records with this field
)
```

## Resuming training

The standard pattern (used by `supervised/train.py` and `rl/train.py`):

```python
# In CLIConfig
behavior_if_log_dir_exists: cli_utils.LogdirBehavior = "ask"  # "ask", "delete", "resume"

# In training loop
if config.load_checkpoint_path:
    tc.load_state_with_optimizer(config.load_checkpoint_path)
```

Set `behavior_if_log_dir_exists=resume` to continue from the last checkpoint in an existing log directory.

## Managing checkpoints (REST API / CLI)

Beyond saving and loading during training, you can manage checkpoints via the REST API or CLI. See `/tinker-sdk` for RestClient details and `/tinker-cli` for CLI commands.

```python
from tinker import ServiceClient
rest = ServiceClient().create_rest_client()

# List all your checkpoints
checkpoints = rest.list_user_checkpoints(limit=100)

# Publish a checkpoint (make it publicly accessible)
rest.publish_checkpoint_from_tinker_path("tinker://...")

# Set TTL (auto-delete after N seconds)
rest.set_checkpoint_ttl_from_tinker_path("tinker://...", ttl_seconds=86400)

# Delete a checkpoint
rest.delete_checkpoint_from_tinker_path("tinker://...")
```

Or via CLI:
```bash
tinker checkpoint list
tinker checkpoint publish <TINKER_PATH>
tinker checkpoint set-ttl <TINKER_PATH> --ttl 86400
tinker checkpoint delete <TINKER_PATH>
```

## Common pitfalls
- Use `save_state` for resumable checkpoints, `save_weights_for_sampler` for sampling/export
- `get_last_checkpoint()` returns `None` if no matching checkpoint exists — always check
- Checkpoint paths start with `tinker://` — they reference remote storage, not local files
- Set `ttl_seconds` on intermediate checkpoints to avoid accumulating old weights
- For RLHF pipelines, the SFT stage saves `state_path` (for RL init) and the RM stage saves `sampler_path` (for reward scoring)
- `delete` is permanent — there is no undo


================================================
FILE: .claude/skills/ci/SKILL.md
================================================
---
name: ci
description: Guide for testing conventions and CI pipelines — unit tests, integration smoke tests, pytest markers, and GitHub Actions workflows. Use when the user asks about testing, CI, running tests, or adding tests for a recipe.
---

# Testing & CI

The repo has two layers of testing and two CI workflows.

## Reference

Read these for details:
- `tests/helpers.py` — `run_recipe()` helper for smoke tests
- `tests/conftest.py` — Pytest configuration and API key handling
- `tests/recipes/` — Existing recipe smoke tests
- `.github/workflows/pytest.yaml` — Unit test CI (every PR)
- `.github/workflows/smoke-test-recipes.yaml` — Smoke test CI (daily)
- `CONTRIBUTING.md` — Development setup and test commands
- `pyproject.toml` — Pytest configuration (testpaths, markers, file patterns)

## Test structure

```
tinker-cookbook/
├── tinker_cookbook/
│   ├── renderers/parsing_test.py     # Unit tests: *_test.py next to source
│   ├── recipes/math_rl/math_env_test.py
│   └── ...
└── tests/
    ├── conftest.py                   # Skips integration tests without API key
    ├── helpers.py                    # run_recipe() helper
    └── recipes/
        ├── test_recipe_chat_sl.py    # Integration tests: test_recipe_*.py
        ├── test_recipe_dpo.py
        └── ...
```

## Unit tests (`*_test.py`)

Colocated with source code. Run without API key.

```bash
uv run pytest tinker_cookbook/
```

**Conventions:**
- File naming: `<module>_test.py` next to the code it tests
- No network calls, no `TINKER_API_KEY` required
- Fast (< 1s per test)
- Use standard pytest features (fixtures, parametrize, marks)
- Test picklability for components used in distributed rollout

**Example:** `tinker_cookbook/renderers/parsing_test.py`

## Integration / smoke tests (`test_recipe_*.py`)

Live in `tests/recipes/`. Require `TINKER_API_KEY`. Verify recipes can run.

```bash
# Run all integration tests
uv run pytest tests/ -v -x -s

# Run a specific recipe test
uv run pytest tests/recipes/test_recipe_chat_sl.py -v -x -s
```

**Conventions:**
- File naming: `tests/recipes/test_recipe_<name>.py`
- Mark with `@pytest.mark.integration`
- Use `run_recipe()` from `tests/helpers.py`
- `run_recipe()` passes `max_steps=2` by default — recipe runs 2 training steps and exits
- Always pass `behavior_if_log_dir_exists=delete` to avoid CI conflicts
- Override batch sizes to small values for fast execution

**Template:**

```python
import pytest
from tests.helpers import run_recipe

@pytest.mark.integration
def test_my_recipe():
    run_recipe(
        "tinker_cookbook.recipes.my_recipe.train",
        [
            "behavior_if_log_dir_exists=delete",
            "groups_per_batch=4",
        ],
    )
```

### How `run_recipe()` works
1. Launches `uv run python -m <module> <args> max_steps=2` as a subprocess
2. Streams stdout in real time for CI debuggability
3. Waits for clean exit (exit code 0) within timeout (default: 1800s)
4. Fails if process exits non-zero or times out

## Pytest markers

Defined in `pyproject.toml`:
- `@pytest.mark.integration` — Requires API key, skipped locally without `TINKER_API_KEY`
- `@pytest.mark.slow` — Long-running tests

`tests/conftest.py` auto-skips integration tests when `TINKER_API_KEY` is not set locally; on CI, a missing key causes the tests to fail rather than be skipped.

## CI workflows

### `pytest.yaml` — Unit tests (every PR/push to main)
```
Trigger: push to main, pull requests
Runs: uv run pytest tinker_cookbook/
Requires: HF_TOKEN (for tokenizer access)
```

### `smoke-test-recipes.yaml` — Integration tests (daily + manual)
```
Trigger: daily at 6am UTC, manual dispatch
Runs: Each test_recipe_*.py in parallel (matrix strategy)
Requires: TINKER_API_KEY, HF_TOKEN
Timeout: 20 min per recipe
Concurrency: 1 (avoid API contention)
```

Adding `tests/recipes/test_recipe_<name>.py` is all that's needed — CI auto-discovers it.

## Running pre-commit checks

```bash
uv run ruff check tinker_cookbook/
uv run ruff format tinker_cookbook/
uv run pyright tinker_cookbook/
pre-commit run --all-files
```


================================================
FILE: .claude/skills/completers/SKILL.md
================================================
---
name: completers
description: Guide for using completers — TokenCompleter and MessageCompleter for text generation during RL rollouts and evaluation. Use when the user asks about generating text, completing messages, or using completers in RL environments.
---

# Completers

Completers wrap SamplingClient for convenient text generation. Two levels of abstraction:
- **TokenCompleter** — low-level, returns tokens + logprobs
- **MessageCompleter** — high-level, returns parsed Message objects

## Reference

Read these for details:
- `tinker_cookbook/completers.py` — Implementation
- `docs/completers.mdx` — Usage guide

## TokenCompleter

Generates tokens from a ModelInput prompt. Used internally by RL rollouts.

```python
from tinker_cookbook.completers import TinkerTokenCompleter, TokensWithLogprobs

completer = TinkerTokenCompleter(
    sampling_client=sc,
    max_tokens=256,
    temperature=1.0,
)

result: TokensWithLogprobs = await completer(
    model_input=prompt,
    stop=stop_sequences,  # list[str] or list[int]
)
# result.tokens: list[int]
# result.maybe_logprobs: list[float] | None
```

## MessageCompleter

Higher-level: takes a conversation (list of Messages), returns a Message. Handles rendering and parsing internally.

```python
from tinker_cookbook.completers import TinkerMessageCompleter

completer = TinkerMessageCompleter(
    sampling_client=sc,
    renderer=renderer,
    max_tokens=256,
    temperature=1.0,
    stop_condition=None,  # Override stop sequences
)

response_message: Message = await completer(messages=[
    {"role": "user", "content": "What is 2+2?"},
])
# response_message = {"role": "assistant", "content": "4"}
```

## When to use which

- **TokenCompleter**: RL rollouts, custom generation loops where you need logprobs and token-level control
- **MessageCompleter**: Evaluation, tool-use environments, multi-turn RL where you work with Messages

## Custom completers

Both are abstract base classes you can subclass for non-Tinker backends:

```python
from tinker_cookbook.completers import TokenCompleter, MessageCompleter

class MyTokenCompleter(TokenCompleter):
    async def __call__(self, model_input, stop) -> TokensWithLogprobs:
        ...

class MyMessageCompleter(MessageCompleter):
    async def __call__(self, messages) -> Message:
        ...
```

## Common pitfalls
- Create a new completer (with a new SamplingClient) after saving weights
- `TokensWithLogprobs.maybe_logprobs` can be `None` if logprobs weren't requested
- MessageCompleter uses the renderer for both prompt construction and response parsing


================================================
FILE: .claude/skills/contributing/SKILL.md
================================================
---
name: contributing
description: Guide for contributing to the tinker-cookbook repo — development setup, code style, type checking, PR process, and design conventions. Use when the user asks about how to contribute, set up the dev environment, code style, or project conventions.
---

# Contributing

Guide for developing and contributing to tinker-cookbook.

## Reference

Read `CONTRIBUTING.md` for the full guide.

## Development setup

```bash
git clone https://github.com/thinking-machines-lab/tinker-cookbook.git
cd tinker-cookbook
uv sync --extra dev
pre-commit install
```

This installs dev dependencies and registers pre-commit hooks (`ruff` formatting/linting).

## Code style

- **Formatter/Linter:** [ruff](https://docs.astral.sh/ruff/) (line length: 100)
- **Type checker:** [pyright](https://github.com/microsoft/pyright)
- **Pre-commit hooks** run automatically on every commit

```bash
uv run ruff check tinker_cookbook/
uv run ruff format tinker_cookbook/
uv run pyright tinker_cookbook/
```

### Typing rules
- Use explicit types everywhere
- Avoid `Any` and `type: ignore` — prefer casting
- Prefer single types over union types
- Don't add convoluted generics just to satisfy the type checker

## Design conventions

### Builder pattern
Config objects build runtime objects:
- `SupervisedDatasetBuilder` → `SupervisedDataset`
- `RLDatasetBuilder` → `RLDataset`
- `EnvGroupBuilder` → group of `Env` objects

Config objects use `@chz.chz` decorator. They have a `__call__` method that builds the runtime object.

### Config/runtime separation
- **Config:** `@chz.chz` dataclasses, serializable, lightweight
- **Runtime:** Regular classes or dataclasses, heavyweight (datasets, clients)

### Training script organization
- **`tinker_cookbook/<module>/train.py`** — Main training loop with detailed `Config` (not CLI-constructable)
- **`tinker_cookbook/recipes/<name>/train.py`** — Launch script with `CLIConfig` from command line

### Async
- All methods that take nontrivial time should be async (especially in RL)
- Some beginner-oriented code (e.g., `sl_loop.py`) uses sync for simplicity

### Env lifecycle
- `Env` objects are single-use (no reset)
- Shared resources managed by `EnvGroupBuilder`, not individual `Env`s

### Dimension notation
Subscript suffixes on variable names:
- `_P` = problems, `_G` = groups, `_T` = tokens, `_D` = datums
- Example: `tokens_P_G_T[p][g][t]` = token t of group g of problem p
- Flattened: `tokens_PG_T` = problems and groups merged into one dimension

## PR process

1. Create a feature branch from `main`
2. Make changes with tests
3. Run `pre-commit run --all-files`
4. Open PR with clear description

CI runs pre-commit, pyright, and pytest on every PR.

## Testing

See the `/ci` skill for full testing details.

```bash
# Unit tests (no API key needed)
uv run pytest tinker_cookbook/

# Integration tests (requires TINKER_API_KEY)
uv run pytest tests/
```


================================================
FILE: .claude/skills/datasets/SKILL.md
================================================
---
name: datasets
description: Guide for dataset construction — SupervisedDatasetBuilder, RLDatasetBuilder, ChatDatasetBuilder, and custom dataset creation from JSONL, HuggingFace, or conversation data. Use when the user asks about datasets, data loading, data preparation, or custom data formats.
---

# Datasets

The cookbook uses the builder pattern for datasets: a `*DatasetBuilder` (config) builds a `*Dataset` (runtime).

## Reference

Read these for details:
- `tinker_cookbook/supervised/types.py` — SupervisedDatasetBuilder, ChatDatasetBuilder, ChatDatasetBuilderCommonConfig
- `tinker_cookbook/supervised/data.py` — Dataset construction helpers, FromConversationFileBuilder
- `tinker_cookbook/rl/types.py` — RLDatasetBuilder, RLDataset
- `docs/training-sampling.mdx` — Data preparation basics

## Supervised datasets

### ChatDatasetBuilderCommonConfig

Shared config for all chat-based dataset builders:

```python
from tinker_cookbook.supervised.types import ChatDatasetBuilderCommonConfig
from tinker_cookbook.renderers import TrainOnWhat

common_config = ChatDatasetBuilderCommonConfig(
    model_name_for_tokenizer="meta-llama/Llama-3.1-8B",
    renderer_name="llama3",
    max_length=32768,        # Max sequence length
    batch_size=128,          # Tokens per batch
    train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
)
```

### Built-in datasets

```python
from tinker_cookbook.recipes.chat_sl.chat_datasets import NoRobotsBuilder, Tulu3Builder

dataset = NoRobotsBuilder(common_config=common_config)
dataset = Tulu3Builder(common_config=common_config)
```

### Custom JSONL file

```python
from tinker_cookbook.supervised.data import FromConversationFileBuilder

dataset = FromConversationFileBuilder(
    common_config=common_config,
    file_path="/path/to/data.jsonl",
    test_size=100,       # Hold out 100 examples for eval
    shuffle_seed=42,
)
```

JSONL format — each line is a conversation:
```json
{"messages": [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
```

See `tinker_cookbook/example_data/conversations.jsonl` for the expected format.

### From HuggingFace datasets

```python
from tinker_cookbook.supervised.data import SupervisedDatasetFromHFDataset

dataset = SupervisedDatasetFromHFDataset(
    hf_dataset=hf_dataset,
    batch_size=128,
    map_fn=lambda example: conversation_to_datum(
        example["messages"], renderer, max_length, train_on_what
    ),
)
```

### Low-level datum construction

```python
from tinker_cookbook.supervised.data import conversation_to_datum

# Full pipeline: messages → datum
datum = conversation_to_datum(messages, renderer, max_length, train_on_what)

# Or step by step:
model_input, weights = renderer.build_supervised_example(messages)
datum = datum_from_model_input_weights(model_input, weights, max_length)
```

## RL datasets

RL datasets return batches of `EnvGroupBuilder` objects. See the `/environments` skill for details.

```python
@chz.chz
class MyRLDatasetBuilder(RLDatasetBuilder):
    batch_size: int = 128
    group_size: int = 4

    async def __call__(self) -> tuple[RLDataset, RLDataset | None]:
        # Return (train_dataset, optional_test_dataset)
        ...
```

## DPO datasets

DPO uses comparison pairs (chosen vs rejected):

```python
from tinker_cookbook.preference.dpo_datasets import DPODatasetBuilderFromComparisons

dataset = DPODatasetBuilderFromComparisons(
    common_config=common_config,
    comparison_builder=HHHComparisonBuilder(),
)
```

See `tinker_cookbook/preference/dpo_datasets.py` and `tinker_cookbook/recipes/preference/datasets.py`.

## Common pitfalls
- Always use `ChatDatasetBuilderCommonConfig` for consistent tokenizer/renderer setup
- `batch_size` is in tokens, not examples — larger sequences mean fewer examples per batch
- Custom JSONL must match the format in `example_data/conversations.jsonl`
- Use `test_size` to hold out evaluation data from the same distribution
- Dataset builders must be serializable (`@chz.chz`) for config persistence and sweeps


================================================
FILE: .claude/skills/distillation/SKILL.md
================================================
---
name: distillation
description: Set up and run knowledge distillation (on-policy, off-policy, or multi-teacher) from a teacher model to a student model using the Tinker API. Use when the user wants to distill knowledge, compress models, or train a student from a teacher.
argument-hint: "[student-model] [teacher-model]"
---

# Knowledge Distillation

Help the user set up and run distillation from teacher to student models using the Tinker API.

## Step 1: Understand the request

Ask the user (if not already specified):
- **Student model**: Which model to train (e.g., `Qwen/Qwen3-8B-Base`)
- **Teacher model**: Which model to distill from (e.g., `Qwen/Qwen3-8B`, or a checkpoint path)
- **Distillation type**:
  - **On-policy**: Student generates, teacher scores via KL — best for reasoning/chat
  - **Off-policy reasoning**: SFT on teacher-generated reasoning traces (e.g., OpenThoughts3)
  - **Multi-teacher**: Combine multiple teachers on different datasets

## Step 2: Reference existing recipes

Read these files for patterns:
- `tinker_cookbook/recipes/distillation/on_policy_distillation.py` — On-policy distillation CLI
- `tinker_cookbook/recipes/distillation/off_policy_reasoning.py` — SFT on OpenThoughts3 traces
- `tinker_cookbook/recipes/distillation/on_policy_multi_teacher.py` — Multi-teacher setup
- `tinker_cookbook/distillation/train_on_policy.py` — Core on-policy training loop
- `tinker_cookbook/distillation/datasets.py` — TeacherConfig, PromptOnlyDatasetBuilder, DistillationDatasetConfig

## Step 3: Choose distillation approach

### On-Policy Distillation (Recommended)
Student generates samples, teacher provides KL penalty supervision. No correctness rewards needed.

Key config:
- `TeacherConfig(base_model="Qwen/Qwen3-8B", load_checkpoint_path=None)`
- `PromptOnlyDatasetBuilder(dataset_name="deepmath"|"tulu3", ...)`
- `DistillationDatasetConfig(dataset_builder=..., teacher_config=..., groups_per_batch=...)`
- `kl_penalty_coef`: Weight of KL penalty (default 1.0)
- `kl_discount_factor`: Discount for future KL (0.0 = no discount)

### Off-Policy Reasoning (SFT on Traces)
Standard SFT on pre-generated reasoning traces (e.g., OpenThoughts3). Simpler but less effective than on-policy.

See `recipes/distillation/off_policy_reasoning.py` — uses the standard SL pipeline from `supervised/train.py`.

### Multi-Teacher Distillation
Combine multiple teacher models on different datasets. Each dataset can have its own teacher.

See `recipes/distillation/on_policy_multi_teacher.py` — passes multiple `DistillationDatasetConfig` objects.

## Step 4: Write the training script

Follow the on-policy distillation pattern:

```python
import asyncio
import chz
from tinker_cookbook import checkpoint_utils, cli_utils
from tinker_cookbook.distillation import train_on_policy
from tinker_cookbook.distillation.datasets import (
    DistillationDatasetConfig,
    PromptOnlyDatasetBuilder,
    TeacherConfig,
)

@chz.chz
class CLIConfig:
    model_name: str = "Qwen/Qwen3-8B-Base"       # Student
    teacher_model: str = "Qwen/Qwen3-8B"          # Teacher
    dataset: str = "deepmath"                       # deepmath or tulu3
    group_size: int = 4
    groups_per_batch: int = 1024
    learning_rate: float = 1e-4
    max_tokens: int = 4096
    kl_penalty_coef: float = 1.0
    kl_discount_factor: float = 0.0
    lora_rank: int = 128
    loss_fn: str = "importance_sampling"

async def cli_main(cli_config: CLIConfig):
    renderer_name = await checkpoint_utils.resolve_renderer_name_from_checkpoint_or_default_async(
        model_name=cli_config.model_name, ...)

    dataset_builder = PromptOnlyDatasetBuilder(
        dataset_name=cli_config.dataset,
        groups_per_batch=cli_config.groups_per_batch,
        group_size=cli_config.group_size,
        model_name_for_tokenizer=cli_config.model_name,
        renderer_name=renderer_name,
    )
    teacher_config = TeacherConfig(base_model=cli_config.teacher_model)
    dataset_config = DistillationDatasetConfig(
        dataset_builder=dataset_builder,
        teacher_config=teacher_config,
        groups_per_batch=cli_config.groups_per_batch,
    )
    config = train_on_policy.Config(
        dataset_configs=[dataset_config],
        model_name=cli_config.model_name,
        renderer_name=renderer_name,
        learning_rate=cli_config.learning_rate,
        lora_rank=cli_config.lora_rank,
        max_tokens=cli_config.max_tokens,
        kl_penalty_coef=cli_config.kl_penalty_coef,
        kl_discount_factor=cli_config.kl_discount_factor,
        loss_fn=cli_config.loss_fn,
        log_path="/tmp/tinker-examples/distillation/my_run",
    )
    await train_on_policy.main(config)
```

## Step 5: Run

```bash
# On-policy distillation (reasoning)
python -m tinker_cookbook.recipes.distillation.on_policy_distillation \
    model_name=Qwen/Qwen3-8B-Base dataset=deepmath learning_rate=1e-4

# Off-policy reasoning (SFT on traces)
python -m tinker_cookbook.recipes.distillation.off_policy_reasoning \
    model_name=Qwen/Qwen3-8B-Base learning_rate=2e-4

# Multi-teacher
python -m tinker_cookbook.recipes.distillation.on_policy_multi_teacher \
    model_name=Qwen/Qwen3-8B-Base learning_rate=1e-4
```

## Step 6: Add tests

If you created a new distillation recipe, add a smoke test:

```python
# tests/recipes/test_recipe_<name>.py
import pytest
from tests.helpers import run_recipe

@pytest.mark.integration
def test_<recipe_name>():
    run_recipe(
        "tinker_cookbook.recipes.<recipe_name>.train",
        ["behavior_if_log_dir_exists=delete", "groups_per_batch=16"],
    )
```

`run_recipe()` automatically passes `max_steps=2` so the recipe runs 2 training steps and exits. See `tests/recipes/test_recipe_on_policy_distillation.py` and `tests/recipes/test_recipe_on_policy_multi_teacher.py` for existing examples.

## Step 7: Export weights (optional)

After distillation, export the student model using the `tinker_cookbook.weights` API:

```python
from tinker_cookbook import weights

adapter_dir = weights.download(tinker_path="tinker://run-id/sampler_weights/final", output_dir="./adapter")
weights.build_hf_model(base_model="Qwen/Qwen3-8B-Base", adapter_path=adapter_dir, output_path="./model")
weights.publish_to_hf_hub(model_path="./model", repo_id="user/my-distilled-model")
```

## Common pitfalls
- Teacher model must be compatible with student's tokenizer/renderer
- On-policy is generally better than off-policy but more compute-intensive
- `kl_discount_factor=0.0` means no discounting — increase for longer sequences
- High `kl_penalty_coef` can make training too conservative
- For multi-teacher, ensure `groups_per_batch` is balanced across datasets


================================================
FILE: .claude/skills/dpo/SKILL.md
================================================
---
name: dpo
description: Set up and run Direct Preference Optimization (DPO) training on preference datasets using the Tinker API. Use when the user wants to train with preference data, chosen/rejected pairs, or DPO.
argument-hint: "[model-name] [dataset]"
---

# Direct Preference Optimization (DPO)

Help the user set up and run DPO training using the Tinker API.

## Step 1: Understand the request

Ask the user (if not already specified):
- **Model**: Which model to train (e.g., `meta-llama/Llama-3.2-1B`, `Qwen/Qwen3-8B`)
- **Dataset**: Which preference dataset — built-in (HHH, HelpSteer3, UltraFeedback) or custom
- **Starting checkpoint**: Train from base model or from an SFT checkpoint

## Step 2: Reference existing recipes

Read these files for patterns:
- `tinker_cookbook/recipes/preference/dpo/train.py` — DPO CLI with built-in datasets
- `tinker_cookbook/preference/train_dpo.py` — Core DPO training loop
- `tinker_cookbook/preference/dpo_datasets.py` — DPO dataset builders
- `tinker_cookbook/recipes/preference/datasets.py` — HHH, HelpSteer3, UltraFeedback builders
- `docs/preferences/dpo-guide.mdx` — DPO guide

## Step 3: Configure the training run

### Key Parameters

- `dpo_beta`: Controls how much the model deviates from reference. **Start with 0.1** (recommended default).
  - Lower beta = more deviation from reference (more aggressive optimization)
  - Higher beta = stays closer to reference (more conservative)
- `learning_rate`: Typically **1e-5** for DPO (lower than SFT)
- `lr_schedule`: `"linear"` decay is standard
- `batch_size`: Number of tokens per batch (default: 256)
- `max_length`: Maximum sequence length (default: 8192)
- `reference_model_name`: Explicit reference model (defaults to the base model)

### Preference Datasets

**Built-in:**
- `"hhh"` — Anthropic HHH (Helpful, Harmless, Honest) comparisons
- `"helpsteer3"` — NVIDIA HelpSteer3 preference data
- `"ultrafeedback"` — UltraFeedback preference data

**Custom:** Create a `ComparisonBuilder` that yields `(chosen, rejected)` conversation pairs. See `recipes/preference/datasets.py` for examples.

### Dataset Construction
```python
from tinker_cookbook.preference.dpo_datasets import DPODatasetBuilderFromComparisons
from tinker_cookbook.recipes.preference.datasets import HHHComparisonBuilder

common_config = ChatDatasetBuilderCommonConfig(
    model_name_for_tokenizer=model_name,
    renderer_name=renderer_name,
    max_length=8192,
    batch_size=256,
)
dataset = DPODatasetBuilderFromComparisons(
    common_config=common_config,
    comparison_builder=HHHComparisonBuilder(),
)
```

## Step 4: Write the training script

Follow the pattern from `recipes/preference/dpo/train.py`:

```python
import chz
from tinker_cookbook import checkpoint_utils, cli_utils
from tinker_cookbook.preference import train_dpo
from tinker_cookbook.preference.dpo_datasets import DPODatasetBuilderFromComparisons
from tinker_cookbook.supervised.types import ChatDatasetBuilderCommonConfig

config = train_dpo.Config(
    log_path="/tmp/tinker-examples/dpo/my_run",
    model_name="meta-llama/Llama-3.2-1B",
    renderer_name=renderer_name,
    dataset_builder=dataset,
    learning_rate=1e-5,
    lr_schedule="linear",
    dpo_beta=0.1,
    reference_model_name=None,  # Uses base model as reference
    load_checkpoint_path=None,  # Or path to SFT checkpoint
)

train_dpo.main(config)
```

## Step 5: Run

```bash
# Basic DPO with HHH dataset
python -m tinker_cookbook.recipes.preference.dpo.train dataset=hhh

# With different model and dataset
python -m tinker_cookbook.recipes.preference.dpo.train \
    model_name=meta-llama/Llama-3.1-8B \
    dataset=ultrafeedback \
    dpo_beta=0.1 \
    learning_rate=1e-5

# From an SFT checkpoint
python -m tinker_cookbook.recipes.preference.dpo.train \
    load_checkpoint_path=/tmp/tinker-examples/sft/checkpoint_100
```

## Step 6: Add tests

If you created a new DPO recipe, add a smoke test:

```python
# tests/recipes/test_recipe_<name>.py
import pytest
from tests.helpers import run_recipe

@pytest.mark.integration
def test_<recipe_name>():
    run_recipe(
        "tinker_cookbook.recipes.<recipe_name>.train",
        ["behavior_if_log_dir_exists=delete"],
    )
```

`run_recipe()` automatically passes `max_steps=2` so the recipe runs 2 training steps and exits. See `tests/recipes/test_recipe_dpo.py` for the existing example.

## Step 7: Export weights (optional)

After DPO training, export weights using the `tinker_cookbook.weights` API:

```python
from tinker_cookbook import weights

adapter_dir = weights.download(tinker_path="tinker://run-id/sampler_weights/final", output_dir="./adapter")
weights.build_hf_model(base_model="meta-llama/Llama-3.2-1B", adapter_path=adapter_dir, output_path="./model")
weights.publish_to_hf_hub(model_path="./model", repo_id="user/my-dpo-model")
```

## Common pitfalls
- **Start with `dpo_beta=0.1`** — this is well-tested. Tune from there.
- DPO LR should be **lower than SFT** (1e-5 vs 2e-4)
- DPO works best when starting from an SFT checkpoint, not a raw base model
- Reference model defaults to the base model — set `reference_model_name` explicitly if you want a different reference
- Preference data quality matters more than quantity — ensure chosen/rejected pairs have clear quality differences


================================================
FILE: .claude/skills/environments/SKILL.md
================================================
---
name: environments
description: Guide for defining RL environments — the Env protocol, EnvGroupBuilder, RLDataset, and custom environment creation. Use when the user asks about RL environments, reward functions, or how to define custom tasks for RL training.
---

# RL Environments

RL training requires environments that provide observations and rewards. This skill covers how to define and use them.

## Reference

Read these for details:
- `tinker_cookbook/rl/types.py` — Env, EnvGroupBuilder, RLDatasetBuilder, Trajectory
- `docs/rl/rl-envs.mdx` — Custom environments guide
- `tinker_cookbook/recipes/math_rl/math_env.py` — Math environment example
- `tinker_cookbook/recipes/harbor_rl/harbor_env.py` — Multi-turn sandbox environment
- `tinker_cookbook/rl/message_env.py` — Message-based environment interface
- `CONTRIBUTING.md` — Env lifecycle and design conventions

## Core types

### Env (single-use, no reset)

```python
from tinker_cookbook.rl.types import Env, Observation, Action, StepResult, StopCondition

class MyEnv(Env):
    async def initial_observation(self) -> tuple[Observation, StopCondition]:
        """Return the initial prompt and stop condition."""
        model_input = renderer.build_generation_prompt(messages)
        stop = renderer.get_stop_sequences()
        return model_input, stop

    async def step(self, action: Action) -> StepResult:
        """Process model output and return next observation + reward."""
        # action is TokensWithLogprobs (tokens + logprobs)
        return StepResult(
            observation=next_model_input,
            stop_condition=stop,
            reward=reward_value,
            episode_done=True,
            metrics={"accuracy": 1.0},
        )
```

**Important:** Env objects are **single-use** — no reset method. Create fresh envs via EnvGroupBuilder each batch.

### EnvGroupBuilder

Creates a group of envs for the same prompt/task. Advantages are centered within each group (GRPO).

```python
from tinker_cookbook.rl.types import EnvGroupBuilder, TrajectoryGroup

class MyEnvGroupBuilder(EnvGroupBuilder):
    async def make_envs(self) -> Sequence[Env]:
        """Return group_size envs for the same task."""
        return [MyEnv(problem=self.problem) for _ in range(self.group_size)]

    async def compute_group_rewards(
        self, trajectory_group: list[Trajectory], env_group: Sequence[Env]
    ) -> list[tuple[float, Metrics]]:
        """Compute final rewards for each trajectory in the group."""
        return [(env.reward, {"solved": env.reward > 0}) for env in env_group]

    def logging_tags(self) -> list[str]:
        return ["my_task"]
```

### RLDatasetBuilder

Builds train/test datasets of EnvGroupBuilders:

```python
@chz.chz
class MyDatasetBuilder(RLDatasetBuilder):
    batch_size: int = 128
    group_size: int = 4

    async def __call__(self) -> tuple[RLDataset, RLDataset | None]:
        # Return (train_dataset, optional_test_dataset)
        ...
```

## Key data types

```python
@dataclass
class Transition:
    ob: Observation       # ModelInput
    ac: TokensWithLogprobs  # Action with logprobs
    reward: float
    episode_done: bool

@dataclass
class Trajectory:
    transitions: list[Transition]
    final_ob: Observation

@dataclass
class TrajectoryGroup:
    trajectories_G: list[Trajectory]
    final_rewards_G: list[float]
    metrics_G: list[Metrics]
```

## Patterns

### Single-turn (math, classification)
Model generates one response, gets a reward. See `recipes/math_rl/math_env.py`.

### Multi-turn (tool use, sandbox)
Model generates, environment responds, repeat. See `recipes/harbor_rl/harbor_env.py` and `docs/rl/sequence-extension.mdx` for KV-cache support.

### Multiplayer (games)
Group of envs represents a game — envs within the group interact. See `recipes/multiplayer_rl/text_arena/env.py`.

### Preference-based (RLHF)
Group of envs generates completions, preference model scores pairs. See `tinker_cookbook/rl/preference_envs.py`.

## Pluggable rollout executor

For scaling rollout collection, `train.main()` accepts an optional `rollout_executor` parameter:

```python
from concurrent.futures import ProcessPoolExecutor
from tinker_cookbook.rl.train import main

await main(config, rollout_executor=ProcessPoolExecutor(max_workers=4))
```

EnvGroupBuilders must be **pickleable** for distributed execution. Test with `tinker_cookbook/rl/builder_pickle_test.py`.

## Dimension conventions

- `_P` = problems (different prompts/tasks)
- `_G` = groups (multiple rollouts per problem)
- `_T` = tokens (sequence position)
- `_D` = datums (training data items)

Example: `tokens_P_G_T[p][g][t]` = token `t` of group `g` of problem `p`.

## Common pitfalls
- Envs are **single-use** — always create fresh ones via EnvGroupBuilder
- Advantages are centered within each group — `group_size` affects variance reduction
- EnvGroupBuilders must be pickleable for distributed rollout execution
- Shared resources (DB connections, sandboxes) should be managed by the builder, not the env
- For multi-turn envs, use `max_steps_off_policy` for async rollouts when env execution is slow


================================================
FILE: .claude/skills/evals/SKILL.md
================================================
---
name: evals
description: Guide for evaluation — inline evaluators, Inspect AI integration, and custom evaluators for measuring training progress. Use when the user asks about evaluation, metrics, benchmarks, or how to measure model quality during training.
---

# Evaluation

Training scripts support inline evaluation at configurable intervals. The cookbook provides several evaluator patterns.

## Reference

Read these for details:
- `docs/evals.mdx` — Evaluation guide
- `tinker_cookbook/supervised/train.py` — SL evaluator integration (search for `evaluator_builders`)
- `tinker_cookbook/rl/train.py` — RL evaluator integration
- `tinker_cookbook/recipes/chat_sl/train.py` — Example with Inspect AI evaluators

## Evaluator types

### SL evaluators
SL training supports two evaluator tiers:

```python
config = supervised_train.Config(
    evaluator_builders=[...],              # Run every eval_every steps
    infrequent_evaluator_builders=[...],   # Run every infrequent_eval_every steps
    eval_every=8,
    infrequent_eval_every=50,
)
```

### RL evaluators
RL training uses `SamplingClientEvaluator`:

```python
async def my_evaluator(sampling_client: SamplingClient) -> dict[str, float]:
    # Generate samples, compute metrics
    return {"accuracy": 0.85, "avg_length": 150}

config = rl_train.Config(
    evaluator_builders=[my_evaluator],
    eval_every=20,
)
```

### RL test set evaluator
Evaluates the policy on a held-out test set of environments:

```python
# Built into rl/train.py via test_dataset from RLDatasetBuilder
# RLDatasetBuilder.__call__() returns (train_dataset, test_dataset)
```

## Inspect AI integration

The cookbook integrates with [Inspect AI](https://inspect.aisi.org.uk/) for standard benchmarks:

```python
from tinker_cookbook.eval.inspect_utils import InspectAPIFromTinkerSampling

# Create an Inspect evaluator that uses Tinker sampling
evaluator = InspectAPIFromTinkerSampling(
    task="gsm8k",          # Inspect task name
    renderer_name=renderer_name,
    model_name=model_name,
    include_reasoning=True,  # Include reasoning traces
)
```

See `tinker_cookbook/recipes/chat_sl/train.py` for a working example with GSM8K and IFEval.

## Custom evaluators

### Pattern 1: Sampling-based evaluation

```python
async def eval_math(sampling_client: SamplingClient) -> dict[str, float]:
    correct = 0
    total = 100
    for problem in test_problems:
        response = sampling_client.sample(
            prompt=problem.prompt,
            num_samples=1,
            sampling_params=SamplingParams(max_tokens=256, temperature=0.0),
        )
        answer = parse_answer(response.sequences[0].tokens)
        if answer == problem.expected:
            correct += 1
    return {"math_accuracy": correct / total}
```

### Pattern 2: NLL-based evaluation

Compute negative log-likelihood on a held-out dataset without generating text. See `tinker_cookbook/supervised/train.py` for the built-in NLL evaluator.

## Metrics logging

```python
from tinker_cookbook.utils.ml_log import log_metrics

log_metrics({"train/loss": 0.5, "eval/accuracy": 0.85}, step=100)
```

## Common pitfalls
- Evaluators run inline during training — keep them fast to avoid stalling the training loop
- Use `infrequent_evaluator_builders` for expensive evals (large benchmarks)
- RL evaluators receive a SamplingClient — create completers from it if needed
- For Inspect AI, set `include_reasoning=True` to capture thinking traces


================================================
FILE: .claude/skills/grpo/SKILL.md
================================================
---
name: grpo
description: Set up and run reinforcement learning with verifiable rewards (RLVR/GRPO) for math, code, or custom environments using the Tinker API. Use when the user wants to do RL training, GRPO, reward-based optimization, or train with verifiable rewards.
argument-hint: "[model-name] [environment]"
---

# Group Relative Policy Optimization (GRPO / RL)

Help the user set up and run RL training with verifiable rewards using the Tinker API.

## Step 1: Understand the request

Ask the user (if not already specified):
- **Model**: Which model to train (e.g., `meta-llama/Llama-3.1-8B-Instruct`, `Qwen/Qwen3-8B`)
- **Environment/Task**: What type of reward signal — math (GSM8K, DeepMath, arithmetic), code (DeepCoder), instruction following (IFBench), or custom
- **Reward type**: Verifiable (programmatic correctness) or learned (preference model)

## Step 2: Reference existing recipes

Read these files for patterns:
- `tinker_cookbook/recipes/rl_basic.py` — Minimal RL example (GSM8K)
- `tinker_cookbook/recipes/math_rl/train.py` — Full math RL with multiple environments and loss functions
- `tinker_cookbook/recipes/code_rl/train.py` — Code generation RL with sandbox execution
- `tinker_cookbook/recipes/rubric/train.py` — Rubric-graded RL with LLM scoring
- `tinker_cookbook/rl/train.py` — Core RL training loop
- `tinker_cookbook/rl/types.py` — Env, EnvGroupBuilder, RLDatasetBuilder
- `docs/rl/rl-basic.mdx` — Getting started
- `docs/rl/rl-envs.mdx` — Custom environments
- `docs/rl/rl-hyperparams.mdx` — Hyperparameter guidance

## Step 3: Configure the training run

### Environment Setup
RL requires an environment that produces rewards. Key patterns:

**Built-in environments:**
- `Gsm8kDatasetBuilder` — Grade-school math (from `recipes/math_rl/math_env.py`)
- `ArithmeticDatasetBuilder` — Simple arithmetic
- `DeepMathDatasetBuilder`, `PolarisDatasetBuilder` — Advanced math
- `DeepCoderDatasetBuilder` — Code generation with sandbox
- `RubricDatasetBuilder` — Rubric-graded tasks

**Custom environments:**
Implement the `Env` protocol from `tinker_cookbook/rl/types.py`. Key points:
- `Env` objects are **single-use** (no reset method)
- Create new envs via `EnvGroupBuilder` each batch
- Each env returns a `float` reward

### Key Hyperparameters

- `group_size`: Number of rollouts per prompt (typically 4-16). Advantages are centered within each group.
- `groups_per_batch` (or `batch_size`): Number of problems per batch
- `max_tokens`: Maximum generation length
- `learning_rate`: Typically 1e-5 to 4e-5 for RL
- `kl_penalty_coef`: KL penalty against reference model (0.0 = no penalty)
- `temperature`: Sampling temperature (default 1.0)
- `rollout_error_tolerance`: Tolerance for rollout errors (`False` = crash on any error, `True` = retry failed trajectories with default budget, `RetryOnFailure(max_retries=5)` = custom retry budget). Error counts logged as `rollout_errors/*` metrics.

### Loss Functions
- `importance_sampling` — Default, on-policy
- `ppo` — Proximal Policy Optimization (clipped)
- `cispo` — Conservative Importance Sampling PPO
- `dro` — Distributionally Robust Optimization
- Configure via `loss_fn` and `loss_fn_config` parameters

### Async Training (Off-Policy)
For overlapping sampling and training:
```python
async_config=AsyncConfig(
    max_steps_off_policy=cli_config.max_steps_off_policy,
    groups_per_batch=cli_config.groups_per_batch,
)
```

## Step 4: Write the training script

Follow the pattern from `rl_basic.py` / `math_rl/train.py`:

```python
import asyncio
import chz
from tinker_cookbook import cli_utils, model_info
from tinker_cookbook.rl import train

def build_config_blueprint() -> chz.Blueprint[train.Config]:
    model_name = "meta-llama/Llama-3.1-8B-Instruct"
    renderer_name = model_info.get_recommended_renderer_name(model_name)

    # Configure your dataset builder with environment
    builder = ...  # e.g., Gsm8kDatasetBuilder(...)

    return chz.Blueprint(train.Config).apply({
        "model_name": model_name,
        "renderer_name": renderer_name,
        "log_path": "/tmp/tinker-examples/my_rl_run",
        "dataset_builder": builder,
        "learning_rate": 4e-5,
        "max_tokens": 256,
        "eval_every": 20,
    })

def main(config: train.Config):
    cli_utils.check_log_dir(config.log_path, behavior_if_exists="ask")
    asyncio.run(train.main(config))
```

For the full CLI pattern with `@chz.chz` config class, see `recipes/math_rl/train.py`.

## Step 5: Run

```bash
python -m tinker_cookbook.recipes.<recipe_name>
```

Override: `python -m tinker_cookbook.recipes.<recipe_name> env=gsm8k group_size=16 learning_rate=4e-5`

## Step 6: Add tests

If you created a new recipe, add a smoke test so CI catches regressions:

```python
# tests/recipes/test_recipe_<name>.py
import pytest
from tests.helpers import run_recipe

@pytest.mark.integration
def test_<recipe_name>():
    run_recipe(
        "tinker_cookbook.recipes.<recipe_name>.train",
        ["behavior_if_log_dir_exists=delete", "groups_per_batch=4", "group_size=2"],
    )
```

`run_recipe()` automatically passes `max_steps=2` so the recipe runs 2 training steps and exits. For environment logic (reward grading, env setup), add unit tests as `*_test.py` next to the source:
- Example: `tinker_cookbook/recipes/math_rl/math_env_test.py`

## Step 7: Export weights (optional)

After training, export weights using the `tinker_cookbook.weights` API:

```python
from tinker_cookbook import weights

adapter_dir = weights.download(tinker_path="tinker://run-id/sampler_weights/final", output_dir="./adapter")
weights.build_hf_model(base_model="meta-llama/Llama-3.1-8B-Instruct", adapter_path=adapter_dir, output_path="./model")
weights.publish_to_hf_hub(model_path="./model", repo_id="user/my-finetuned-model")
```

## Common pitfalls
- `Env` objects are single-use — always create fresh envs via builder
- Advantages are centered within each group — `group_size` matters for variance reduction
- `max_tokens` too small truncates reasoning; too large wastes compute
- Start with small `groups_per_batch` for debugging, scale up for real runs
- Use `num_substeps > 1` for very large batches to split optimizer steps


================================================
FILE: .claude/skills/hyperparams/SKILL.md
================================================
---
name: hyperparams
description: Guide for hyperparameter selection — learning rate formulas, LoRA rank, batch size, group size, schedules, and model-specific tuning. Use when the user asks about learning rate, batch size, hyperparameter tuning, or how to configure training parameters.
---

# Hyperparameter Selection

Guide for choosing training hyperparameters across SL, RL, DPO, and distillation.

## Reference

- `docs/supervised-learning/sl-hyperparams.mdx` — SL hyperparameter guide with LR formula
- `docs/rl/rl-hyperparams.mdx` — RL hyperparameters (batch_size, group_size, num_substeps, async)
- `tinker_cookbook/hyperparam_utils.py` — LR formulas and model-specific calculations

## Learning rate

### The formula

The recommended LR for a model `m` with LoRA:

```
LR(m) = lr_base × M_LoRA × (2000 / H_m) ^ P_m
```

Where:
- `lr_base = 5e-5`
- `M_LoRA = 10` (1 for full fine-tuning)
- `H_m` = hidden size of the model
- `P_m` = model-specific exponent (0.0775 for Qwen, 0.781 for Llama)

### Use the helper function

```python
from tinker_cookbook.hyperparam_utils import get_lr

lr = get_lr("meta-llama/Llama-3.1-8B", is_lora=True)
# Returns model-specific recommended LR
```

This formula gives <0.5% regret vs exhaustive sweeps across diverse SFT experiments.

### Rules of thumb

| Training type | Typical LR range | Notes |
|---------------|------------------|-------|
| SL (LoRA) | 1e-4 to 5e-4 | Use `get_lr()` |
| SL (full FT) | 1e-5 to 5e-5 | LoRA LR / 10 |
| RL | 1e-5 to 4e-5 | Lower than SL |
| DPO | ~1e-5 | Much lower than SL |
| RLHF (RL stage) | ~1e-5 | Same as RL |
| Distillation | ~1e-4 | Similar to SL |

## LoRA rank

- **Default**: 32 for most tasks
- **Higher rank** (64–128): More capacity, needed for complex tasks or large models
- **Lower rank** (8–16): Faster, sufficient for simple adaptations
- LR is **independent** of LoRA rank (validated empirically)

```python
from tinker_cookbook.hyperparam_utils import get_lora_param_count

# Check parameter count for a given rank
params = get_lora_param_count("meta-llama/Llama-3.1-8B", lora_rank=32)
```

## Batch size

### SL batch size
- Measured in **examples (sequences)**, not tokens
- **Recommended**: Start with 128
- Smaller batch sizes often give better final performance at cost of longer training
- Scale LR with the square root of batch size: `LR ∝ √batch_size`
- Aim for at least 100 training steps (best results with 1000+)

### RL batch size and group size
Two parameters control RL batch composition:

- **`batch_size`** (or `groups_per_batch`): Number of unique problems/environments per batch
- **`group_size`**: Number of rollouts per problem (advantages centered within group)

```
total_rollouts = batch_size × group_size
```

Guidelines:
- If limited problems: increase `group_size` for more training signal
- Scale LR with batch_size: `LR ∝ √batch_size`
- Start small for debugging (`groups_per_batch=4, group_size=2`)

## Learning rate schedule

Available schedules:
- `"linear"` — Linear decay to 0 (most common)
- `"cosine"` — Cosine annealing
- `"constant"` — No decay

Set via `lr_schedule` parameter in config.

## `num_substeps` (RL)

Controls how many optimizer updates per sampling iteration:

- `num_substeps=1` (default): One update per batch — simplest, usually sufficient
- `num_substeps>1`: Splits batch into mini-batches, one update each. Requires PPO objective.
- Start with 2–4 if experimenting; decrease LR with higher values

## DPO-specific

- **`dpo_beta=0.1`** — Well-tested default. Controls deviation from reference model.
- Lower beta = more aggressive optimization
- Higher beta = stays closer to reference

## Distillation-specific

- **`kl_penalty_coef=1.0`** — Weight of KL penalty from teacher
- **`kl_discount_factor=0.0`** — No discounting (increase for long sequences)

## Quick-start recommendations

| Scenario | Model | LR | Batch | LoRA Rank |
|----------|-------|-----|-------|-----------|
| SFT on chat data | Llama-3.1-8B | `get_lr(model)` | 128 | 32 |
| Math GRPO | Llama-3.1-8B-Instruct | 4e-5 | 128×16 | 32 |
| DPO | Llama-3.2-1B | 1e-5 | 256 | 32 |
| Distillation | Qwen3-8B-Base | 1e-4 | 1024×4 | 128 |
| Multi-turn RL | Kimi-K2-Thinking | 1e-5 | 8×4 | 32 |

## Common pitfalls
- LoRA needs ~10x higher LR than full fine-tuning — use `get_lr()` to get it right
- `get_lr()` currently only supports Llama and Qwen families — other models need manual tuning
- DPO LR should be much lower than SFT (1e-5 vs 2e-4)
- RL LR should be lower than SFT — too aggressive updates destabilize the policy
- Batch size too small = noisy gradients; too large = diminishing returns
- Monitor KL divergence in RL — training is stable when KL < 0.01


================================================
FILE: .claude/skills/logging/SKILL.md
================================================
---
name: logging
description: Guide for training outputs, metrics logging, logtree reports, tracing/profiling, and debugging training runs. Use when the user asks about training logs, metrics, debugging, tracing, profiling, timing, Gantt charts, or understanding training output files.
---

# Logging & Debugging

Every training run writes structured outputs to `log_path`. This skill covers what's produced and how to use it.

## Reference

- `docs/rl/rl-logging.mdx` — Complete file reference for RL training outputs
- `tinker_cookbook/utils/ml_log.py` — Metrics logging API
- `tinker_cookbook/utils/logtree.py` — Logtree (structured rollout transcripts)
- `tinker_cookbook/utils/trace.py` — Tracing/profiling (`@scope`, `trace_iteration`, Gantt charts)

## Output files

Each training run writes to its `log_path` directory:

| File | Format | Contents |
|------|--------|----------|
| `metrics.jsonl` | JSONL | Scalar metrics per training iteration |
| `config.json` | JSON | Full serialized training config (reproducibility) |
| `checkpoints.jsonl` | JSONL | Checkpoint metadata (paths, loop state for resume) |
| `code.diff` | text | Git diff at training start |
| `train_iteration_NNNNNN.html` | HTML | Human-readable logtree report |
| `train_iteration_NNNNNN_logtree.json` | JSON | Machine-readable rollout transcripts |
| `train_iteration_NNNNNN_rollout_summaries.jsonl` | JSONL | Per-trajectory rewards and metrics |
| `eval_<name>_iteration_NNNNNN.*` | mixed | Same formats for eval rollouts |
| `timing_spans.jsonl` | JSONL | Per-iteration span timing data (from `trace_iteration`) |
| `trace_events.jsonl` | JSONL | Perfetto/Chrome Trace format events (from `trace_init`) |
| `gantt_NNNNNN.html` | HTML | Plotly Gantt chart of span timeline (optional) |

Iteration numbers are zero-padded to 6 digits.

## Analyzing metrics

```python
import pandas as pd

df = pd.read_json("path/to/log_path/metrics.jsonl", lines=True)
df.plot(x="progress/batch", y="env/all/reward/total")
```

### Common metric keys

**Progress:**
- `progress/batch` — iteration index
- `progress/done_frac` — completion fraction

**RL rewards:**
- `env/all/reward/total` — mean total reward
- `env/all/<metric>` — env-emitted metrics (e.g., `correct`, `format_parse`)

**Training health:**
- `entropy` — per-token entropy
- `kl_sample_train_v1`, `kl_sample_train_v2` — KL divergence (should stay < 0.01)
- `optim/lr` — current learning rate
- `ac_tokens_per_turn` — mean generated tokens per turn

**Timing** (from `trace_iteration`):
- `time/total` — iteration wall-clock duration
- `time/<name>` — single-call duration (e.g., `time/train_step`)
- `time/<name>:total`, `time/<name>:count`, `time/<name>:mean`, `time/<name>:max` — aggregates for functions called multiple times (e.g., `time/sample_async:total`)

## Analyzing rollouts

### Rollout summaries (aggregate)

```python
import json

with open("train_iteration_000010_rollout_summaries.jsonl") as f:
    trajectories = [json.loads(line) for line in f]

for traj in trajectories:
    print(f"reward={traj['total_reward']:.2f}, metrics={traj['trajectory_metrics']}")
    # Each trajectory has: total_reward, final_reward, trajectory_metrics,
    # steps (list of {ob_len, ac_len, reward, episode_done, metrics})
```

### Logtree JSON (full transcripts)

Contains full text of prompts, model responses, grading details. Walk the tree recursively looking for nodes with `data.type == "conversation"` to extract conversations. See `docs/rl/rl-logging.mdx` for the full schema.

### HTML reports

Open `train_iteration_NNNNNN.html` in a browser for a human-readable view of rollouts with collapsible sections. `num_groups_to_log` (default: 4) controls how many trajectory groups get detailed logging.

## Logging in your own code

### Scalar metrics

```python
from tinker_cookbook.utils import ml_log

# Set up logging (done once in training scripts)
ml_logger = ml_log.setup_logging(log_path="/tmp/my_run", wandb_project=None, wandb_name=None)

# Log scalar metrics
ml_logger.log_metrics({"train/loss": 0.5, "eval/accuracy": 0.85}, step=100)
```

### Logtree (structured transcripts)

```python
from tinker_cookbook.utils import logtree

with logtree.scope_header("my_section"):
    # Nested logging of rollouts, grading, etc.
    ...
```

## Weights & Biases integration

Pass `wandb_project` and `wandb_name` in your config to enable W&B logging:

```python
config = train.Config(
    wandb_project="my-project",
    wandb_name="my-experiment",
    ...
)
```

## Tracing & profiling

The `tinker_cookbook/utils/trace` module provides per-iteration profiling across all training modules (RL, SL, DPO, distillation).

### Core API

```python
from tinker_cookbook.utils import trace

# Initialize Perfetto trace collector (optional — writes trace_events.jsonl)
trace.trace_init()

# In training loop — collect per-iteration timing
for i_batch in range(n_batches):
    with trace.trace_iteration(step=i_batch) as window:
        # All @scope-decorated calls are automatically recorded
        await gather_rollouts(...)
        await train_step(...)

    # Get timing metrics for this iteration
    metrics.update(window.get_timing_metrics())

    # Persist span data for post-hoc analysis
    window.write_spans_jsonl(log_path / "timing_spans.jsonl", step=i_batch)

    # Optional: Gantt chart visualization (requires plotly)
    trace.save_gantt_chart_html(window, i_batch, log_path / f"gantt_{i_batch}.html")
```

### Instrumenting your code

```python
from tinker_cookbook.utils import trace

# Decorator — automatically traces function calls
@trace.scope
async def my_training_step(tc, batch):
    result = await tc.forward_backward_async(data=batch, loss_fn="cross_entropy")
    return result

# Inline span — for timing a code block without a dedicated function
async with trace.scope_span("data_prep"):
    batch = prepare_next_batch(...)

# Sync variant
with trace.scope_span_sync("data_prep"):
    batch = prepare_next_batch(...)
```

`@scope` and `scope_span` are no-ops when called outside `trace_iteration` — safe to leave in production.

### Viewing Perfetto traces

```bash
# Convert JSONL to JSON for visualization
uv run python -m tinker_cookbook.utils.trace trace_events.jsonl trace.json
# Open trace.json in chrome://tracing or https://ui.perfetto.dev/
```

## Debugging tips

1. **Training not improving**: Check `metrics.jsonl` — is loss decreasing? Are rewards increasing?
2. **KL divergence spiking**: KL > 0.01 indicates instability. Lower the learning rate.
3. **Reward stuck at 0**: Check rollout summaries — are responses being parsed correctly?
4. **OOM / timeout**: Reduce `batch_size`, `group_size`, or `max_tokens`
5. **Shrink workloads for debugging**: Set small `batch_size`, `group_size`, and `max_steps`
6. **Compare runs**: Load multiple `metrics.jsonl` into a DataFrame and overlay plots


================================================
FILE: .claude/skills/manage-skills/SKILL.md
================================================
---
name: manage-skills
description: Create, update, or organize Claude Code skills in this repo. Use when adding a new skill, reviewing existing skills for consistency, or maintaining the skill taxonomy.
disable-model-invocation: true
argument-hint: "[create|update|audit] [skill-name]"
---

# Manage Claude Code Skills

This meta-skill governs how skills are created and maintained in the tinker-cookbook repo.

## Skill taxonomy

All skills in `.claude/skills/` are organized into 5 layers:

### Layer 0: Fundamentals (`setup`, `models`, `hyperparams`, `logging`)
**Scope:** Getting started, model selection, hyperparameter guidance, training output analysis. Cross-cutting concerns needed before touching any code.
**Auto-invocation:** Yes — triggers when users ask about setup, models, hyperparameters, or debugging.
**Key principle:** These inform all other layers. Reference `docs/`, `README.md`, `tinker_cookbook/hyperparam_utils.py`.

### Layer 1: Tinker SDK (`tinker-sdk`, `tinker-types`, `tinker-cli`)
**Scope:** Raw Tinker Python SDK APIs — ServiceClient, TrainingClient, SamplingClient, RestClient, types, errors, and CLI commands.
**Auto-invocation:** Yes — triggers when users ask about Tinker API basics or CLI usage.
**Key principle:** Reference `docs/api-reference/` for authoritative API docs.

### Layer 2: Cookbook Primitives (`renderers`, `environments`, `weights`, `completers`, `checkpoints`, `evals`, `datasets`)
**Scope:** Building blocks in `tinker_cookbook/` — renderers, RL environments, weight lifecycle, completers, checkpointing, evaluators, dataset construction.
**Auto-invocation:** Yes — triggers when users ask about specific primitives.
**Key principle:** Reference source code in `tinker_cookbook/` and docs in `docs/`.

### Layer 3: Algorithm / Task Recipes (`sft`, `grpo`, `distillation`, `dpo`, `rlhf`, `multiturn-rl`)
**Scope:** End-to-end training workflows built on Layer 1 + Layer 2.
**Auto-invocation:** Yes — triggers when users want to set up a specific training method.
**Key principle:** Reference recipes in `tinker_cookbook/recipes/` and defer primitive details to Layer 2 skills.

### Layer 4: Repo Development (`new-recipe`, `ci`, `contributing`, `manage-skills`)
**Scope:** Development workflow — scaffolding, testing, CI, code style, skill maintenance.
**Auto-invocation:** `contributing` and `ci` auto-invoke; `new-recipe` and `manage-skills` are manual-only.
**Key principle:** Reference `CONTRIBUTING.md`, `tests/`, `.github/workflows/`.

## Creating a new skill

### Step 1: Determine the layer
Which layer does this skill belong to? Skills should have a clear, non-overlapping scope. If it spans layers, split it.

### Step 2: Check for overlap
Read existing skills in `.claude/skills/` to ensure the new skill doesn't duplicate content. If there's overlap, update the existing skill instead.

### Step 3: Create the skill file

Create `.claude/skills/<skill-name>/SKILL.md` with this structure:

```yaml
---
name: <skill-name>
description: <Clear description of what the skill does and when to use it>
argument-hint: "[optional args]"  # Only if the skill takes arguments
disable-model-invocation: true    # Only for manual-trigger skills (Layer 4 actions)
---

# <Skill Title>

<Brief description of what this skill helps with>

## Step 1: Understand the request
<What to ask the user if not specified>

## Step 2: Reference existing code
<Which files to read for patterns — be specific with file paths>

## Step 3: Key concepts
<Core APIs, parameters, patterns>

## Step 4: Implementation
<Code examples following repo conventions>

## Step N: Add tests
<Testing guidance — smoke tests and unit tests>
```

### Step 4: Follow these conventions

**Naming:**
- Lowercase, hyphenated: `tinker-sdk`, `new-recipe`, `manage-skills`
- Layer 0: named after the fundamental concept
- Layer 1: named after the SDK concept
- Layer 2: named after the primitive
- Layer 3: named after the algorithm/method
- Layer 4: named after the dev action

**Content rules:**
- Always reference **actual file paths** in the repo — never describe APIs from memory
- Include code examples that follow repo conventions (`@chz.chz`, explicit typing, etc.)
- For Layer 3 skills: defer primitive details to Layer 2 skills (e.g., say "see `/renderers` skill" instead of re-explaining renderers)
- Include a testing section pointing to `tests/recipes/` for smoke tests and `*_test.py` for unit tests
- Keep skills under 200 lines — move detailed reference material to separate files in the skill directory

**Frontmatter rules:**
- `description` is required and must clearly state **when** to trigger the skill
- Use `disable-model-invocation: true` only for action-oriented Layer 4 skills
- Use `argument-hint` if the skill takes positional arguments

## Auditing existing skills

When auditing, check each skill for:

1. **Accuracy:** Do file paths and API references match the current codebase? Run `ls` or `grep` to verify.
2. **Freshness:** Has the referenced code changed since the skill was written? Check git log for the referenced files.
3. **Taxonomy compliance:** Is the skill in the correct layer? Does it overlap with other skills?
4. **Convention compliance:** Does it follow the structure above? Does it include testing guidance?
5. **Cross-references:** Do Layer 3 skills reference Layer 2 skills where appropriate?

## Current skill inventory

```
.claude/skills/
├── Layer 0: Fundamentals
│   ├── setup/               # Installation, API key, first run
│   ├── models/              # Model lineup, selection, families
│   ├── hyperparams/         # LR formulas, batch size, LoRA rank
│   └── logging/             # Training outputs, metrics, debugging
├── Layer 1: SDK
│   ├── tinker-sdk/          # ServiceClient, TrainingClient, SamplingClient, RestClient APIs
│   ├── tinker-types/        # Datum, ModelInput, TensorData, response types, error types
│   └── tinker-cli/          # tinker CLI: run/checkpoint management, download, publish
├── Layer 2: Primitives
│   ├── renderers/           # Renderer setup, TrainOnWhat, vision
│   ├── environments/        # Env, EnvGroupBuilder, custom RL envs
│   ├── weights/             # download, build_hf_model, publish
│   ├── completers/          # TokenCompleter, MessageCompleter
│   ├── checkpoints/         # save/load, CheckpointRecord, resume
│   ├── evals/               # Evaluators, Inspect AI
│   └── datasets/            # SupervisedDatasetBuilder, RLDatasetBuilder
├── Layer 3: Recipes
│   ├── sft/                 # Supervised fine-tuning
│   ├── grpo/                # RL with verifiable rewards
│   ├── distillation/        # Knowledge distillation
│   ├── dpo/                 # Direct Preference Optimization
│   ├── rlhf/                # RLHF pipeline
│   └── multiturn-rl/        # Multi-turn RL
└── Layer 4: Development
    ├── new-recipe/          # Scaffold new recipe
    ├── ci/                  # Testing and CI
    ├── contributing/        # Dev setup and code style
    └── manage-skills/       # This skill
```

## Maintenance schedule

When the codebase changes significantly (new modules, API changes, renamed files):
1. Run `/manage-skills audit` to check all skills
2. Update affected skills
3. Commit changes with a descriptive message


================================================
FILE: .claude/skills/models/SKILL.md
================================================
---
name: models
description: Guide for choosing models in Tinker — available model families, model types (base, instruction, reasoning, hybrid, vision), architecture (dense vs MoE), and how to match renderers to models. Use when the user asks which model to use, what models are available, or how to pick a model for their task.
---

# Model Selection

Help the user choose the right model for their task.

## Reference

- `docs/model-lineup.mdx` — Full model listing with types, sizes, and architecture
- `tinker_cookbook/model_info.py` — Model metadata and renderer mapping

## Available models

### Qwen family
| Model | Type | Arch | Size |
|-------|------|------|------|
| `Qwen/Qwen3.5-397B-A17B` | Hybrid + Vision | MoE | Large |
| `Qwen/Qwen3.5-35B-A3B` | Hybrid + Vision | MoE | Medium |
| `Qwen/Qwen3.5-27B` | Hybrid + Vision | Dense | Medium |
| `Qwen/Qwen3.5-4B` | Hybrid + Vision | Dense | Compact |
| `Qwen/Qwen3-235B-A22B-Instruct-2507` | Instruction | MoE | Large |
| `Qwen/Qwen3-30B-A3B-Instruct-2507` | Instruction | MoE | Medium |
| `Qwen/Qwen3-30B-A3B` | Hybrid | MoE | Medium |
| `Qwen/Qwen3-30B-A3B-Base` | Base | MoE | Medium |
| `Qwen/Qwen3-32B` | Hybrid | Dense | Medium |
| `Qwen/Qwen3-8B` | Hybrid | Dense | Small |
| `Qwen/Qwen3-8B-Base` | Base | Dense | Small |
| `Qwen/Qwen3-4B-Instruct-2507` | Instruction | Dense | Compact |
| `Qwen/Qwen3-VL-235B-A22B-Instruct` | Vision | MoE | Large |
| `Qwen/Qwen3-VL-30B-A3B-Instruct` | Vision | MoE | Medium |

### Llama family
| Model | Type | Arch | Size |
|-------|------|------|------|
| `meta-llama/Llama-3.3-70B-Instruct` | Instruction | Dense | Large |
| `meta-llama/Llama-3.1-70B` | Base | Dense | Large |
| `meta-llama/Llama-3.1-8B` | Base | Dense | Small |
| `meta-llama/Llama-3.1-8B-Instruct` | Instruction | Dense | Small |
| `meta-llama/Llama-3.2-3B` | Base | Dense | Compact |
| `meta-llama/Llama-3.2-1B` | Base | Dense | Compact |

### Nemotron family
| Model | Type | Arch | Size |
|-------|------|------|------|
| `nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16` | Hybrid | MoE | Large |
| `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16` | Hybrid | MoE | Medium |

### Other families
| Model | Type | Arch | Size |
|-------|------|------|------|
| `openai/gpt-oss-120b` | Reasoning | MoE | Medium |
| `openai/gpt-oss-20b` | Reasoning | MoE | Small |
| `deepseek-ai/DeepSeek-V3.1` | Hybrid | MoE | Large |
| `deepseek-ai/DeepSeek-V3.1-Base` | Base | MoE | Large |
| `moonshotai/Kimi-K2-Thinking` | Reasoning | MoE | Large |
| `moonshotai/Kimi-K2.5` | Reasoning + Vision | MoE | Large |

## How to choose

### By task type

- **Instruction tuning / chat SFT**: Start with an Instruction model (e.g., `Llama-3.1-8B-Instruct`, `Qwen3-30B-A3B-Instruct-2507`)
- **RL with verifiable rewards (GRPO)**: Use Instruction or Hybrid models — they already follow instructions
- **Reasoning / chain-of-thought**: Use Reasoning or Hybrid models (`Kimi-K2-Thinking`, `Qwen3-8B`)
- **Full post-training pipeline**: Start with a Base model (e.g., `Qwen3-8B-Base`, `Llama-3.1-8B`)
- **Vision tasks**: Use Vision or Hybrid+Vision models (`Qwen3.5-35B-A3B`, `Qwen3-VL-*`)
- **Distillation (student)**: Use a Base model as student
- **Quick prototyping**: Use compact models (`Llama-3.2-1B`, `Qwen3.5-4B`)

### By cost

**Prefer MoE models** — they're much more cost-effective than dense models because cost scales with active parameters, not total parameters. For example, `Qwen3-30B-A3B` (MoE, 3B active) is cheaper than `Qwen3-32B` (Dense, 32B active) despite similar quality.

### Model types explained

- **Base**: Pre-trained on raw text. For research or full post-training pipelines.
- **Instruction**: Fine-tuned for instruction following. Fast inference, no chain-of-thought.
- **Reasoning**: Always uses chain-of-thought before visible output.
- **Hybrid**: Can operate in both thinking and non-thinking modes.
- **Vision**: Processes images alongside text. See `/renderers` skill for vision input handling.

### Size categories
- **Compact**: 1B–4B parameters
- **Small**: 8B parameters
- **Medium**: 27B–32B parameters
- **Large**: 70B+ parameters

## Renderer matching

Every model needs a matching renderer. **Always use the automatic lookup**:

```python
from tinker_cookbook import model_info

renderer_name = model_info.get_recommended_renderer_name(model_name)
```

Never hardcode renderer names — the mapping is maintained in `model_info.py`.

## Learning rate by model

Use `hyperparam_utils.get_lr(model_name)` for model-specific LR recommendations. See the `/hyperparams` skill for details.

## Common pitfalls
- MoE models are cheaper than dense — prefer them unless you have a specific reason
- Base models need full post-training (SFT + alignment) to be useful for chat
- Instruction models are best for tasks where you want to start from a capable baseline
- Vision models require `ImageChunk` in messages — see `/renderers` skill
- Llama models require `HF_TOKEN` for tokenizer download (gated on HuggingFace)


================================================
FILE: .claude/skills/multiturn-rl/SKILL.md
================================================
---
name: multiturn-rl
description: Set up and run multi-turn RL training for interactive environments (terminal tasks, tool use, search/RAG, games) using the Tinker API. Use when the user wants multi-turn RL, agentic training, tool-use RL, or interactive environment training.
argument-hint: "[model-name] [environment-type]"
---

# Multi-Turn RL Training

Help the user set up RL training for multi-turn interactive environments using the Tinker API.

## Step 1: Understand the request

Ask the user (if not already specified):
- **Model**: Which model to train (e.g., `moonshotai/Kimi-K2-Thinking`, `Qwen/Qwen3-8B`)
- **Environment type**:
  - **Terminal/sandbox tasks**: Model executes shell commands (Harbor)
  - **Search/RAG**: Model uses retrieval tools (Search-R1)
  - **Multiplayer games**: Two models compete (TicTacToe, Twenty Questions, Guess Number)
  - **Custom multi-turn**: User-defined interactive environment
- **Turn structure**: Max turns, tool outputs, observation handling

## Step 2: Reference existing recipes

Read these files for patterns:
- `tinker_cookbook/recipes/harbor_rl/train.py` — Terminal task RL with sandbox execution
- `tinker_cookbook/recipes/harbor_rl/harbor_env.py` — HarborDatasetBuilder, sandbox factory
- `tinker_cookbook/recipes/search_tool/train.py` — Search-R1 with Chroma vector DB
- `tinker_cookbook/recipes/multiplayer_rl/text_arena/train.py` — Two-player games
- `tinker_cookbook/recipes/multiplayer_rl/twenty_questions/train.py` — Twenty Questions
- `tinker_cookbook/recipes/multiplayer_rl/guess_number/train.py` — Guess the Number
- `tinker_cookbook/rl/message_env.py` — Message-based environment interface
- `docs/rl/sequence-extension.mdx` — Multi-turn RL and KV-cache
- `docs/rl/rl-envs.mdx` — Custom environments

## Step 3: Configure the environment

### Harbor (Terminal Tasks)
Interactive sandbox where model runs shell commands and gets outputs:

```python
from tinker_cookbook.recipes.harbor_rl.harbor_env import HarborDatasetBuilder, HarborTask

dataset_builder = HarborDatasetBuilder(
    tasks=tasks,                    # List of HarborTask objects
    batch_size=8,                   # groups_per_batch
    group_size=4,                   # rollouts per task
    model_name=model_name,
    renderer_name=renderer_name,
    max_turns=10,                   # max interaction turns
    sandbox_timeout=3600,           # sandbox lifetime (seconds)
    command_timeout=120,            # per-command timeout
    grader_timeout=60,              # grading timeout
)
```

### Search/RAG (Search-R1)
Model queries a vector database during generation:

See `recipes/search_tool/train.py` for Chroma integration and streaming minibatch config.

### Multiplayer Games
Two models play against each other:

See `recipes/multiplayer_rl/text_arena/train.py` for the competitive RL pattern.

### Key Multi-Turn Parameters

- `max_turns`: Maximum number of interaction turns
- `max_tokens`: Max tokens per generation step
- `kl_penalty_coef`: KL penalty (often 0.0 for multi-turn to allow exploration)
- `max_steps_off_policy`: Enable async rollouts for expensive environments

### Async Rollouts
Multi-turn envs are slow due to tool execution. Use async config:
```python
config = Config(
    ...
    async_config=AsyncConfig(
        max_steps_off_policy=cli_config.max_steps_off_policy,
        groups_per_batch=cli_config.groups_per_batch,
    ) if cli_config.max_steps_off_policy is not None else None,
)
```

## Step 4: Write the training script

Follow the Harbor pattern:

```python
import asyncio
import chz
from tinker_cookbook import cli_utils, model_info
from tinker_cookbook.rl.train import AsyncConfig, Config, main

@chz.chz
class CLIConfig:
    model_name: str = "moonshotai/Kimi-K2-Thinking"
    lora_rank: int = 32
    max_tokens: int = 8192
    max_turns: int = 10
    group_size: int = 4
    groups_per_batch: int = 8
    learning_rate: float = 1e-5
    kl_penalty_coef: float = 0.0
    max_steps_off_policy: int | None = None

async def cli_main(cli_config: CLIConfig):
    renderer_name = model_info.get_recommended_renderer_name(cli_config.model_name)

    dataset_builder = ...  # Your multi-turn dataset builder

    config = Config(
        learning_rate=cli_config.learning_rate,
        dataset_builder=dataset_builder,
        model_name=cli_config.model_name,
        lora_rank=cli_config.lora_rank,
        max_tokens=cli_config.max_tokens,
        kl_penalty_coef=cli_config.kl_penalty_coef,
        log_path="/tmp/tinker-examples/multiturn/my_run",
        async_config=AsyncConfig(
            max_steps_off_policy=cli_config.max_steps_off_policy,
            groups_per_batch=cli_config.groups_per_batch,
        ) if cli_config.max_steps_off_policy is not None else None,
    )

    await main(config)
```

## Step 5: Run

```bash
# Harbor terminal RL
python -m tinker_cookbook.recipes.harbor_rl.train

# Search-R1
python -m tinker_cookbook.recipes.search_tool.train

# Multiplayer games
python -m tinker_cookbook.recipes.multiplayer_rl.text_arena.train
```

## Step 6: Add tests

If you created a new multi-turn recipe, add a smoke test:

```python
# tests/recipes/test_recipe_<name>.py
import pytest
from tests.helpers import run_recipe

@pytest.mark.integration
def test_<recipe_name>():
    run_recipe(
        "tinker_cookbook.recipes.<recipe_name>.train",
        ["behavior_if_log_dir_exists=delete", "groups_per_batch=4", "group_size=2"],
    )
```

`run_recipe()` automatically passes `max_steps=2` so the recipe runs 2 training steps and exits. See `tests/recipes/test_recipe_text_arena.py` and `tests/recipes/test_recipe_twenty_questions.py` for existing multi-turn examples. For environment-specific logic (sandbox setup, tool parsing), add unit tests as `*_test.py` next to the source code.

## Common pitfalls
- Multi-turn envs are expensive — start with small `groups_per_batch` (4-8)
- Use `max_steps_off_policy` for async rollouts when env execution is slow
- `Env` objects are single-use — the builder creates fresh envs each batch
- Sandbox timeouts need to be generous enough for complex tasks
- KV-cache (sequence extension) is key for multi-turn efficiency — see `docs/rl/sequence-extension.mdx`
- `kl_penalty_coef=0.0` is common for multi-turn since you want the model to explore tool use


================================================
FILE: .claude/skills/new-recipe/SKILL.md
================================================
---
name: new-recipe
description: Scaffold a new training recipe for the Tinker cookbook following repo conventions. Use when the user wants to create a new recipe, training script, or experiment.
disable-model-invocation: true
argument-hint: "[recipe-name]"
---

# Create a New Training Recipe

Scaffold a new training recipe in `tinker_cookbook/recipes/` following repo conventions.

## Step 1: Understand the request

Ask the user:
- **Recipe name**: What to call it (becomes the directory/file name under `recipes/`)
- **Training type**: SL, RL, DPO, distillation, or hybrid
- **Key details**: Model, dataset, environment, reward signal, etc.

## Step 2: Read existing recipes for patterns

Before writing any code, read the most relevant existing recipe:
- **SL-based**: Read `tinker_cookbook/recipes/sl_basic.py` and `tinker_cookbook/recipes/chat_sl/train.py`
- **RL-based**: Read `tinker_cookbook/recipes/rl_basic.py` and `tinker_cookbook/recipes/math_rl/train.py`
- **DPO-based**: Read `tinker_cookbook/recipes/preference/dpo/train.py`
- **Distillation-based**: Read `tinker_cookbook/recipes/distillation/on_policy_distillation.py`
- **Multi-turn RL**: Read `tinker_cookbook/recipes/harbor_rl/train.py`

Also read `CLAUDE.md` for conventions.

## Step 3: Follow repo conventions

Every recipe MUST follow these patterns:

### File structure
```
tinker_cookbook/recipes/<recipe_name>/
├── __init__.py        # Empty or minimal
├── train.py           # Main entry point with CLIConfig + cli_main
└── <env_or_data>.py   # Dataset/environment definitions (if needed)
```

Or for simple recipes: `tinker_cookbook/recipes/<recipe_name>.py`

### CLI pattern (use `@chz.chz` for config)
```python
@chz.chz
class CLIConfig:
    model_name: str = "meta-llama/Llama-3.1-8B"
    learning_rate: float = 1e-4
    # ... all configurable parameters with defaults

async def cli_main(cli_config: CLIConfig):
    # Build full config from CLI config
    # Call training main function

if __name__ == "__main__":
    cli_config = chz.entrypoint(CLIConfig)
    asyncio.run(cli_main(cli_config))
```

### Required elements
1. **`@chz.chz` config class** with sensible defaults
2. **`model_info.get_recommended_renderer_name(model_name)`** for renderer — never hardcode
3. **`cli_utils.check_log_dir()`** before training to avoid clobbering
4. **`checkpoint_utils.resolve_renderer_name_from_checkpoint_or_default_async()`** if loading checkpoints
5. **Explicit typing** — no `Any` or `type: ignore`
6. **Auto-generated log paths** with model name, hyperparams, and timestamp

### Naming conventions
- Subscript suffixes for tensors: `_P` (problems), `_G` (groups), `_T` (tokens), `_D` (datums)
- Use `safezip`, `timed`, `scope` helpers where appropriate
- Use `ml_log.log_metrics` for metrics, `logtree` for transcripts

### Entry point
Recipe must be runnable as:
```bash
python -m tinker_cookbook.recipes.<recipe_name>.train [chz overrides]
```

## Step 4: Create the recipe

Write the recipe files following the patterns above. Place them in `tinker_cookbook/recipes/$ARGUMENTS/`.

## Step 5: Add tests

The repo has two layers of testing. **Both should be added for every new recipe.**

### Smoke test (required)
Create `tests/recipes/test_recipe_<name>.py` — a minimal test that runs the recipe for 2 training steps and verifies clean exit. CI auto-discovers these files and runs them daily.

```python
import pytest
from tests.helpers import run_recipe

@pytest.mark.integration
def test_<recipe_name>():
    run_recipe(
        "tinker_cookbook.recipes.<recipe_name>.train",
        [
            "behavior_if_log_dir_exists=delete",
            # Override params to make it fast:
            # "groups_per_batch=4", "group_size=2", "batch_size=16", etc.
        ],
    )
```

Key conventions:
- `run_recipe()` launches the module as a subprocess and automatically passes `max_steps=2` (configurable via the `max_steps` parameter)
- The recipe runs for 2 training steps and exits naturally — the test passes on clean exit (exit code 0)
- Always pass `behavior_if_log_dir_exists=delete` to avoid conflicts in repeated CI runs
- Override batch sizes / group sizes to small values so the test completes quickly
- Mark tests with `@pytest.mark.integration` — these require `TINKER_API_KEY`
- See `tests/helpers.py` for `run_recipe()` details and `tests/conftest.py` for fixtures

### Unit tests (for testable components)
Place unit tests next to the code they test using the `*_test.py` naming convention:

```
tinker_cookbook/recipes/<recipe_name>/<component>_test.py
```

For example:
- `tinker_cookbook/recipes/math_rl/math_env_test.py` — tests environment logic
- `tinker_cookbook/renderers/parsing_test.py` — tests parsing helpers

Unit tests should:
- Run without `TINKER_API_KEY` (no network calls)
- Be fast (< 1s per test)
- Use standard pytest features (fixtures, parametrize, marks)
- Test picklability if the component needs to be serialized for distributed rollout

### Running tests locally

```bash
# Unit tests only (no API key needed)
uv run pytest tinker_cookbook/

# Integration / smoke tests (requires TINKER_API_KEY)
uv run pytest tests/recipes/test_recipe_<name>.py -v -x -s
```

### CI integration
- **Unit tests** (`pytest tinker_cookbook/`) run on every PR via `.github/workflows/pytest.yaml`
- **Integration tests** (`pytest tests/`) run daily and on manual trigger via `.github/workflows/smoke-test-recipes.yaml`
- Adding `tests/recipes/test_recipe_<name>.py` is all that's needed — CI auto-discovers it

## Step 6: Verify

- Ensure the recipe is importable: `python -c "from tinker_cookbook.recipes.<name> import train"`
- Check that CLI help works: `python -m tinker_cookbook.recipes.<name>.train --help`
- Run the smoke test locally: `uv run pytest tests/recipes/test_recipe_<name>.py -v -x -s`


================================================
FILE: .claude/skills/renderers/SKILL.md
================================================
---
name: renderers
description: Guide for using renderers — the bridge between chat-style messages and token sequences. Covers renderer setup, TrainOnWhat, vision inputs, model family matching, and custom renderers. Use when the user asks about renderers, tokenization, message formatting, or vision inputs.
---

# Renderers

Renderers convert chat-style messages into token sequences for training and generation.

## Reference

Read these for details:
- `tinker_cookbook/renderers/base.py` — Renderer base class and API
- `tinker_cookbook/renderers/__init__.py` — Registry, factory, TrainOnWhat enum
- `docs/rendering.mdx` — Rendering guide with examples

## Getting a renderer

Always use `model_info.get_recommended_renderer_name()` — never hardcode:

```python
from tinker_cookbook import model_info
from tinker_cookbook.renderers import get_renderer
from tinker_cookbook.tokenizer_utils import get_tokenizer

renderer_name = model_info.get_recommended_renderer_name(model_name)
tokenizer = get_tokenizer(model_name)
renderer = get_renderer(renderer_name, tokenizer)
```

**Available renderers:** `llama3`, `qwen3`, `deepseekv3`, `kimi_k2`, `kimi_k25`, `nemotron3`, `nemotron3_disable_thinking`, `role_colon`, and more. See `tinker_cookbook/renderers/__init__.py` for the full registry.

## Key renderer methods

```python
# Build generation prompt (for sampling)
model_input = renderer.build_generation_prompt(messages, role="assistant")

# Build supervised example (for training)
model_input, weights = renderer.build_supervised_example(
    messages, train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES
)

# Parse model output back to a message
message, is_complete = renderer.parse_response(token_ids)

# Get stop sequences for sampling
stop = renderer.get_stop_sequences()

# Tool calling support
prefix_messages = renderer.create_conversation_prefix_with_tools(tool_specs)
```

## TrainOnWhat

Controls which tokens receive training signal:

```python
from tinker_cookbook.renderers import TrainOnWhat

# Most common — train on all assistant responses
TrainOnWhat.ALL_ASSISTANT_MESSAGES

# Train only on the final assistant response
TrainOnWhat.LAST_ASSISTANT_MESSAGE

# Train on everything (including user messages)
TrainOnWhat.ALL_TOKENS

# Other options
TrainOnWhat.LAST_ASSISTANT_TURN
TrainOnWhat.ALL_MESSAGES
TrainOnWhat.ALL_USER_AND_SYSTEM_MESSAGES
TrainOnWhat.CUSTOMIZED  # Set trainable=True/False on individual messages
```

## Vision inputs

For VLM models, use `ImageChunk` in messages:

```python
message = {
    "role": "user",
    "content": [
        {"type": "image", "image_url": "https://..."},  # or local path
        {"type": "text", "text": "What is in this image?"},
    ],
}
```

See `docs/rendering.mdx` and `tinker_cookbook/recipes/vlm_classifier/train.py` for VLM examples.

## Custom renderers

Register a custom renderer:

```python
from tinker_cookbook.renderers import register_renderer

def my_renderer_factory(tokenizer, image_processor):
    return MyCustomRenderer(tokenizer)

register_renderer("my_renderer", my_renderer_factory)
```

## Picklability

Renderers must be pickleable for distributed rollout execution. The codebase tests this — see `tinker_cookbook/renderers/renderer_pickle_test.py`.

## Common pitfalls
- Always use `model_info.get_recommended_renderer_name()` — renderer must match model family
- After loading a checkpoint trained with a specific renderer, use the same renderer name
- `build_supervised_example()` returns weights as `list[float]` — wrap with `TensorData.from_numpy()` if needed
- For tool calling, use `create_conversation_prefix_with_tools()` to inject tool definitions


================================================
FILE: .claude/skills/rlhf/SKILL.md
================================================
---
name: rlhf
description: Set up and run the full RLHF pipeline (SFT, reward model training, RL from reward model) using the Tinker API. Use when the user wants to do RLHF, train a reward model, or run the full preference-based RL pipeline.
argument-hint: "[model-name]"
---

# RL from Human Feedback (RLHF) Pipeline

Help the user set up and run the full 3-stage RLHF pipeline using the Tinker API.

## Overview

RLHF is a multi-stage pipeline:
1. **SFT Stage** — Fine-tune base model on instruction data
2. **Reward Model (RM) Stage** — Train a reward model on preference comparisons
3. **RL Stage** — Optimize the SFT policy using the reward model

## Step 1: Understand the request

Ask the user (if not already specified):
- **Base model**: Which model to start from (e.g., `meta-llama/Llama-3.2-3B`)
- **Preference data**: Which comparison dataset (HHH, HelpSteer3, UltraFeedback, or custom)
- **Which stages to run**: All 3, or skip SFT/RM if checkpoints exist
- **LoRA rank**: Typically 64 for RLHF

## Step 2: Reference existing recipes

Read these files:
- `tinker_cookbook/recipes/preference/rlhf/rlhf_pipeline.py` — Complete 3-stage pipeline
- `tinker_cookbook/rl/preference_envs.py` — Preference-based RL environments
- `tinker_cookbook/preference/types.py` — PreferenceModelBuilder
- `tinker_cookbook/preference/comparison_policy_evaluator.py` — RM evaluation
- `docs/preferences/rlhf-example.mdx` — RLHF guide

## Step 3: Configure each stage

### Stage 1: SFT
Standard supervised fine-tuning (see `/sft` skill). Key settings:
- Dataset: NoRobots or similar instruction data
- `sft_learning_rate`: 2e-4 (LoRA)
- `train_on_what`: `TrainOnWhat.ALL_ASSISTANT_MESSAGES`

### Stage 2: Reward Model
Train on preference comparisons:
- Uses `ChatDatasetBuilderFromComparisons` with a comparison builder (e.g., `HHHComparisonBuilder`)
- `rm_learning_rate`: 3e-4
- Produces a reward model checkpoint used in Stage 3

### Stage 3: RL from Reward Model
Optimize SFT policy using RM scores:
- Load SFT checkpoint as starting policy
- Load RM weights for scoring
- `PreferenceModelBuilderFromChatRenderer` wraps the RM
- `PairwisePreferenceRLDatasetBuilder` creates the RL environment
- `rl_learning_rate`: 1e-5 (much lower than SFT)
- `tournament_pattern`: `ALL_PAIRS_BOTH_WAYS` for pairwise comparison

### Typical Hyperparameters
```python
@chz.chz
class CLIConfig:
    base_model: str = "meta-llama/Llama-3.2-3B"
    lora_rank: int = 64
    batch_size: int = 256
    max_length: int = 16384
    sft_learning_rate: float = 2e-4
    rm_learning_rate: float = 3e-4
    rl_learning_rate: float = 1e-5
    rl_max_tokens: int = 1024
    rl_group_size: int = 4
```

## Step 4: Write the training script

Follow the pipeline pattern from `rlhf_pipeline.py`:

```python
import asyncio
import os
import chz
from tinker_cookbook import checkpoint_utils, model_info
from tinker_cookbook.preference.types import PreferenceModelBuilderFromChatRenderer
from tinker_cookbook.rl import preference_envs, train
from tinker_cookbook.supervised import train as supervised_train

# Stage 1: SFT
def sft_stage(log_path, base_model, ...):
    # Standard SFT config + supervised_train.main()
    ...

# Stage 2: Reward Model
def train_rm(log_path, base_model, ...):
    # Train on preference comparisons
    ...

# Stage 3: RL
async def train_rl(log_path, sft_log_path, rm_log_path, base_model, ...):
    sft_checkpoint = checkpoint_utils.get_last_checkpoint(sft_log_path)["state_path"]
    rm_weights = checkpoint_utils.get_last_checkpoint(rm_log_path)["sampler_path"]

    preference_model_builder = PreferenceModelBuilderFromChatRenderer(
        renderer_name=renderer_name,
        model_name=base_model,
        rm_weights_path=rm_weights,
    )
    rl_dataset_builder = preference_envs.PairwisePreferenceRLDatasetBuilder(
        comparison_builder=comparison_builder,
        preference_model_builder=preference_model_builder,
        batch_size=batch_size,
        group_size=group_size,
        tournament_pattern=preference_envs.TournamentPattern.ALL_PAIRS_BOTH_WAYS,
        ...
    )
    config = train.Config(
        load_checkpoint_path=sft_checkpoint,
        dataset_builder=rl_dataset_builder,
        learning_rate=1e-5,
        ...
    )
    await train.main(config)
```

## Step 5: Run

```bash
# Full pipeline
python -m tinker_cookbook.recipes.preference.rlhf.rlhf_pipeline

# Skip SFT (already have checkpoint)
python -m tinker_cookbook.recipes.preference.rlhf.rlhf_pipeline run_sft=False

# Skip SFT and RM
python -m tinker_cookbook.recipes.preference.rlhf.rlhf_pipeline run_sft=False run_rm=False
```

## Step 6: Add tests

If you created a new RLHF recipe, add a smoke test:

```python
# tests/recipes/test_recipe_<name>.py
import pytest
from tests.helpers import run_recipe

@pytest.mark.integration
def test_<recipe_name>():
    run_recipe(
        "tinker_cookbook.recipes.<recipe_name>.train",
        ["behavior_if_log_dir_exists=delete"],
    )
```

`run_recipe()` automatically passes `max_steps=2` so the recipe runs 2 training steps and exits. See `tests/recipes/test_recipe_rlhf_pipeline.py` for the existing example.

## Common pitfalls
- RL learning rate must be **much lower** than SFT (1e-5 vs 2e-4)
- Checkpoints flow between stages: SFT → RL policy init, RM → RL reward scoring
- Use `checkpoint_utils.get_last_checkpoint()` to find checkpoints from previous stages
- RM quality directly impacts RL — validate RM before running Stage 3
- `group_size` in RL stage affects variance of reward estimates


================================================
FILE: .claude/skills/setup/SKILL.md
================================================
---
name: setup
description: Guide for installing Tinker, setting up the environment, getting an API key, and verifying everything works. Use when the user is getting started, setting up their environment, or troubleshooting installation issues.
---

# Setup & Installation

Get Tinker and tinker-cookbook running from scratch.

## Reference

- `docs/install.mdx` — Official installation guide
- `CONTRIBUTING.md` — Development setup
- `README.md` — Project overview

## Step 1: Sign up and get an API key

1. Sign up at [https://auth.thinkingmachines.ai/sign-up](https://auth.thinkingmachines.ai/sign-up)
2. Create an API key from the [console](https://tinker-console.thinkingmachines.ai)
3. Export it:
```bash
export TINKER_API_KEY=<your-key>
```

Add to your shell profile (`.bashrc`, `.zshrc`) for persistence.

## Step 2: Install Tinker SDK

```bash
pip install tinker
```

This gives you:
- **Python SDK** — `TrainingClient`, `SamplingClient`, low-level training/sampling APIs
- **Tinker CLI** — `tinker` or `python -m tinker` for management tasks

## Step 3: Install tinker-cookbook

```bash
git clone https://github.com/thinking-machines-lab/tinker-cookbook.git
cd tinker-cookbook
pip install -e .
```

Or with dev dependencies (for contributing):
```bash
uv sync --extra dev
pre-commit install
```

## Step 4: Verify installation

```python
import tinker
service_client = tinker.ServiceClient()
training_client = service_client.create_lora_training_client(
    base_model="meta-llama/Llama-3.2-1B", rank=32,
)
info = training_client.get_info()
print(info)  # Should print model info
```

## Step 5: Run a minimal example

```bash
# Supervised learning
python -m tinker_cookbook.recipes.sl_basic

# Reinforcement learning
python -m tinker_cookbook.recipes.rl_basic
```

## Environment variables

| Variable | Purpose |
|----------|---------|
| `TINKER_API_KEY` | Required — authenticates with Tinker service |
| `HF_TOKEN` | Optional — access gated HuggingFace models (Llama, etc.) |
| `HF_TRUST_REMOTE_CODE` | Optional — allow custom tokenizer code |
| `WANDB_API_KEY` | Optional — log to Weights & Biases |

## Common issues

- **`TINKER_API_KEY not set`**: Export the key in your shell or `.env` file
- **Tokenizer download fails**: Set `HF_TOKEN` for gated models (e.g., Llama)
- **Import errors**: Ensure `pip install -e .` was run from the repo root
- **`uv` not found**: Install with `curl -LsSf https://astral.sh/uv/install.sh | sh`


================================================
FILE: .claude/skills/sft/SKILL.md
================================================
---
name: sft
description: Set up and run supervised fine-tuning (SFT) on instruction or chat datasets using the Tinker API. Use when the user wants to do instruction tuning, chat fine-tuning, or supervised learning.
argument-hint: "[model-name] [dataset]"
---

# Supervised Fine-Tuning (SFT)

Help the user set up and run supervised fine-tuning using the Tinker API.

## Step 1: Understand the request

Ask the user (if not already specified):
- **Model**: Which base model to fine-tune (e.g., `meta-llama/Llama-3.1-8B`, `Qwen/Qwen3-8B`). See `docs/model-lineup.mdx` for available models.
- **Dataset**: What data to train on — built-in datasets (NoRobots, Tulu3) or custom JSONL file.
- **Goal**: General instruction tuning, domain-specific fine-tuning, or chat quality improvement.

## Step 2: Reference existing recipes

Read these files for patterns and conventions:
- `tinker_cookbook/recipes/sl_basic.py` — Minimal SFT example
- `tinker_cookbook/recipes/chat_sl/train.py` — Full-featured chat SFT with eval
- `tinker_cookbook/supervised/train.py` — Core training loop
- `tinker_cookbook/supervised/data.py` — Dataset construction helpers
- `docs/supervised-learning/sl-basic.mdx` — Getting started guide
- `docs/supervised-learning/sl-hyperparams.mdx` — Learning rate and batch size guidance

## Step 3: Configure the training run

Key configuration decisions:

### Renderer
Match renderer to model family using `model_info.get_recommended_renderer_name(model_name)`. Never hardcode renderer names.

### Learning Rate
- Use `hyperparam_utils.get_lr(model_name)` for recommended LR
- LoRA fine-tuning typically needs ~10x higher LR than full fine-tuning (e.g., 2e-4 for LoRA vs 2e-5 for full)

### TrainOnWhat
- `TrainOnWhat.ALL_ASSISTANT_MESSAGES` — Train on all assistant turns (most common)
- `TrainOnWhat.LAST_ASSISTANT_MESSAGE` — Train only on final assistant response
- `TrainOnWhat.ALL_TOKENS` — Train on entire conversation including user messages

### Dataset
- Built-in: `NoRobotsBuilder`, `Tulu3Builder`
- Custom JSONL: Use `FromConversationFileBuilder(common_config=..., file_path="path/to/data.jsonl")`
- Format: Same as `tinker_cookbook/example_data/conversations.jsonl`

### Batch Size & Epochs
- `batch_size`: Number of examples per training batch (default: 128 for basic, scale up as needed); per-sequence token budget is controlled by `max_length`
- `num_epochs`: Number of passes through the dataset
- `eval_every`: Evaluate every N batches

## Step 4: Write the training script

Follow the pattern from `sl_basic.py`:

```python
import asyncio
import chz
import sys
from tinker_cookbook import cli_utils, model_info
from tinker_cookbook.supervised import train
from tinker_cookbook.renderers import TrainOnWhat
from tinker_cookbook.supervised.types import ChatDatasetBuilderCommonConfig

def build_config_blueprint() -> chz.Blueprint[train.Config]:
    model_name = "meta-llama/Llama-3.1-8B"
    renderer_name = model_info.get_recommended_renderer_name(model_name)
    common_config = ChatDatasetBuilderCommonConfig(
        model_name_for_tokenizer=model_name,
        renderer_name=renderer_name,
        max_length=32768,
        batch_size=128,
        train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
    )
    # Configure dataset builder here
    dataset = ...

    return chz.Blueprint(train.Config).apply({
        "log_path": "/tmp/tinker-examples/my_sft_run",
        "model_name": model_name,
        "renderer_name": renderer_name,
        "dataset_builder": dataset,
        "learning_rate": 2e-4,
        "lr_schedule": "linear",
        "num_epochs": 1,
        "eval_every": 8,
    })

def main(config: train.Config):
    cli_utils.check_log_dir(config.log_path, behavior_if_exists="ask")
    asyncio.run(train.main(config))

if __name__ == "__main__":
    blueprint = build_config_blueprint()
    blueprint.make_from_argv(sys.argv[1:])
    main(blueprint.make())
```

## Step 5: Run and iterate

```bash
python -m tinker_cookbook.recipes.<recipe_name>
```

Override parameters from CLI: `python -m tinker_cookbook.recipes.<recipe_name> learning_rate=1e-4 batch_size=256`

## Step 6: Add tests

If you created a new recipe, add a smoke test so CI catches regressions:

```python
# tests/recipes/test_recipe_<name>.py
import pytest
from tests.helpers import run_recipe

@pytest.mark.integration
def test_<recipe_name>():
    run_recipe(
        "tinker_cookbook.recipes.<recipe_name>.train",
        ["behavior_if_log_dir_exists=delete", "batch_size=16"],
    )
```

`run_recipe()` automatically passes `max_steps=2` to the recipe, so it runs 2 training steps and exits cleanly. Tests are auto-discovered by CI and run daily. For unit-testable components (dataset processing, custom logic), add `*_test.py` files next to the source code.

## Step 7: Export weights (optional)

After training, export weights using the `tinker_cookbook.weights` API:

```python
from tinker_cookbook import weights

adapter_dir = weights.download(tinker_path="tinker://run-id/sampler_weights/final", output_dir="./adapter")
weights.build_hf_model(base_model="meta-llama/Llama-3.1-8B", adapter_path=adapter_dir, output_path="./model")
weights.publish_to_hf_hub(model_path="./model", repo_id="user/my-finetuned-model")
```

## Common pitfalls
- Always use `model_info.get_recommended_renderer_name()` — never hardcode renderer
- Use `cli_utils.check_log_dir()` to avoid clobbering previous runs
- For custom datasets, ensure JSONL matches the conversation format in `example_data/conversations.jsonl`
- LR too high causes instability; LR too low wastes compute


================================================
FILE: .claude/skills/tinker-cli/SKILL.md
================================================
---
name: tinker-cli
description: Guide for the Tinker CLI — managing training runs, checkpoints, downloading weights, and publishing to HuggingFace. Use when the user asks about CLI commands, listing runs, managing checkpoints from the terminal, or uploading to HF Hub.
---

# Tinker CLI

The `tinker` CLI is installed with the Tinker Python SDK. It provides commands for managing training runs and checkpoints from the terminal.

Requires `TINKER_API_KEY` environment variable to be set.

## Global options

```bash
tinker --format table   # Rich table output (default)
tinker --format json    # JSON output (for scripting)
```

## Training runs

```bash
# List recent training runs
tinker run list
tinker run list --limit 50

# Show details for a specific run
tinker run info <RUN_ID>

# Custom columns
tinker run list --columns id,model,lora,updated,status,checkpoint
```

Available columns: `id`, `model`, `owner`, `lora`, `updated`, `status`, `checkpoint`, `checkpoint_time`.

## Checkpoints

### List and inspect

```bash
# List checkpoints for a specific run
tinker checkpoint list --run-id <RUN_ID>

# List all your checkpoints across runs
tinker checkpoint list
tinker checkpoint list --limit 50

# Show checkpoint details
tinker checkpoint info <TINKER_PATH>
```

### Download

```bash
# Download and extract a checkpoint
tinker checkpoint download <TINKER_PATH>
tinker checkpoint download <TINKER_PATH> --output ./my-adapter
tinker checkpoint download <TINKER_PATH> --force  # Overwrite existing
```

### Visibility

```bash
# Make a checkpoint publicly accessible
tinker checkpoint publish <TINKER_PATH>

# Make a checkpoint private
tinker checkpoint unpublish <TINKER_PATH>
```

### TTL (expiration)

```bash
# Set checkpoint to expire in 24 hours
tinker checkpoint set-ttl <TINKER_PATH> --ttl 86400

# Remove expiration (keep indefinitely)
tinker checkpoint set-ttl <TINKER_PATH> --remove
```

### Delete

```bash
# Delete checkpoints (with confirmation prompt)
tinker checkpoint delete <TINKER_PATH>

# Delete without confirmation
tinker checkpoint delete <TINKER_PATH> -y

# Delete multiple
tinker checkpoint delete <PATH1> <PATH2> <PATH3>
```

### Upload to HuggingFace Hub

```bash
# Push checkpoint to HuggingFace
tinker checkpoint push-hf <TINKER_PATH> --repo user/my-model

# Push as public repo
tinker checkpoint push-hf <TINKER_PATH> --repo user/my-model --public

# Advanced options
tinker checkpoint push-hf <TINKER_PATH> \
    --repo user/my-model \
    --revision main \
    --commit-message "Upload fine-tuned model" \
    --create-pr \
    --no-model-card
```

Options: `--repo`, `--public`, `--revision`, `--commit-message`, `--create-pr`, `--allow-pattern`, `--ignore-pattern`, `--no-model-card`.

## Version

```bash
tinker version   # e.g. "tinker 0.15.0"
```

## Common patterns

### Script-friendly output
```bash
# Get checkpoint paths as JSON for scripting
tinker checkpoint list --format json | jq '.[].tinker_path'

# Get run IDs
tinker run list --format json | jq '.[].id'
```

### Typical workflow
```bash
# 1. Find your training run
tinker run list

# 2. List checkpoints for that run
tinker checkpoint list --run-id <RUN_ID>

# 3. Download the final checkpoint
tinker checkpoint download tinker://<RUN_ID>/sampler_weights/final -o ./adapter

# 4. Or push directly to HuggingFace
tinker checkpoint push-hf tinker://<RUN_ID>/sampler_weights/final --repo user/my-model
```

## Common pitfalls
- `TINKER_API_KEY` must be set — the CLI reads it from the environment
- Checkpoint paths use the format `tinker://<run-id>/<type>/<checkpoint-id>`
- `push-hf` uploads the raw checkpoint — for merged HF models, use `weights.build_hf_model()` in Python first (see `/weights` skill)
- `delete` is permanent and irreversible — use `-y` flag carefully


================================================
FILE: .claude/skills/tinker-sdk/SKILL.md
================================================
---
name: tinker-sdk
description: Guide for using the Tinker Python SDK APIs — ServiceClient, TrainingClient, SamplingClient, RestClient, forward_backward, optim_step, sampling, and async patterns. Use when the user asks about Tinker API basics, how to call training/sampling, or how the SDK works.
---

# Tinker Python SDK

Help the user understand and use the core Tinker SDK APIs.

## Reference docs

Read these for authoritative API documentation:
- `docs/api-reference/serviceclient.md` — ServiceClient API
- `docs/api-reference/trainingclient.md` — TrainingClient API
- `docs/api-reference/samplingclient.md` — SamplingClient API
- `docs/api-reference/restclient.md` — RestClient API
- `docs/api-reference/types.md` — All SDK types
- `docs/training-sampling.mdx` — Starter walkthrough
- `docs/async.mdx` — Sync/async patterns, futures
- `docs/losses.mdx` — Loss functions
- `docs/under-the-hood.mdx` — Clock cycles, worker pools

## ServiceClient (entry point)

`ServiceClient` is the main entry point. All other clients are created from it.

```python
from tinker import ServiceClient

svc = ServiceClient(user_metadata={"experiment": "v1"}, project_id="my-project")

# Create a new LoRA training client
tc = svc.create_lora_training_client(
    base_model="Qwen/Qwen3-8B",
    rank=32,
    seed=None,
    train_mlp=True,
    train_attn=True,
    train_unembed=True,
)

# Resume from a training checkpoint
tc = svc.create_training_client_from_state(path="tinker://...")              # weights only
tc = svc.create_training_client_from_state_with_optimizer(path="tinker://...") # weights + optimizer

# Create a sampling client
sc = svc.create_sampling_client(model_path="tinker://...", base_model=None, retry_config=None)

# Create a REST client for checkpoint/run management
rest = svc.create_rest_client()

# Query available models
caps = svc.get_server_capabilities()  # returns GetServerCapabilitiesResponse
```

All creation methods have `_async` variants.

## TrainingClient

```python
# Forward/backward pass (compute loss + gradients)
result = tc.forward_backward(data=[datum1, datum2], loss_fn="cross_entropy")

# Forward-only pass (compute loss, no gradients — useful for eval)
result = tc.forward(data=[datum1, datum2], loss_fn="cross_entropy")

# Custom loss function
result = tc.forward_backward_custom(data=[datum1, datum2], loss_fn=my_custom_loss_fn)

# Optimizer step
tc.optim_step(adam_params=AdamParams(learning_rate=2e-4))

# Checkpointing
tc.save_state(name="step_100", ttl_seconds=None)                # Full state (resumable)
tc.save_weights_for_sampler(name="step_100_sampler", ttl_seconds=None)  # Sampler-only

# Save + get SamplingClient in one call
sc = tc.save_weights_and_get_sampling_client(name="step_100")

# Load checkpoint
tc.load_state(path="tinker://...")
tc.load_state_with_optimizer(path="tinker://...")

# Metadata
info = tc.get_info()          # GetInfoResponse (model name, LoRA rank, tokenizer)
tokenizer = tc.get_tokenizer()  # HuggingFace tokenizer
```

### Loss functions
- `"cross_entropy"` — Standard SL loss
- `"importance_sampling"` — On-policy RL (default for GRPO)
- `"ppo"` — Proximal Policy Optimization
- `"cispo"` — Conservative Importance Sampling PPO
- `"dro"` — Distributionally Robust Optimization

See `docs/losses.mdx` for details and `loss_fn_config` parameters.

### Async variants

All methods have `_async` variants that return `APIFuture`:
```python
fb_future = tc.forward_backward_async(data=data, loss_fn="cross_entropy")
optim_future = tc.optim_step_async(adam_params=adam_params)
# Do other work...
fb_result = fb_future.result()
optim_result = optim_future.result()
```

**Key pattern:** Submit `forward_backward_async` and `optim_step_async` back-to-back before awaiting — this overlaps GPU computation with data preparation.

## SamplingClient

```python
from tinker import SamplingParams

sc = tc.save_weights_and_get_sampling_client(name="step_100")

response = sc.sample(
    prompt=model_input,
    num_samples=4,
    sampling_params=SamplingParams(max_tokens=256, temperature=1.0),
    include_prompt_logprobs=False,   # Set True to get per-token prompt logprobs
    topk_prompt_logprobs=0,          # Top-K logprobs per prompt token (0 = disabled)
)

for seq in response.sequences:
    print(seq.tokens, seq.logprobs, seq.stop_reason)

# Get logprobs for existing tokens (no generation)
logprobs_response = sc.compute_logprobs(prompt=model_input)

# Metadata
base_model = sc.get_base_model()    # Base model name string
tokenizer = sc.get_tokenizer()      # HuggingFace tokenizer
```

SamplingClient is picklable for multiprocessing use.

**Important:** Always create a **new** SamplingClient after saving weights. A stale client points at old weights.

## RestClient

For managing training runs and checkpoints. See also the `/tinker-cli` skill for CLI equivalents.

```python
rest = svc.create_rest_client()

# Training runs
runs = rest.list_training_runs(limit=20, offset=0, access_scope="owned")
run = rest.get_training_run(training_run_id="...")
run = rest.get_training_run_by_tinker_path(tinker_path="tinker://...")

# Checkpoints
checkpoints = rest.list_checkpoints(training_run_id="...")
all_checkpoints = rest.list_user_checkpoints(limit=100, offset=0)
rest.delete_checkpoint(training_run_id="...", checkpoint_id="...")
rest.delete_checkpoint_from_tinker_path(tinker_path="tinker://...")

# Checkpoint visibility
rest.publish_checkpoint_from_tinker_path(tinker_path="tinker://...")    # Make public
rest.unpublish_checkpoint_from_tinker_path(tinker_path="tinker://...")  # Make private

# Checkpoint TTL
rest.set_checkpoint_ttl_from_tinker_path(tinker_path="tinker://...", ttl_seconds=86400)

# Download URL
url_resp = rest.get_checkpoint_archive_url_from_tinker_path(tinker_path="tinker://...")

# Checkpoint metadata
info = rest.get_weights_info_by_tinker_path(tinker_path="tinker://...")
```

All RestClient methods have `_async` variants.

## Retry behavior

The Tinker SDK retries **all** HTTP API calls automatically (10 attempts, exponential backoff with jitter). Retried request types: timeouts (408), lock conflicts (409), rate limits (429), server errors (500+), and connection failures. The SDK respects `Retry-After` headers and attaches idempotency keys to non-GET requests.

Client errors (400, 401, 403, 404, 422) are **not** retried — these raise immediately (e.g., `tinker.BadRequestError`, `tinker.AuthenticationError`).

Override via `max_retries` on client creation:
```python
svc = tinker.ServiceClient(max_retries=3)   # reduce retries
svc = tinker.ServiceClient(max_retries=0)   # disable retries
```

**Do not** add retry wrappers around Tinker API calls in training loops — the SDK handles this. Enable retry logging with `logging.getLogger("tinker").setLevel(logging.DEBUG)`.

## Common pitfalls
- **Use ServiceClient** to create clients — `TrainingClient` and `SamplingClient` cannot be constructed directly
- Always await futures before submitting new forward_backward calls
- Submit `forward_backward_async` + `optim_step_async` back-to-back before awaiting
- Create a **new** SamplingClient after saving weights (sampler desync)
- Use `save_state` for resumable checkpoints, `save_weights_for_sampler` for sampling-only
- `forward()` computes loss without gradients — use for eval, not training


================================================
FILE: .claude/skills/tinker-types/SKILL.md
================================================
---
name: tinker-types
description: Reference for Tinker SDK types — Datum, ModelInput, TensorData, SamplingParams, response types, error types, and helper functions. Use when the user needs to build training data, construct model inputs, understand response objects, or handle errors.
---

# Tinker SDK Types

Quick reference for the core types used throughout the Tinker SDK and cookbook.

## Reference

Read `docs/api-reference/types.md` for the complete type reference.

## Core data types

### Type hierarchy
```
Datum
├── model_input: ModelInput (list of chunks)
│   ├── EncodedTextChunk (token IDs)
│   └── ImageChunk (vision inputs)
└── loss_fn_inputs: dict[str, TensorData]
    └── TensorData (numpy/torch wrapper)
```

### ModelInput
```python
from tinker import ModelInput

mi = ModelInput.from_ints([1, 2, 3, 4, 5])  # From token list
tokens = mi.to_ints()                        # Back to list
length = mi.length                           # Token count (property)
mi2 = mi.append(chunk)                       # Append a chunk
mi3 = mi.append_int(42)                      # Append a single token
mi_empty = ModelInput.empty()                # Empty input
```

### TensorData
```python
from tinker import TensorData

td = TensorData.from_numpy(np.array([1.0, 0.0, 1.0]))  # From numpy
td = TensorData.from_torch(torch.tensor([1.0, 0.0]))    # From torch
arr = td.to_numpy()                                       # Back to numpy
tensor = td.to_torch()                                    # Back to torch
lst = td.tolist()                                         # Back to list
# Fields: data (flat list), dtype ("int64"|"float32"), shape (optional)
```

### Datum
```python
from tinker import Datum, ModelInput, TensorData

datum = Datum(
    model_input=ModelInput.from_ints(tokens),
    loss_fn_inputs={"weights": TensorData.from_numpy(weights_array)},
)
```

## Configuration types

### SamplingParams
```python
from tinker import SamplingParams

params = SamplingParams(
    max_tokens=256,        # Max generation length
    temperature=1.0,       # Sampling temperature
    top_k=50,              # Top-K sampling (-1 = no limit)
    top_p=0.95,            # Nucleus sampling
    stop=["<|eot_id|>"],   # Stop sequences (strings or token IDs)
    seed=42,               # Reproducible seed
)
```

### AdamParams
```python
from tinker import AdamParams

adam = AdamParams(
    learning_rate=2e-4,
    beta1=0.9,             # Gradient moving average
    beta2=0.95,            # Gradient squared moving average
    eps=1e-12,             # Numerical stability
    weight_decay=0.0,      # Decoupled weight decay
    grad_clip_norm=1.0,    # Global gradient norm clipping (0.0 = disabled)
)
```

### LoraConfig
```python
from tinker import LoraConfig

config = LoraConfig(
    rank=32,               # LoRA rank
    seed=None,             # Initialization seed
    train_mlp=True,        # Train MLP layers
    train_attn=True,       # Train attention layers
    train_unembed=True,    # Train unembedding layer
)
```

## Response types

### ForwardBackwardOutput
Returned by `forward_backward()` and `forward()`:
```python
result = tc.forward_backward(data=batch, loss_fn="cross_entropy")
result.metrics              # dict[str, float] — training metrics (includes loss)
result.loss_fn_outputs      # list[LossFnOutput] — per-sample outputs
result.loss_fn_output_type  # str — loss output class name
```

### SampleResponse / SampledSequence
Returned by `sample()`:
```python
response = sc.sample(prompt=mi, num_samples=4, sampling_params=params)
response.sequences                # list[SampledSequence]
response.prompt_logprobs          # Optional[list[Optional[float]]] — per-prompt-token logprobs
response.topk_prompt_logprobs     # Optional[list[Optional[list[tuple[int, float]]]]] — top-K

for seq in response.sequences:
    seq.tokens       # list[int] — generated token IDs
    seq.logprobs     # Optional[list[float]] — per-token logprobs
    seq.stop_reason  # StopReason: "length" | "stop"
```

### Other response types
- `OptimStepResponse` — confirms parameter update
- `SaveWeightsResponse` — `path: str` (tinker:// path to saved weights)
- `LoadWeightsResponse` — confirms loaded weights
- `GetInfoResponse` — `model_data: ModelData` (model_name, lora_rank, tokenizer_id)
- `GetServerCapabilitiesResponse` — `supported_models: list[SupportedModel]`
- `WeightsInfoResponse` — `base_model`, `lora_rank`, `is_lora`, `train_mlp`, `train_attn`, `train_unembed`

## Checkpoint and run types

```python
from tinker import TrainingRun, Checkpoint, CheckpointType, ParsedCheckpointTinkerPath

# TrainingRun — metadata about a training run
run.training_run_id    # str
run.base_model         # str
run.is_lora            # bool
run.lora_rank          # Optional[int]
run.last_checkpoint    # Optional[Checkpoint]
run.user_metadata      # Optional[dict[str, str]]

# Checkpoint — metadata about a saved checkpoint
ckpt.checkpoint_id     # str
ckpt.checkpoint_type   # CheckpointType: "training" | "sampler"
ckpt.tinker_path       # str (tinker:// path)
ckpt.size_bytes        # Optional[int]
ckpt.public            # bool
ckpt.expires_at        # Optional[datetime]

# Parse a tinker:// path
parsed = ParsedCheckpointTinkerPath.from_tinker_path("tinker://run-id/weights/ckpt-id")
parsed.training_run_id  # str
parsed.checkpoint_type  # CheckpointType
parsed.checkpoint_id    # str
```

## Error types

All exceptions inherit from `tinker.TinkerError`:
- **`APIError`** → **`APIStatusError`**: `BadRequestError` (400), `AuthenticationError` (401), `PermissionDeniedError` (403), `NotFoundError` (404), `ConflictError` (409), `UnprocessableEntityError` (422), `RateLimitError` (429), `InternalServerError` (500+)
- **`APIConnectionError`**, **`APITimeoutError`**, **`APIResponseValidationError`**
- **`RequestFailedError`** — async request failure with error category

## Cookbook helper functions

Use these instead of manual Datum construction:
- `tinker_cookbook.supervised.data.conversation_to_datum(messages, renderer, max_length, train_on_what)` — full SL pipeline
- `tinker_cookbook.supervised.common.datum_from_model_input_weights(model_input, weights, max_length)` — from ModelInput + weights
- `renderer.build_supervised_example(messages)` — returns `(ModelInput, weights)`

## Common pitfalls
- Use helper functions instead of manual dict construction for Datum
- `TensorData` wraps arrays — don't pass raw numpy/torch directly to `loss_fn_inputs`
- `ModelInput.from_ints()` expects a flat list of integers, not nested lists
- `ModelInput.length` is a property, not a method
- `tinker.RateLimitError` (429) is retried automatically by the SDK — only add your own backoff handling if you disabled retries with `max_retries=0`


================================================
FILE: .claude/skills/weights/SKILL.md
================================================
---
name: weights
description: Guide for the weight lifecycle — downloading trained weights from Tinker, merging LoRA adapters into HuggingFace models, and publishing to HuggingFace Hub. Use when the user asks about exporting, downloading, merging, or publishing trained model weights.
---

# Weight Lifecycle

The `tinker_cookbook.weights` subpackage provides a standard pipeline for trained weight management: **download → build → publish**.

## Reference

Read these for details:
- `tinker_cookbook/weights/__init__.py` — API overview and workflow example
- `tinker_cookbook/weights/_download.py` — Download implementation
- `tinker_cookbook/weights/_export.py` — LoRA merge implementation
- `tinker_cookbook/weights/_publish.py` — HuggingFace Hub publish
- `docs/download-weights.mdx` — Download guide
- `docs/publish-weights.mdx` — Publishing guide
- `docs/save-load.mdx` — Checkpointing (save_weights_for_sampler vs save_state)

## Full workflow

```python
from tinker_cookbook import weights

# Step 1: Download adapter from Tinker
adapter_dir = weights.download(
    tinker_path="tinker://run-id/sampler_weights/final",
    output_dir="./adapter",
)

# Step 2: Merge LoRA adapter into base model
weights.build_hf_model(
    base_model="Qwen/Qwen3-30B-A3B",
    adapter_path=adapter_dir,
    output_path="./model",
    dtype="bfloat16",  # or "float16", "float32"
)

# Step 3: Publish to HuggingFace Hub
url = weights.publish_to_hf_hub(
    model_path="./model",
    repo_id="user/my-finetuned-model",
    private=True,
)
```

## API reference

### `weights.download()`
Downloads and extracts a checkpoint archive from Tinker.

```python
adapter_dir = weights.download(
    tinker_path="tinker://run-id/sampler_weights/final",  # Tinker checkpoint path
    output_dir="./adapter",      # Local directory to extract to
    base_url=None,               # Optional custom Tinker API URL
)
# Returns: path to extracted directory
```

### `weights.build_hf_model()`
Merges a LoRA adapter into a base model, producing a full HuggingFace model.

```python
weights.build_hf_model(
    base_model="Qwen/Qwen3-8B",     # HF model name or local path
    adapter_path="./adapter",        # Directory with adapter_model.safetensors
    output_path="./model",           # Where to save merged model
    dtype="bfloat16",                # Weight dtype
    trust_remote_code=None,          # Override HF_TRUST_REMOTE_CODE
)
```

### `weights.publish_to_hf_hub()`
Pushes a local model directory to HuggingFace Hub.

```python
url = weights.publish_to_hf_hub(
    model_path="./model",                    # Local model directory
    repo_id="user/my-finetuned-model",       # HF repo ID
    private=True,                            # Private repo
    token=None,                              # HF token (uses HF_TOKEN env var if None)
)
# Returns: URL to published repo
```

### `weights.build_lora_adapter()` (not yet implemented)
Convert Tinker LoRA adapter to standard format for vLLM/SGLang. Currently raises `NotImplementedError` — use `build_hf_model()` instead.

## Checkpoint types (during training)

During training, there are two types of checkpoints:

- **`save_state()`** — Full state (weights + optimizer). Used for **resuming** training.
- **`save_weights_for_sampler()`** — Weights only. Used for **sampling** and **export**.

The `weights.download()` function works with sampler weights (`save_weights_for_sampler` checkpoints).

## Common pitfalls
- `download()` expects a `tinker://` path from `save_weights_for_sampler`, not `save_state`
- `build_hf_model()` requires the base model to be downloadable from HuggingFace
- Set `HF_TOKEN` environment variable for private models and publishing
- `dtype="bfloat16"` is recommended for most models


================================================
FILE: .github/workflows/claude-review.yml
================================================
name: Claude Code

permissions:
  contents: write        # allow Claude to edit files & push commits
  pull-requests: write   # allow PR comments/reviews & PR creation
  issues: write          # allow issue comments & labels
  actions: read

on:
  # Respond to @claude mentions in PRs & issues (trusted users only)
  issue_comment:
    types: [created]
  pull_request_review_comment:
    types: [created]
  issues:
    types: [opened]

env:
  CLAUDE_ARGS: >
    --model claude-opus-4-5-20251101
    --max-turns 50
    --allowedTools "Read" "Write" "Edit" "MultiEdit"
    "Glob" "Grep" "LS"
    "Bash(git:*)" "Bash(gh:*)"
    "mcp__github_inline_comment__create_inline_comment"

jobs:
  claude_mention:
    name: Respond to @claude in issues & PRs
    runs-on: ubuntu-latest
    # Only trusted users (OWNER, MEMBER, COLLABORATOR) can trigger Claude
    if: >
      (github.event_name == 'issue_comment' &&
       contains(github.event.comment.body, '@claude') &&
       contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.comment.author_association)) ||
      (github.event_name == 'pull_request_review_comment' &&
       contains(github.event.comment.body, '@claude') &&
       contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.comment.author_association)) ||
      (github.event_name == 'issues' &&
       contains(github.event.issue.body, '@claude') &&
       contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.issue.author_association))
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
        with:
          fetch-depth: 1

      - name: Claude on @mention
        uses: anthropics/claude-code-action@v1
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          claude_args: ${{ env.CLAUDE_ARGS }}


================================================
FILE: .github/workflows/downstream-compat.yaml
================================================
name: downstream-compat

on:
  workflow_dispatch:
  push:
    branches: [main]
  pull_request:

jobs:
  downstream-compat:
    runs-on: ubuntu-latest

    steps:
      - name: checkout
        uses: actions/checkout@v4

      - name: install-uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true

      - name: venv
        run: uv venv && uv sync --all-extras

      - name: downstream compat tests
        run: uv run pytest tests/downstream_compat/ -v
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}


================================================
FILE: .github/workflows/nightly.yaml
================================================
name: nightly

on:
  workflow_run:
    workflows: ["smoke-test-recipes"]
    types: [completed]
  workflow_dispatch:

permissions:
  contents: write  # needed to create/delete releases

jobs:
  build-and-release:
    runs-on: ubuntu-latest
    # Only run on the upstream repo (forks lack secrets and shouldn't publish releases)
    # and only if: manually triggered, or smoke tests passed on schedule
    if: >
      github.repository == 'thinking-machines-lab/tinker-cookbook' &&
      (github.event_name == 'workflow_dispatch' ||
       (github.event.workflow_run.conclusion == 'success' &&
        github.event.workflow_run.event == 'schedule'))
    steps:
      - name: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: install-uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true

      - name: build
        run: uv build

      - name: smoke test
        run: |
          uv run python -c "import tinker_cookbook; print(f'Version: {tinker_cookbook.__version__}')"

      - name: get version
        id: version
        run: |
          VERSION=$(uv run python -c "import tinker_cookbook; print(tinker_cookbook.__version__)")
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      - name: upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: tinker-cookbook-nightly
          path: dist/
          retention-days: 7

      - name: delete existing nightly release
        run: gh release delete nightly --yes --cleanup-tag || true
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: create nightly release
        run: |
          VERSION="${{ steps.version.outputs.version }}"
          REPO="${{ github.repository }}"
          SHORT_SHA="${GITHUB_SHA::8}"
          {
            echo "Automated nightly build from \`main\` at $(date -u '+%Y-%m-%d %H:%M UTC')."
            echo ""
            echo "**Version:** \`${VERSION}\`"
            echo "**Commit:** [\`${SHORT_SHA}\`](https://github.com/${REPO}/commit/${GITHUB_SHA})"
            echo ""
            echo "### Install"
            echo "\`\`\`bash"
            echo "pip install 'tinker_cookbook @ https://github.com/${REPO}/releases/download/nightly/tinker_cookbook-${VERSION}-py3-none-any.whl'"
            echo "\`\`\`"
          } > /tmp/release-notes.md
          gh release create nightly dist/* \
            --prerelease \
            --title "Nightly Build (${VERSION})" \
            --notes-file /tmp/release-notes.md
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}


================================================
FILE: .github/workflows/pre-commit.yaml
================================================
name: pre-commit

on:
  push:
    branches: [ main ]
  pull_request:

jobs:
  pre-commit:
    runs-on: ubuntu-latest

    steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: pre-commit
      uses: pre-commit/action@v3.0.1


================================================
FILE: .github/workflows/publish-pypi.yaml
================================================
name: publish-pypi

on:
  push:
    tags: ["v[0-9]+.[0-9]+.[0-9]+"]  # only semver tags like v1.2.3
  workflow_dispatch:
    inputs:
      tag:
        description: "Git tag to publish (e.g. v0.2.0). Must already exist."
        required: true

jobs:
  publish:
    runs-on: ubuntu-latest
    if: github.repository == 'thinking-machines-lab/tinker-cookbook'

    steps:
      - name: determine ref
        id: ref
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            echo "ref=${{ inputs.tag }}" >> "$GITHUB_OUTPUT"
          else
            echo "ref=${{ github.ref }}" >> "$GITHUB_OUTPUT"
          fi

      - name: checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ steps.ref.outputs.ref }}
          fetch-depth: 0  # hatch-vcs needs full history for version

      - name: install-uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true

      - name: build
        run: uv build

      - name: verify version matches tag
        run: |
          BUILT_VERSION=$(ls dist/*.tar.gz | sed 's/.*tinker_cookbook-//;s/\.tar\.gz//')
          TAG_VERSION="${GITHUB_REF_NAME#v}"
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            TAG_VERSION="${{ inputs.tag }}"
            TAG_VERSION="${TAG_VERSION#v}"
          fi
          echo "Built version: $BUILT_VERSION"
          echo "Tag version: $TAG_VERSION"
          if [ "$BUILT_VERSION" != "$TAG_VERSION" ]; then
            echo "ERROR: Built version ($BUILT_VERSION) does not match tag ($TAG_VERSION)"
            exit 1
          fi

      - name: run smoke test
        run: |
          uv run python -c "import tinker_cookbook; print(f'Version: {tinker_cookbook.__version__}')"

      - name: publish
        run: uv publish --token="$PYPI_TOKEN"
        env:
          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}


================================================
FILE: .github/workflows/pyright.yaml
================================================
name: pyright

on:
  push:
    branches: [main]
  pull_request:

jobs:
  type-check:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        transformers-version: ["4.57.6", "5.3.0"]

    name: type-check (transformers ${{ matrix.transformers-version }})

    steps:
      - name: checkout
        uses: actions/checkout@v4

      - name: install-uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true

      - name: venv
        run: uv venv && uv sync --all-extras

      - name: pin transformers
        run: uv pip install transformers==${{ matrix.transformers-version }}

      - name: pyright
        run: uv run pyright tinker_cookbook


================================================
FILE: .github/workflows/pytest.yaml
================================================
name: pytest

on:
  workflow_dispatch:
  push:
    branches: [main]
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        transformers-version: ["4.57.6", "5.3.0"]

    name: test (transformers ${{ matrix.transformers-version }})

    steps:
      - name: checkout
        uses: actions/checkout@v4

      - name: install-uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true

      - name: venv
        run: uv venv && uv sync --all-extras

      - name: pin transformers
        run: uv pip install transformers==${{ matrix.transformers-version }}

      - name: pytest (unit)
        run: uv run pytest tinker_cookbook/
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}


================================================
FILE: .github/workflows/smoke-test-evals.yaml
================================================
name: smoke-test-evals

on:
  workflow_dispatch:  # manual trigger
  schedule:
    - cron: "0 7 * * *"  # daily at 7am UTC (1h after recipes)

# Only one eval smoke test run at a time to avoid API contention
concurrency:
  group: smoke-test-evals
  cancel-in-progress: true

jobs:
  smoke-test:
    if: github.repository == 'thinking-machines-lab/tinker-cookbook'
    runs-on: ubuntu-latest
    timeout-minutes: 10

    steps:
      - name: checkout
        uses: actions/checkout@v4

      - name: install-uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true

      - name: venv
        run: uv venv && uv sync --all-extras

      - name: run eval smoke tests
        env:
          TINKER_API_KEY: ${{ secrets.TINKER_API_KEY }}
        run: uv run pytest tests/test_inspect_eval.py -v -x -s


================================================
FILE: .github/workflows/smoke-test-recipes.yaml
================================================
name: smoke-test-recipes

on:
  workflow_dispatch:  # manual trigger
  schedule:
    - cron: "0 6 * * *"  # daily at 6am UTC

# Only one smoke test run at a time to avoid API contention
concurrency:
  group: smoke-test-recipes
  cancel-in-progress: true

jobs:
  # Discover all smoke test files so the matrix is auto-generated.
  # Adding a new test file in tests/ automatically adds a CI job.
  discover:
    if: github.repository == 'thinking-machines-lab/tinker-cookbook'
    runs-on: ubuntu-latest
    outputs:
      tests: ${{ steps.find.outputs.tests }}
    steps:
      - name: checkout
        uses: actions/checkout@v4

      - name: find smoke tests
        id: find
        run: |
          tests=$(find tests/recipes -maxdepth 1 -name 'test_*.py' -printf '%f\n' \
            | sed 's/\.py$//' \
            | jq -R -s -c 'split("\n") | map(select(length > 0))')
          echo "tests=$tests" >> "$GITHUB_OUTPUT"

  smoke-test:
    if: github.repository == 'thinking-machines-lab/tinker-cookbook'
    needs: discover
    runs-on: ubuntu-latest
    timeout-minutes: 35

    strategy:
      fail-fast: false
      matrix:
        test: ${{ fromJson(needs.discover.outputs.tests) }}

    name: ${{ matrix.test }}

    steps:
      - name: checkout
        uses: actions/checkout@v4

      - name: install-uv
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true

      - name: venv
        run: uv venv && uv sync --all-extras

      - name: run smoke test
        env:
          TINKER_API_KEY: ${{ secrets.TINKER_API_KEY }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: uv run pytest tests/recipes/${{ matrix.test }}.py -v -x -s


================================================
FILE: .gitignore
================================================
**/__pycache__
tinker_cookbook/_version.py
.DS_Store
.env
.env.*
.venv
uv.lock


================================================
FILE: .pre-commit-config.yaml
================================================
default_install_hook_types: [pre-commit, pre-push]

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v5.0.0
  hooks:
    - id: check-added-large-files
      args: ["--maxkb=500"]
    - id: end-of-file-fixer
      exclude: |
          (?x)
          ^(
            \.sync_state
          )$
    - id: trailing-whitespace

- repo: https://github.com/astral-sh/ruff-pre-commit
  rev: v0.13.2
  hooks:
    # Run the linter.
    - id: ruff-check
      exclude: tool_declaration_ts\.py$
    # Run the formatter.
    - id: ruff-format
      exclude: tool_declaration_ts\.py$


================================================
FILE: .sync_state
================================================
{
  "last_synced_sha": "b4fee215e812ae5a6b0096ba37b3d9edc4f99cd5",
  "last_sync_time": "2025-10-09T00:09:30.116486"
}


================================================
FILE: AGENTS.md
================================================
# Tinker Cookbook Agent Guide

Quick reference for agents working on `tinker-cookbook`. Full documentation is in `docs/`.

`tinker-cookbook` is a client library with training and eval code built on the Tinker service (hosted by Thinking Machines Lab) and the Tinker SDK (a separate repo with just the API). You author training/eval loops that run on a CPU machine; Tinker executes the heavy GPU work.

**Start here:** `docs/training-sampling.mdx` - Complete walkthrough of training and sampling basics.

## Documentation Map (`docs/`)

**API Fundamentals:**
- `index.mdx` - Tinker overview, division of responsibilities
- `install.mdx` - Installation, API key setup
- `training-sampling.mdx` - **Starter guide**: data prep, forward_backward, sampling, vision inputs
- `losses.mdx` - Loss functions (cross_entropy, importance_sampling, ppo, cispo, dro, forward_backward_custom)
- `save-load.mdx` - Checkpointing (save_weights_for_sampler vs save_state)
- `async.mdx` - Sync/async APIs, futures, overlapping requests
- `model-lineup.mdx` - Available models
- `under-the-hood.mdx` - Clock cycles, worker pools

**API Reference (`api-reference/`):**
- `types.md` - **All API types** (Datum, ModelInput, TensorData, SamplingParams, etc.)
- `trainingclient.md`, `samplingclient.md`, `serviceclient.md`, `restclient.md` - Client APIs

**Supervised Learning (`supervised-learning/`):**
- `../supervised-learning.mdx` - SL overview
- `sl-basic.mdx` - First SL run
- `sl-hyperparams.mdx` - LR formula, batch size
- `sl-loop.mdx` - Minimal training loop
- `prompt-distillation.mdx` - Distilling prompts
- `sweep-case-study.mdx` - Hyperparameter sweeps

**Reinforcement Learning (`rl/`):**
- `../rl.mdx` - RL overview (RLVR, RLHF)
- `rl-basic.mdx` - First RL run
- `rl-envs.mdx` - Custom Env, EnvGroupBuilder, RLDataset
- `rl-loops.mdx` - Minimal RL loop
- `rl-hyperparams.mdx` - batch_size vs group_size, async training
- `sequence-extension.mdx` - Multi-turn RL, KV-cache

**Preferences (`preferences/`):**
- `../preferences.mdx` - DPO vs RLHF overview
- `dpo-guide.mdx` - DPO training
- `rlhf-example.mdx` - RLHF pipeline

**Other:**
- `rendering.mdx` - Renderers (bridge between chat-style data and token sequences), vision inputs, TrainOnWhat
- `completers.mdx` - TokenCompleter vs MessageCompleter
- `evals.mdx` - Inline evals, Inspect AI, custom evaluators
- `lora-primer.mdx` - LoRA background
- `download-weights.mdx` / `publish-weights.mdx` - Weight export

---

## Composing Types

Agents often struggle with the nested type hierarchy. Key resources:

**Reference:** `docs/api-reference/types.md` documents all API types.

**Core types:**
- `Datum` = `model_input` (ModelInput) + `loss_fn_inputs` (dict of TensorData)
- `ModelInput` = list of chunks (EncodedTextChunk, ImageChunk)
- `TensorData` = wrapper for numpy/torch arrays with shape info

**Helper functions** (use these instead of manual construction):
- `datum_from_model_input_weights(model_input, weights, max_length)` - SL datum creation (`supervised/common.py`)
- `conversation_to_datum(messages, renderer, max_length, train_on_what)` - Full pipeline (`supervised/data.py`)
- `renderer.build_supervised_example(messages)` - Returns (ModelInput, weights)
- `ModelInput.from_ints(tokens)` - Create from token list
- `TensorData.from_numpy(arr)` / `TensorData.from_torch(tensor)` - Wrap arrays

---

## Architecture

**Builder pattern:** Config objects are `chz` dataclasses (SupervisedDatasetBuilder, RLDatasetBuilder, EnvGroupBuilder). They expose `.build()`/`__call__()` returning runtime objects.

**Key code locations:**
- SL: `tinker_cookbook/supervised/train.py`
- RL: `tinker_cookbook/rl/train.py`
- DPO: `tinker_cookbook/preference/train_dpo.py`
- Renderers: `tinker_cookbook/renderers/`
- Completers: `tinker_cookbook/completers.py`
- RL types: `tinker_cookbook/rl/types.py`
- Rollout strategies: `tinker_cookbook/rl/rollout_strategy.py` (FailFast, RetryOnFailure)
- Logging: `tinker_cookbook/utils/logtree.py`, `tinker_cookbook/rl/rollouts.py`
- Recipes: `tinker_cookbook/recipes/`

**Training outputs:** RL and SL training write human-readable HTML reports and machine-readable JSON files (metrics, rollout transcripts, per-trajectory summaries) to `log_path`. Point agents at a `log_path` directory to analyze training runs — `metrics.jsonl` for scalar metrics, `*_rollout_summaries.jsonl` for per-trajectory data, and `*_logtree.json` for full rollout transcripts including model responses. See `docs/rl/rl-logging.mdx` for the complete file reference and parsing examples.

---

## Conventions

**Subscript suffixes** for tensor names: `_P` (problems), `_G` (groups), `_T` (tokens), `_D` (datums). Example: `tokens_P_G_T[p][g][t]`

**Code style:**
- Explicit typing; avoid `Any` / `type: ignore`
- Use `safezip`, `timed`, `scope` helpers
- `@chz.chz` decorator for config serialization
- `ml_log.log_metrics` for metrics; `logtree` for transcripts

**Env lifecycle:** `Env` objects are single-use (no reset). Create via `EnvGroupBuilder`.

---

## Common Pitfalls

1. **LoRA LR:** Use `hyperparam_utils.get_lr(model_name)` - LoRA needs ~10x higher LR than full fine-tuning.

2. **Renderer mismatch:** Match `renderer_name` to model family (`llama3`, `qwen3`, `role_colon`).

3. **Async gaps:** Submit `forward_backward_async` and `optim_step_async` back-to-back before awaiting.

4. **Sampler desync:** Create a **new** sampling client after saving weights.

5. **Type construction:** Use helper functions, not manual dict construction. See `supervised/data.py` and `supervised/common.py`.

6. **Group semantics:** RL advantages are centered within each group.

7. **DPO:** Start with `dpo_beta=0.1`, LR~1e-5.

---

## Testing

```bash
# Unit tests (no API needed, colocated *_test.py files)
pytest tinker_cookbook/

# Smoke tests (requires TINKER_API_KEY + network)
pytest tests/
```

For debugging, shrink workloads via `n_batches`, `batch_size`, `group_size` in dataset builders.


================================================
FILE: CHANGELOG.md
================================================
# Changelog

A curated feed of notable changes to `tinker-cookbook`. Small bugfixes and minor argument additions are omitted—this is for changes worth knowing about.

## Format

Each entry includes:
- **Title**: A short, human-readable summary (not the commit message)
- **Date**: When it was merged
- **Type**: `new` (feature), `improvement` (enhancement to existing functionality), or `fix`
- **Tags**: What area it touches (e.g., `renderers`, `rl`, `supervised`, `eval`, `datasets`)
- **PR**: Link to the pull request

---

### [cookbook] Cap training steps with `max_step` parameter ([#328](https://github.com/thinking-machines-lab/tinker-cookbook/pull/328))
**Date:** 2026-01-28
**Type:** new
**Tags:** rl, supervised

Adds optional `max_step` config parameter to cap training steps in on-policy distillation. When set, trains for `min(max_step, dataset_length)`. Default `None` preserves existing behavior.

---

### [cookbook] Configurable KL penalty reference model ([#326](https://github.com/thinking-machines-lab/tinker-cookbook/pull/326))
**Date:** 2026-01-27
**Type:** new
**Tags:** rl

Makes the KL penalty reference model configurable in RL training. Users can now specify a different base model or a checkpoint for the KL penalty computation, rather than using the default.

---

### [cookbook] Checkpoints now have 7-day TTL by default ([#324](https://github.com/thinking-machines-lab/tinker-cookbook/pull/324))
**Date:** 2026-01-27
**Type:** improvement
**Tags:** infrastructure

Checkpoints are now set to auto-expire after 7 days by default, helping users avoid unexpected storage costs.

---

### [cookbook] Support for dedicated capacity ([#315](https://github.com/thinking-machines-lab/tinker-cookbook/pull/315))
**Date:** 2026-01-21
**Type:** new
**Tags:** infrastructure

Adds support for dedicated capacity in training configurations.

---

### [cookbook] Modal sandbox backend for code execution ([#278](https://github.com/thinking-machines-lab/tinker-cookbook/pull/278), [#291](https://github.com/thinking-machines-lab/tinker-cookbook/pull/291), [#300](https://github.com/thinking-machines-lab/tinker-cookbook/pull/300), [#302](https://github.com/thinking-machines-lab/tinker-cookbook/pull/302))
**Date:** 2026-01-07 to 2026-01-15
**Type:** new
**Tags:** sandboxes, rl

Adds Modal as an alternative sandbox backend for code execution alongside SandboxFusion. Includes:
- `ModalSandbox` and `ModalSandboxPool` for managing sandboxes
- Warm pool maintenance with configurable timeouts
- Rate limiting to respect Modal account limits
- Async API calls for better performance
- Documentation for both sandbox backends

See `tinker_cookbook/sandbox/` for the new module structure.

---

### [cookbook] Fix streaming dataset batch skipping ([#295](https://github.com/thinking-machines-lab/tinker-cookbook/pull/295))
**Date:** 2026-01-19
**Type:** fix
**Tags:** supervised

HuggingFace's shuffle is deterministic, so batch skipping now works correctly with streaming datasets. Forward skipping through batches no longer causes data inconsistencies.

---

### [cookbook] Fix supervised metrics from OptimStepResponse ([#286](https://github.com/thinking-machines-lab/tinker-cookbook/pull/286))
**Date:** 2026-01-20
**Type:** fix
**Tags:** supervised

Previously, optimization metrics (like gradient norms) from `OptimStepResponse` were being dropped in `finish_batch`. Metrics are now properly captured and merged into the step's metrics dictionary.

---

### [cookbook] Adapter to base-model merge script ([#292](https://github.com/thinking-machines-lab/tinker-cookbook/pull/292))
**Date:** 2026-01-08
**Type:** new
**Tags:** tools

New script to merge LoRA/adapter weights back into the base model.

---

### [cookbook] Fix inspect_utils for list content from parse_response ([#299](https://github.com/thinking-machines-lab/tinker-cookbook/pull/299))
**Date:** 2026-01-12
**Type:** fix
**Tags:** eval

Fixed `inspect_utils.py` which assumed `parse_response` always returns string content. Renderers like `Qwen3Renderer` return list content (with `ThinkingPart`, `ToolCallPart`, etc.) when responses contain `<think>` or `<tool_call>` blocks. Now uses `renderers.get_text_content()` which handles both formats.

---

### [cookbook] Fix Kimi K2 and DeepSeek V3 renderer parsing ([#279](https://github.com/thinking-machines-lab/tinker-cookbook/pull/279), [#285](https://github.com/thinking-machines-lab/tinker-cookbook/pull/285))
**Date:** 2026-01-05 to 2026-01-07
**Type:** fix
**Tags:** renderers

Fixes tool declaration rendering for Kimi K2 and Qwen3 to match HuggingFace templates. Also fixes DeepSeekV3ThinkingRenderer to properly parse thinking traces via a round-trip test ensuring `build_supervised_example` and `parse_response` correspondence.

---

### [sdk] Torch is now an optional dependency ([#15](https://github.com/thinking-machines-lab/tinker/pull/15))
**Date:** 2026-01-20
**Type:** improvement
**Tags:** dependencies

Moves torch to an optional dependency in the SDK. Applications that don't need torch for training can now use the SDK without installing it. Import guards added to `training_client.py`.

---

### Major renderer overhaul: tool calling, structured content ([#220](https://github.com/thinking-machines-lab/tinker-cookbook/pull/220), [#221](https://github.com/thinking-machines-lab/tinker-cookbook/pull/221), [#238](https://github.com/thinking-machines-lab/tinker-cookbook/pull/238), [#243](https://github.com/thinking-machines-lab/tinker-cookbook/pull/243), [#244](https://github.com/thinking-machines-lab/tinker-cookbook/pull/244), [#250](https://github.com/thinking-machines-lab/tinker-cookbook/pull/250))
**Date:** 2025-12-26 to 2025-12-28
**Type:** improvement
**Tags:** renderers, rl

A series of PRs that significantly improve the renderer system:

**Tool calling support:** New `ToolSpec` type for defining tools and `create_conversation_prefix_with_tools()` API on all renderers. Tool call parsing supported for Qwen3, DeepSeek V3, and Kimi K2. `UnparsedToolCall` captures tool calls that fail to parse.

**Structured message content:** The `Message.thinking` field is removed (**breaking**). Thinking content is now represented as `ThinkingPart` in the content list, alongside `TextPart`, `ImagePart`, and `ToolCallPart`. Use `get_text_content(message)` to extract text after `parse_response`.

**Clearer field names:** `RenderedMessage` fields renamed (**breaking**): `prefix` → `header`, `content` → `output`, `suffix` → `stop_overlap`. `Renderer` changed from Protocol to ABC.

**Sequence extension property:** New `has_extension_property` on `Renderer` indicates whether consecutive timesteps can be merged for O(T) instead of O(T²) compute in multi-turn RL.

**Modular architecture:** `renderers.py` split into `tinker_cookbook/renderers/` package with per-model modules (`qwen3.py`, `deepseek_v3.py`, `kimi_k2.py`, etc.). Imports unchanged.

**HF compatibility:** Various fixes to match HuggingFace chat templates, with expanded test coverage using random conversation generation.

---

### Qwen3 thinking blocks can now be preserved in history ([#142](https://github.com/thinking-machines-lab/tinker-cookbook/pull/142))
**Date:** 2025-12-06
**Type:** new
**Tags:** renderers, rl

The Qwen3Renderer now has a `strip_thinking_from_history` option. By default (`True`), it strips `<think>...</think>` blocks from previous assistant turns—matching how Qwen3 was trained. Set it to `False` if you're doing multi-turn RL and want to use sequence extension: preserving thinking lets turns merge into one sequence, reducing compute cost.

---

### Disable checkpoint saving with `save_every=0` ([#149](https://github.com/thinking-machines-lab/tinker-cookbook/pull/149))
**Date:** 2025-12-06
**Type:** improvement
**Tags:** supervised, rl

Setting `save_every=0` now disables checkpoint saving entirely (previously it crashed with a divide-by-zero). Useful for quick test runs where you don't need checkpoints.

---

### xmux: launch experiment sweeps in tmux ([#138](https://github.com/thinking-machines-lab/tinker-cookbook/pull/138))
**Date:** 2025-12-02
**Type:** new
**Tags:** tools

New `xmux` utility for running experiment sweeps. It spawns parallel jobs in a tmux session where you can monitor each experiment's progress in separate windows. See `tinker_cookbook/xmux/examples/` for usage.

---

### Optimizer state now loads correctly on resume ([#140](https://github.com/thinking-machines-lab/tinker-cookbook/pull/140), [#141](https://github.com/thinking-machines-lab/tinker-cookbook/pull/141))
**Date:** 2025-12-02
**Type:** fix
**Tags:** supervised, rl

Training resumption now properly loads optimizer state (momentum, etc.) alongside model weights. Previously, `load_state()` didn't restore the optimizer, which could affect training dynamics after a checkpoint resume.

---

### Tracing support for supervised training ([#88](https://github.com/thinking-machines-lab/tinker-cookbook/pull/88))
**Date:** 2025-11-21
**Type:** new
**Tags:** supervised, tools

Set `enable_trace=True` to generate trace events during supervised training. Visualize with Perfetto to see where time is spent. Run `python -m tinker_cookbook.utils.trace` to convert the trace file.

---

### Code RL recipe with DeepCoder ([#83](https://github.com/thinking-machines-lab/tinker-cookbook/pull/83))
**Date:** 2025-11-18
**Type:** new
**Tags:** recipes, rl

New recipe for RL on competitive programming problems using the DeepCoder dataset. Code execution is sandboxed via Sandbox Fusion. See `tinker_cookbook/recipes/code_rl/`.

---

### Configurable temperature for RL sampling ([#86](https://github.com/thinking-machines-lab/tinker-cookbook/pull/86))
**Date:** 2025-11-17
**Type:** new
**Tags:** rl

Temperature is now a configurable parameter in RL configs. Previously hardcoded to 1.0.

---

### Per-message training control with `TrainOnWhat.CUSTOMIZED` ([#85](https://github.com/thinking-machines-lab/tinker-cookbook/pull/85))
**Date:** 2025-11-14
**Type:** new
**Tags:** supervised, renderers

New `TrainOnWhat.CUSTOMIZED` option lets you set a `trainable: bool` field on each message to control which messages get loss applied. Useful for training on specific turns in a conversation.

---

### Interactive environment debugging with `play_w_env` ([#76](https://github.com/thinking-machines-lab/tinker-cookbook/pull/76))
**Date:** 2025-11-07
**Type:** new
**Tags:** rl, tools

New utility to "role-play" as the policy and interact with an Environment. Useful for debugging reward functions and environment logic. See `tinker_cookbook/recipes/multiplayer_rl/twenty_questions/play.py` for an example.

---


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Tinker Cookbook

We welcome contributions! This project is built in the spirit of open science and collaborative development.

## Development setup

```bash
git clone https://github.com/thinking-machines-lab/tinker-cookbook.git
cd tinker-cookbook
uv sync --extra dev
pre-commit install
```

This installs dev dependencies and registers pre-commit hooks that run `ruff` formatting and linting on every commit.

## Running tests

```bash
# Unit tests (no API key needed, colocated *_test.py files)
uv run pytest tinker_cookbook/

# Integration tests (requires TINKER_API_KEY)
uv run pytest tests/
```

## Code style

We use [ruff](https://docs.astral.sh/ruff/) for linting and formatting (line length: 100). Pre-commit hooks run automatically on each commit.

```bash
uv run ruff check tinker_cookbook/
uv run ruff format tinker_cookbook/
```

## Type checking

We use [pyright](https://github.com/microsoft/pyright) for static type analysis. Please use typing wherever possible; avoid `Any` and `type: ignore`; prefer casting. However, avoid convoluted generics or overly verbose code just to satisfy the type checker. Prefer single types over union types.

```bash
uv run pyright tinker_cookbook
```

## Pull request process

1. Create a feature branch from `main`
2. Make your changes with tests if applicable
3. Ensure all checks pass: `pre-commit run --all-files`
4. Open a PR with a clear description of the change

CI runs pre-commit, pyright, and pytest on every PR.

## Project structure

- `tinker_cookbook/` — Library code (supervised learning, RL, renderers, utilities)
- `tinker_cookbook/recipes/` — Example training scripts
- `tests/` — Integration tests (require API key)
- `docs/` — Documentation (MDX format, synced to docs site)

---

# Design conventions

## Organization of training scripts

We're designing the codebase with the following goals:

1. Low barrier to entry: it should be dead simple to run something and see numbers go up.
2. Extensible: it should be possible to pass in custom datasets and evals and control all the hyperparameters.
3. Science-friendly: it should be easy to run sweeps, and analyze the results.

To achieve this, we'll use the following structure around training scripts:

- There's a main training function, such as [rl/train.py](tinker_cookbook/rl/train.py) or [supervised/train.py](tinker_cookbook/supervised/train.py), which contains the main loop.
    - This function contains a detailed config object (`Config`), which isn't constructable from the command line.
    - The config contains members that specify things like datasets and evals. However, these should be chz configs (with a `.build` method that constructs the actual object) or callables (we recommend using functools.partial). This way, the config is serializable, which is useful for sweeps.
- There are launch scripts that assemble training configs (e.g., [recipes/math_rl/train.py](tinker_cookbook/recipes/math_rl/train.py)), which construct a smaller config object (`CLIConfig`) from the command line.

## Async

Async is very useful for RL, where it allows us to make many queries in parallel (e.g., sampling calls). For all of the interfaces used in RL (such as the `Env` class), all the methods that take nontrivial amounts of time should be async. For some of the other code, such as [recipes/sl_loop.py](tinker_cookbook/recipes/sl_loop.py), we've chosen not to use async methods, just to make it more beginner-friendly, as many python programmers are not familiar with async.

## Classes

There are a lot of different classes, which might make the code feel less approachable. However, they follow *the builder pattern*, and the code should be less confusing when you know the pattern.

We can illustrate the pattern with the two main examples:

- A `SupervisedDatasetBuilder` is a configuration object which builds a `SupervisedDataset`.
- An `RLDatasetBuilder` is a configuration object which builds an `RLDataset`, which generates batches of `EnvGroupBuilder` objects, which each generate a group of `Env` objects.

Here, the `SupervisedDatasetBuilder`, `RLDatasetBuilder`, and `EnvGroupBuilder` are all configuration objects, which have a `__call__` method that builds another object. You can see these objects in [supervised/types.py](tinker_cookbook/supervised/types.py) and [rl/types.py](tinker_cookbook/rl/types.py).

In general, we use a lot of configuration objects, with a `__call__` method that returns a heavyweight object (like a dataset). We use `chz` for the configuration objects -- it's similar to a dataclass but with some extra features that are nice for configs. We use either dataclasses or regular python classes for the heavyweight objects.

## Envs

An `Env` is an RL environment. For those with an RL background, it roughly corresponds to an MDP or a POMDP; however, we use it in more general cases (such as multi-agent settings) that don't strictly correspond to the MDP/POMDP formalism. It's roughly analogous to the concept of an Env in OpenAI Gym, but unlike OpenAI Gym, we don't have a `reset` method; rather, the env should be discarded after a rollout. Any shared resources should be maintained by whatever object is creating the envs.

The `Env`s are created by `EnvGroupBuilder`s. The envs in the group returned by `EnvGroupBuilder` have something in common; either they correspond to the same task (in which case we can use this information for variance reduction, as in GRPO, which centers per group); or, we can use the group to define a multi-agent environment.

- One common multi-agent environment is where we use a pairwise preference model to compare pairs of completions.
- We can also use the group to define a two-player game. Some two-player games such as tic-tac-toe are currently supported through the [text_arena](tinker_cookbook/recipes/multiplayer_rl/text_arena/env.py) environments.


## Notation

We'll use subscripts to indicate the shapes of objects. For example, `tokens_P_G_T` indicates a three-dimensional array of tokens, with `P` problems, `G` groups, and `T` tokens per group, so `tokens_P_G_T[p][g][t]` should refer to a single token. In many cases, the arrays will be ragged. E.g., the `T` axis will have different lengths for different `(p,g)`. Sometimes, a given dimension will be flattened from two dimensions. If we write `tokens_PG_T`, that means that we have a two-dimensional array, where the 0th dimension is flattened from the `P` and `G` dimensions.

### Common Dimension Names

Here are the standard dimension subscripts used throughout the codebase:

- `_D`: Data/Datum dimension (for training data items)
- `_G`: Group dimension (for multiple attempts/rollouts of the same problem)
- `_P`: Problem dimension (for different problems/prompts)
- `_T`: Token/Time dimension (for sequences)

The relationship between dimensions in RL:
- A batch contains multiple problems (`_P`)
- Each problem spawns multiple attempts/environments (`_G`), forming a group
- Each attempt produces one trajectory
- Advantages are normalized within each group (across the `_G` dimension)

Examples:
- `env_group_builders_P`: A list of environment builders, one per problem
- `trajectories_G`: Multiple trajectories from attempts at the same problem
- `rewards_G`: Rewards for each attempt within a group
- `tokens_P_G_T`: Tokens with problem, group, and time dimensions
- `data_D`: A list of training data items

## Questions?

Email us at tinker@thinkingmachines.ai.


================================================
FILE: LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2025 Thinking Machines Lab

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
<h1 align="center">Tinker Cookbook</h1>
<div align="center">
  <img src="assets/tinker-cover.png" width="60%" />
</div>

<div align="center">

[![pytest](https://github.com/thinking-machines-lab/tinker-cookbook/actions/workflows/pytest.yaml/badge.svg)](https://github.com/thinking-machines-lab/tinker-cookbook/actions/workflows/pytest.yaml)
[![pyright](https://github.com/thinking-machines-lab/tinker-cookbook/actions/workflows/pyright.yaml/badge.svg)](https://github.com/thinking-machines-lab/tinker-cookbook/actions/workflows/pyright.yaml)
[![smoke-test-recipes](https://github.com/thinking-machines-lab/tinker-cookbook/actions/workflows/smoke-test-recipes.yaml/badge.svg)](https://github.com/thinking-machines-lab/tinker-cookbook/actions/workflows/smoke-test-recipes.yaml)
[![PyPI](https://img.shields.io/pypi/v/tinker-cookbook)](https://pypi.org/project/tinker-cookbook/)

</div>

We provide two libraries for the broader community to customize their language models: `tinker` and `tinker-cookbook`.

- `tinker` is a training SDK for researchers and developers to fine-tune language models. You send API requests to us and we handle the complexities of distributed training.
- `tinker-cookbook` includes realistic examples of fine-tuning language models. It builds on the Tinker API and provides common abstractions to fine-tune language models.

## Installation

1. Sign up for Tinker [here](https://auth.thinkingmachines.ai/sign-up).
2. Once you have access, create an API key from the [console](https://tinker-console.thinkingmachines.ai) and export it as environment variable `TINKER_API_KEY`.
3. Install `tinker-cookbook` (includes the `tinker` SDK as a dependency):
   ```bash
   # Latest stable release from PyPI
   uv pip install tinker-cookbook

   # Or install the nightly build
   uv pip install 'tinker-cookbook @ git+https://github.com/thinking-machines-lab/tinker-cookbook.git@nightly'
   ```

## Tinker

Refer to the [docs](https://tinker-docs.thinkingmachines.ai/training-sampling) to start from basics.
Here we introduce a few Tinker primitives - the basic components to fine-tune LLMs:

```python
import tinker
service_client = tinker.ServiceClient()
training_client = service_client.create_lora_training_client(
  base_model="meta-llama/Llama-3.2-1B", rank=32,
)
training_client.forward_backward(...)
training_client.optim_step(...)
training_client.save_state(...)
training_client.load_state(...)

sampling_client = training_client.save_weights_and_get_sampling_client(name="my_model")
sampling_client.sample(...)
```

See [tinker_cookbook/recipes/sl_loop.py](tinker_cookbook/recipes/sl_loop.py) and [tinker_cookbook/recipes/rl_loop.py](tinker_cookbook/recipes/rl_loop.py) for minimal examples of using these primitives to fine-tune LLMs.

To download the weights of any model:
```python
rest_client = service_client.create_rest_client()
future = rest_client.get_checkpoint_archive_url_from_tinker_path(sampling_client.model_path)
with open("model-checkpoint.tar.gz", "wb") as f:
    f.write(future.result())
```

### Tinker Cookbook

Besides these primitives, we also offer **Tinker Cookbook** (a.k.a. this repo), a library of a wide range of abstractions to help you customize training environments.
[`tinker_cookbook/recipes/sl_basic.py`](tinker_cookbook/recipes/sl_basic.py) and [`tinker_cookbook/recipes/rl_basic.py`](tinker_cookbook/recipes/rl_basic.py) contain minimal examples to configure supervised learning and reinforcement learning.

We also include a wide range of more sophisticated examples in the [`tinker_cookbook/recipes/`](tinker_cookbook/recipes/) folder:
1. **[Chat supervised learning](tinker_cookbook/recipes/chat_sl/)**: supervised fine-tuning on conversational datasets like Tulu3.
2. **[Math reasoning](tinker_cookbook/recipes/math_rl/)**: improve LLM reasoning capability by rewarding it for answering math questions correctly.
3. **[Preference learning](tinker_cookbook/recipes/preference/)**: showcase a three-stage RLHF pipeline: 1) supervised fine-tuning, 2) learning a reward model, 3) RL against the reward model.
4. **[Tool use](tinker_cookbook/recipes/search_tool/)**: train LLMs to better use retrieval tools to answer questions more accurately.
5. **[Prompt distillation](tinker_cookbook/recipes/prompt_distillation/)**: internalize long and complex instructions into LLMs.
6. **[Multi-Agent](tinker_cookbook/recipes/multiplayer_rl/)**: optimize LLMs to play against another LLM or themselves.

These examples are located in each subfolder, and their `README.md` files will walk you through the key implementation details, the commands to run them, and the expected performance.

### Documentation

The `docs/` directory contains a mirror of the Tinker documentation. These files are synced from our internal documentation site.

**Note:** The documentation files use MDX format (Markdown with JSX), which includes some syntax that isn't standard Markdown. You may see things like `import` statements, `<Callout>` components, or curly-brace expressions. These are artifacts of our documentation framework - the actual content should still be readable as Markdown.

If you find errors or want to improve the documentation, feel free to submit a PR editing files in `docs/`. We'll sync the changes back to our documentation site.

For the rendered documentation, visit [tinker-docs.thinkingmachines.ai](https://tinker-docs.thinkingmachines.ai).

### Import our utilities

Tinker cookbook includes several utilities. Here's a quick overview:
- [`renderers`](tinker_cookbook/renderers/) converts tokens from/to structured chat message objects
- [`hyperparam_utils`](tinker_cookbook/hyperparam_utils.py) helps calculate hyperparameters suitable for LoRAs
- [`evaluation`](tinker_cookbook/eval/evaluators.py) provides abstractions for evaluating Tinker models and [`inspect_evaluation`](tinker_cookbook/eval/inspect_evaluators.py) shows how to integrate with InspectAI to make evaluating on standard benchmarks easy.

## Development Setup

```bash
uv sync --extra dev
pre-commit install
```

This installs dev dependencies and registers pre-commit hooks that run `ruff` formatting and linting on every commit. CI enforces these checks on all pull requests.

## Contributing

This project is built in the spirit of open science and collaborative development. We believe that the best tools emerge through community involvement and shared learning.

We welcome PR contributions after our private beta is over. If you have any feedback, please email us at tinker@thinkingmachines.ai.

## Citation
If you use Tinker for your research, please cite it as:
```
Thinking Machines Lab, 2025. Tinker. https://thinkingmachines.ai/tinker/.
```

Or use this BibTeX citation:
```
@misc{tml2025tinker,
  author = {Thinking Machines Lab},
  title = {Tinker},
  year = {2025},
  url = {https://thinkingmachines.ai/tinker/},
}
```


================================================
FILE: docs/api-reference/apifuture.md
================================================
API Future classes for handling async operations with retry logic.

## `APIFuture` Objects

```python
class APIFuture(ABC, Generic[T])
```

Abstract base class for futures that can be awaited or accessed synchronously.

APIFuture provides a unified interface for handling async operations that can
be accessed both synchronously (via result()) and asynchronously (via await or result_async()).
This allows for flexible usage patterns in both sync and async contexts.

The future can be awaited directly in async contexts:
```python
result = await api_future  # Equivalent to await api_future.result_async()
```

Or accessed synchronously:
```python
result = api_future.result()  # Blocks until complete
```

Type Parameters:
- `T`: The type of the result value

Example:
```python
# In async context
future = training_client.forward_backward(data, "cross_entropy")
result = await future  # Or await future.result_async()

# In sync context
future = training_client.forward_backward(data, "cross_entropy")
result = future.result()
```

#### `result_async`

```python
async def result_async(timeout: float | None = None) -> T
```

Get the result asynchronously with optional timeout.

Args:
- `timeout`: Maximum time to wait in seconds. None means wait indefinitely.

Returns:
- The result value of type `T`

Raises:
    TimeoutError: If timeout is exceeded

#### `result`

```python
def result(timeout: float | None = None) -> T
```

Get the result synchronously with optional timeout.

Args:
- `timeout`: Maximum time to wait in seconds. None means wait indefinitely.

Returns:
- The result value of type `T`

Raises:
    TimeoutError: If timeout is exceeded

## `AwaitableConcurrentFuture` Objects

```python
class AwaitableConcurrentFuture(APIFuture[T])
```

Implementation of APIFuture that wraps a concurrent.futures.Future.

This class bridges Python's concurrent.futures with asyncio, allowing a
standard Future to be used in async contexts. It's commonly returned by
Tinker API methods to provide both sync and async access patterns.

Args:
- `future`: A concurrent.futures.Future to wrap

Example:
```python
# Internal usage - typically you receive these from API methods
concurrent_future = some_operation()
api_future = AwaitableConcurrentFuture(concurrent_future)

# Can be used synchronously
result = api_future.result()

# Or asynchronously
result = await api_future
```

#### `result`

```python
def result(timeout: float | None = None) -> T
```

Get the result synchronously with optional timeout.

Args:
- `timeout`: Maximum time to wait in seconds. None means wait indefinitely.

Returns:
- The result value of type `T`

Raises:
    TimeoutError: If timeout is exceeded
    Exception: Any exception raised by the underlying operation

Example:
```python
future = rest_client.get_training_run("run-id")
result = future.result(timeout=30)  # Wait up to 30 seconds
```

#### `result_async`

```python
async def result_async(timeout: float | None = None) -> T
```

Async version of result.

#### `future`

```python
def future() -> ConcurrentFuture[T]
```

Get the underlying concurrent.futures.Future.

Returns:
- The wrapped `ConcurrentFuture` object

Example:
```python
api_future = rest_client.get_training_run("run-id")
concurrent_future = api_future.future()
# Can now use standard concurrent.futures methods
if concurrent_future.done():
    result = concurrent_future.result()
```


================================================
FILE: docs/api-reference/exceptions.md
================================================
## `TinkerError` Objects

```python
class TinkerError(Exception)
```

Base exception for all Tinker-related errors.

## `APIError` Objects

```python
class APIError(TinkerError)
```

Base class for all API-related errors.

#### `body`

The API response body.

If the API responded with a valid JSON structure then this property will be the
decoded result.

If it isn't a valid JSON structure then this will be the raw response.

If there was no response associated with this error then it will be `None`.

## `APIResponseValidationError` Objects

```python
class APIResponseValidationError(APIError)
```

Raised when API response doesn't match expected schema.

## `APIStatusError` Objects

```python
class APIStatusError(APIError)
```

Raised when an API response has a status code of 4xx or 5xx.

## `APIConnectionError` Objects

```python
class APIConnectionError(APIError)
```

Raised when a connection error occurs while making an API request.

## `APITimeoutError` Objects

```python
class APITimeoutError(APIConnectionError)
```

Raised when an API request times out.

## `BadRequestError` Objects

```python
class BadRequestError(APIStatusError)
```

HTTP 400: The request was invalid or malformed.

## `AuthenticationError` Objects

```python
class AuthenticationError(APIStatusError)
```

HTTP 401: Authentication credentials are missing or invalid.

## `PermissionDeniedError` Objects

```python
class PermissionDeniedError(APIStatusError)
```

HTTP 403: Insufficient permissions to access the resource.

## `NotFoundError` Objects

```python
class NotFoundError(APIStatusError)
```

HTTP 404: The requested resource was not found.

## `ConflictError` Objects

```python
class ConflictError(APIStatusError)
```

HTTP 409: The request conflicts with the current state of the resource.

## `UnprocessableEntityError` Objects

```python
class UnprocessableEntityError(APIStatusError)
```

HTTP 422: The request was well-formed but contains semantic errors.

## `RateLimitError` Objects

```python
class RateLimitError(APIStatusError)
```

HTTP 429: Too many requests, rate limit exceeded.

## `InternalServerError` Objects

```python
class InternalServerError(APIStatusError)
```

HTTP 500+: An error occurred on the server.

## `SidecarError` Objects

```python
class SidecarError(TinkerError)
```

Base exception for subprocess sidecar errors.

## `SidecarStartupError` Objects

```python
class SidecarStartupError(SidecarError)
```

Raised when the sidecar subprocess fails to start or times out.

## `SidecarDiedError` Objects

```python
class SidecarDiedError(SidecarError)
```

Raised when the sidecar subprocess exits unexpectedly while requests are pending.

## `SidecarIPCError` Objects

```python
class SidecarIPCError(SidecarError)
```

Raised when communication with the sidecar subprocess fails.

## `RequestFailedError` Objects

```python
class RequestFailedError(TinkerError)
```

Raised when an asynchronous request completes in a failed state.


================================================
FILE: docs/api-reference/restclient.md
================================================
RestClient for Tinker API REST operations.

## `RestClient` Objects

```python
class RestClient(TelemetryProvider)
```

Client for REST API operations like listing checkpoints and metadata.

The RestClient provides access to various REST endpoints for querying
model information, checkpoints, and other resources. You typically get one
by calling `service_client.create_rest_client()`.

Key methods:
- list_checkpoints() - list available model checkpoints (both training and sampler)
- list_user_checkpoints() - list all checkpoints across all of the user's training runs
- get_training_run() - get model information and metadata as ModelEntry
- delete_checkpoint() - delete an existing checkpoint for a training run
- get_checkpoint_archive_url() - get signed URL to download checkpoint archive
- publish_checkpoint_from_tinker_path() - publish a checkpoint to make it public
- unpublish_checkpoint_from_tinker_path() - unpublish a checkpoint to make it private
- set_checkpoint_ttl_from_tinker_path() - set or remove TTL on a checkpoint

Args:
- `holder`: Internal client managing HTTP connections and async operations

Example:
```python
rest_client = service_client.create_rest_client()
training_run = rest_client.get_training_run("run-id").result()
print(f"Training Run: {training_run.training_run_id}, LoRA: {training_run.is_lora}")
checkpoints = rest_client.list_checkpoints("run-id").result()
print(f"Found {len(checkpoints.checkpoints)} checkpoints")
for checkpoint in checkpoints.checkpoints:
    print(f"  {checkpoint.checkpoint_type}: {checkpoint.checkpoint_id}")
```

#### `get_training_run`

```python
def get_training_run(
    training_run_id: types.ModelID,
    access_scope: Literal["owned", "accessible"] = "owned"
) -> ConcurrentFuture[types.TrainingRun]
```

Get training run info.

Args:
- `training_run_id`: The training run ID to get information for

Returns:
- A `Future` containing the training run information

Example:
```python
future = rest_client.get_training_run("run-id")
response = future.result()
print(f"Training Run ID: {response.training_run_id}, Base: {response.base_model}")
```

#### `get_training_run_async`

```python
async def get_training_run_async(
    training_run_id: types.ModelID,
    access_scope: Literal["owned",
                          "accessible"] = "owned") -> types.TrainingRun
```

Async version of get_training_run.

#### `get_training_run_by_tinker_path`

```python
def get_training_run_by_tinker_path(
    tinker_path: str,
    access_scope: Literal["owned", "accessible"] = "owned"
) -> ConcurrentFuture[types.TrainingRun]
```

Get training run info.

Args:
- `tinker_path`: The tinker path to the checkpoint

Returns:
- A `Future` containing the training run information

Example:
```python
future = rest_client.get_training_run_by_tinker_path("tinker://run-id/weights/checkpoint-001")
response = future.result()
print(f"Training Run ID: {response.training_run_id}, Base: {response.base_model}")
```

#### `get_training_run_by_tinker_path_async`

```python
async def get_training_run_by_tinker_path_async(
    tinker_path: str,
    access_scope: Literal["owned",
                          "accessible"] = "owned") -> types.TrainingRun
```

Async version of get_training_run_by_tinker_path.

#### `get_weights_info_by_tinker_path`

```python
def get_weights_info_by_tinker_path(
        tinker_path: str) -> APIFuture[types.WeightsInfoResponse]
```

Get checkpoint information from a tinker path.

Args:
- `tinker_path`: The tinker path to the checkpoint

Returns:
- An `APIFuture` containing the checkpoint information. The future is awaitable.

Example:
```python
future = rest_client.get_weights_info_by_tinker_path("tinker://run-id/weights/checkpoint-001")
response = future.result()  # or await future
print(f"Base Model: {response.base_model}, LoRA Rank: {response.lora_rank}")
```

#### `list_training_runs`

```python
def list_training_runs(
    limit: int = 20,
    offset: int = 0,
    access_scope: Literal["owned", "accessible"] = "owned"
) -> ConcurrentFuture[types.TrainingRunsResponse]
```

List training runs with pagination support.

Args:
- `limit`: Maximum number of training runs to return (default 20)
- `offset`: Offset for pagination (default 0)

Returns:
- A `Future` containing the `TrainingRunsResponse` with training runs and cursor info

Example:
```python
future = rest_client.list_training_runs(limit=50)
response = future.result()
print(f"Found {len(response.training_runs)} training runs")
print(f"Total: {response.cursor.total_count}")
# Get next page
next_page = rest_client.list_training_runs(limit=50, offset=50)
```

#### `list_training_runs_async`

```python
async def list_training_runs_async(
    limit: int = 20,
    offset: int = 0,
    access_scope: Literal["owned", "accessible"] = "owned"
) -> types.TrainingRunsResponse
```

Async version of list_training_runs.

#### `list_checkpoints`

```python
def list_checkpoints(
    training_run_id: types.ModelID
) -> ConcurrentFuture[types.CheckpointsListResponse]
```

List available checkpoints (both training and sampler).

Args:
- `training_run_id`: The training run ID to list checkpoints for

Returns:
- A `Future` containing the `CheckpointsListResponse` with available checkpoints

Example:
```python
future = rest_client.list_checkpoints("run-id")
response = future.result()
for checkpoint in response.checkpoints:
    if checkpoint.checkpoint_type == "training":
        print(f"Training checkpoint: {checkpoint.checkpoint_id}")
    elif checkpoint.checkpoint_type == "sampler":
        print(f"Sampler checkpoint: {checkpoint.checkpoint_id}")
```

#### `list_checkpoints_async`

```python
async def list_checkpoints_async(
        training_run_id: types.ModelID) -> types.CheckpointsListResponse
```

Async version of list_checkpoints.

#### `get_checkpoint_archive_url`

```python
def get_checkpoint_archive_url(
    training_run_id: types.ModelID, checkpoint_id: str
) -> ConcurrentFuture[types.CheckpointArchiveUrlResponse]
```

Get signed URL to download checkpoint archive.

Args:
- `training_run_id`: The training run ID to download weights for
- `checkpoint_id`: The checkpoint ID to download

Returns:
- A `Future` containing the `CheckpointArchiveUrlResponse` with signed URL and expiration

Example:
```python
future = rest_client.get_checkpoint_archive_url("run-id", "checkpoint-123")
response = future.result()
print(f"Download URL: {response.url}")
print(f"Expires at: {response.expires_at}")
# Use the URL to download the archive with your preferred HTTP client
```

#### `get_checkpoint_archive_url_async`

```python
async def get_checkpoint_archive_url_async(
        training_run_id: types.ModelID,
        checkpoint_id: str) -> types.CheckpointArchiveUrlResponse
```

Async version of get_checkpoint_archive_url.

#### `delete_checkpoint`

```python
def delete_checkpoint(training_run_id: types.ModelID,
                      checkpoint_id: str) -> ConcurrentFuture[None]
```

Delete a checkpoint for a training run.

#### `delete_checkpoint_async`

```python
async def delete_checkpoint_async(training_run_id: types.ModelID,
                                  checkpoint_id: str) -> None
```

Async version of delete_checkpoint.

#### `delete_checkpoint_from_tinker_path`

```python
def delete_checkpoint_from_tinker_path(
        tinker_path: str) -> ConcurrentFuture[None]
```

Delete a checkpoint referenced by a tinker path.

#### `delete_checkpoint_from_tinker_path_async`

```python
async def delete_checkpoint_from_tinker_path_async(tinker_path: str) -> None
```

Async version of delete_checkpoint_from_tinker_path.

#### `get_checkpoint_archive_url_from_tinker_path`

```python
def get_checkpoint_archive_url_from_tinker_path(
        tinker_path: str
) -> ConcurrentFuture[types.CheckpointArchiveUrlResponse]
```

Get signed URL to download checkpoint archive.

Args:
- `tinker_path`: The tinker path to the checkpoint

Returns:
- A `Future` containing the `CheckpointArchiveUrlResponse` with signed URL and expiration

#### `get_checkpoint_archive_url_from_tinker_path_async`

```python
async def get_checkpoint_archive_url_from_tinker_path_async(
        tinker_path: str) -> types.CheckpointArchiveUrlResponse
```

Async version of get_checkpoint_archive_url_from_tinker_path.

#### `publish_checkpoint_from_tinker_path`

```python
def publish_checkpoint_from_tinker_path(
        tinker_path: str) -> ConcurrentFuture[None]
```

Publish a checkpoint referenced by a tinker path to make it publicly accessible.

Only the exact owner of the training run can publish checkpoints.
Published checkpoints can be unpublished using the unpublish_checkpoint_from_tinker_path method.

Args:
- `tinker_path`: The tinker path to the checkpoint (e.g., "tinker://run-id/weights/0001")

Returns:
- A `Future` that completes when the checkpoint is published

Raises:
    HTTPException: 400 if checkpoint identifier is invalid
    HTTPException: 404 if checkpoint not found or user doesn't own the training run
    HTTPException: 409 if checkpoint is already public
    HTTPException: 500 if there's an error publishing the checkpoint

Example:
```python
future = rest_client.publish_checkpoint_from_tinker_path("tinker://run-id/weights/0001")
future.result()  # Wait for completion
print("Checkpoint published successfully")
```

#### `publish_checkpoint_from_tinker_path_async`

```python
async def publish_checkpoint_from_tinker_path_async(tinker_path: str) -> None
```

Async version of publish_checkpoint_from_tinker_path.

#### `unpublish_checkpoint_from_tinker_path`

```python
def unpublish_checkpoint_from_tinker_path(
        tinker_path: str) -> ConcurrentFuture[None]
```

Unpublish a checkpoint referenced by a tinker path to make it private again.

Only the exact owner of the training run can unpublish checkpoints.
This reverses the effect of publishing a checkpoint.

Args:
- `tinker_path`: The tinker path to the checkpoint (e.g., "tinker://run-id/weights/0001")

Returns:
- A `Future` that completes when the checkpoint is unpublished

Raises:
    HTTPException: 400 if checkpoint identifier is invalid
    HTTPException: 404 if checkpoint not found or user doesn't own the training run
    HTTPException: 409 if checkpoint is already private
    HTTPException: 500 if there's an error unpublishing the checkpoint

Example:
```python
future = rest_client.unpublish_checkpoint_from_tinker_path("tinker://run-id/weights/0001")
future.result()  # Wait for completion
print("Checkpoint unpublished successfully")
```

#### `unpublish_checkpoint_from_tinker_path_async`

```python
async def unpublish_checkpoint_from_tinker_path_async(
        tinker_path: str) -> None
```

Async version of unpublish_checkpoint_from_tinker_path.

#### `set_checkpoint_ttl_from_tinker_path`

```python
def set_checkpoint_ttl_from_tinker_path(
        tinker_path: str, ttl_seconds: int | None) -> ConcurrentFuture[None]
```

Set or remove the TTL on a checkpoint referenced by a tinker path.

If ttl_seconds is provided, the checkpoint will expire after that many seconds from now.
If ttl_seconds is None, any existing expiration will be removed.

Args:
- `tinker_path`: The tinker path to the checkpoint (e.g., "tinker://run-id/weights/0001")
- `ttl_seconds`: Number of seconds until expiration, or None to remove TTL

Returns:
- A `Future` that completes when the TTL is set

Raises:
    HTTPException: 400 if checkpoint identifier is invalid or ttl_seconds <= 0
    HTTPException: 404 if checkpoint not found or user doesn't own the training run
    HTTPException: 500 if there's an error setting the TTL

Example:
```python
future = rest_client.set_checkpoint_ttl_from_tinker_path("tinker://run-id/weights/0001", 86400)
future.result()  # Wait for completion
print("Checkpoint TTL set successfully")
```

#### `set_checkpoint_ttl_from_tinker_path_async`

```python
async def set_checkpoint_ttl_from_tinker_path_async(
        tinker_path: str, ttl_seconds: int | None) -> None
```

Async version of set_checkpoint_ttl_from_tinker_path.

#### `list_user_checkpoints`

```python
def list_user_checkpoints(
        limit: int = 100,
        offset: int = 0) -> ConcurrentFuture[types.CheckpointsListResponse]
```

List all checkpoints for the current user across all their training runs.

This method retrieves checkpoints from all training runs owned by the authenticated user,
sorted by time (newest first). It supports pagination for efficiently handling large
numbers of checkpoints.

Args:
- `limit`: Maximum number of checkpoints to return (default 100)
- `offset`: Offset for pagination (default 0)

Returns:
- A `Future` containing the `CheckpointsListResponse` with checkpoints and cursor info

Example:
```python
future = rest_client.list_user_checkpoints(limit=50)
response = future.result()
print(f"Found {len(response.checkpoints)} checkpoints")
print(f"Total: {response.cursor.total_count if response.cursor else 'Unknown'}")
for checkpoint in response.checkpoints:
    print(f"  {checkpoint.training_run_id}/{checkpoint.checkpoint_id}")
# Get next page if there are more checkpoints
if response.cursor and response.cursor.offset + response.cursor.limit < response.cursor.total_count:
    next_page = rest_client.list_user_checkpoints(limit=50, offset=50)
```

#### `list_user_checkpoints_async`

```python
async def list_user_checkpoints_async(limit: int = 100,
                                      offset: int = 0
                                      ) -> types.CheckpointsListResponse
```

Async version of list_user_checkpoints.

#### `get_session`

```python
def get_session(
    session_id: str,
    access_scope: Literal["owned", "accessible"] = "owned"
) -> ConcurrentFuture[types.GetSessionResponse]
```

Get session information including all training runs and samplers.

Args:
- `session_id`: The session ID to get information for

Returns:
- A `Future` containing the `GetSessionResponse` with training_run_ids and sampler_ids

Example:
```python
future = rest_client.get_session("session-id")
response = future.result()
print(f"Training runs: {len(response.training_run_ids)}")
print(f"Samplers: {len(response.sampler_ids)}")
```

#### `get_session_async`

```python
async def get_session_async(
    session_id: str,
    access_scope: Literal["owned", "accessible"] = "owned"
) -> types.GetSessionResponse
```

Async version of get_session.
Download .txt
gitextract_vc456gw1/

├── .claude/
│   └── skills/
│       ├── checkpoints/
│       │   └── SKILL.md
│       ├── ci/
│       │   └── SKILL.md
│       ├── completers/
│       │   └── SKILL.md
│       ├── contributing/
│       │   └── SKILL.md
│       ├── datasets/
│       │   └── SKILL.md
│       ├── distillation/
│       │   └── SKILL.md
│       ├── dpo/
│       │   └── SKILL.md
│       ├── environments/
│       │   └── SKILL.md
│       ├── evals/
│       │   └── SKILL.md
│       ├── grpo/
│       │   └── SKILL.md
│       ├── hyperparams/
│       │   └── SKILL.md
│       ├── logging/
│       │   └── SKILL.md
│       ├── manage-skills/
│       │   └── SKILL.md
│       ├── models/
│       │   └── SKILL.md
│       ├── multiturn-rl/
│       │   └── SKILL.md
│       ├── new-recipe/
│       │   └── SKILL.md
│       ├── renderers/
│       │   └── SKILL.md
│       ├── rlhf/
│       │   └── SKILL.md
│       ├── setup/
│       │   └── SKILL.md
│       ├── sft/
│       │   └── SKILL.md
│       ├── tinker-cli/
│       │   └── SKILL.md
│       ├── tinker-sdk/
│       │   └── SKILL.md
│       ├── tinker-types/
│       │   └── SKILL.md
│       └── weights/
│           └── SKILL.md
├── .github/
│   └── workflows/
│       ├── claude-review.yml
│       ├── downstream-compat.yaml
│       ├── nightly.yaml
│       ├── pre-commit.yaml
│       ├── publish-pypi.yaml
│       ├── pyright.yaml
│       ├── pytest.yaml
│       ├── smoke-test-evals.yaml
│       └── smoke-test-recipes.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .sync_state
├── AGENTS.md
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── docs/
│   ├── api-reference/
│   │   ├── apifuture.md
│   │   ├── exceptions.md
│   │   ├── restclient.md
│   │   ├── samplingclient.md
│   │   ├── serviceclient.md
│   │   ├── trainingclient.md
│   │   └── types.md
│   ├── async.mdx
│   ├── compatible-apis/
│   │   └── openai.mdx
│   ├── completers.mdx
│   ├── dev-tips.mdx
│   ├── docs-outline.mdx
│   ├── download-weights.mdx
│   ├── evals.mdx
│   ├── index.mdx
│   ├── install.mdx
│   ├── lora-primer.mdx
│   ├── losses.mdx
│   ├── model-lineup.mdx
│   ├── overview-building.mdx
│   ├── preferences/
│   │   ├── dpo-guide.mdx
│   │   └── rlhf-example.mdx
│   ├── preferences.mdx
│   ├── publish-weights.mdx
│   ├── rendering.mdx
│   ├── rl/
│   │   ├── rl-basic.mdx
│   │   ├── rl-envs.mdx
│   │   ├── rl-hyperparams.mdx
│   │   ├── rl-logging.mdx
│   │   ├── rl-loops.mdx
│   │   └── sequence-extension.mdx
│   ├── rl.mdx
│   ├── save-load.mdx
│   ├── supervised-learning/
│   │   ├── prompt-distillation.mdx
│   │   ├── sl-basic.mdx
│   │   ├── sl-hyperparams.mdx
│   │   ├── sl-loop.mdx
│   │   └── sweep-case-study.mdx
│   ├── supervised-learning.mdx
│   ├── support.mdx
│   ├── training-sampling.mdx
│   └── under-the-hood.mdx
├── pyproject.toml
├── tests/
│   ├── __init__.py
│   ├── compare_sampling_training_logprobs.py
│   ├── conftest.py
│   ├── downstream_compat/
│   │   ├── __init__.py
│   │   ├── conftest.py
│   │   ├── sig_helpers.py
│   │   ├── test_checkpoint_utils.py
│   │   ├── test_cli_and_hyperparam.py
│   │   ├── test_completers.py
│   │   ├── test_model_info.py
│   │   ├── test_recipes.py
│   │   ├── test_renderers.py
│   │   ├── test_rl_train.py
│   │   ├── test_rl_types.py
│   │   ├── test_supervised.py
│   │   ├── test_tokenizer_utils.py
│   │   └── test_utils.py
│   ├── helpers.py
│   ├── recipes/
│   │   ├── __init__.py
│   │   ├── test_recipe_chat_sl.py
│   │   ├── test_recipe_dpo.py
│   │   ├── test_recipe_guess_number.py
│   │   ├── test_recipe_math_rl.py
│   │   ├── test_recipe_off_policy_reasoning.py
│   │   ├── test_recipe_on_policy_distillation.py
│   │   ├── test_recipe_on_policy_multi_teacher.py
│   │   ├── test_recipe_rlhf_pipeline.py
│   │   ├── test_recipe_shorter.py
│   │   ├── test_recipe_text_arena.py
│   │   ├── test_recipe_twenty_questions.py
│   │   └── test_recipe_vlm_classifier.py
│   ├── test_inspect_eval.py
│   ├── test_modal_sandbox.py
│   ├── third_party/
│   │   ├── __init__.py
│   │   └── test_litellm.py
│   ├── validate_temperature_logprobs.py
│   └── weights/
│       ├── __init__.py
│       ├── test_download.py
│       ├── test_export.py
│       ├── test_lifecycle.py
│       ├── test_publish.py
│       ├── test_quantized.py
│       ├── test_quantized_equivalence.py
│       └── test_strategy_consistency.py
└── tinker_cookbook/
    ├── __init__.py
    ├── chat_app/
    │   ├── README.md
    │   └── tinker_chat_cli.py
    ├── checkpoint_utils.py
    ├── checkpoint_utils_test.py
    ├── cli_utils.py
    ├── cli_utils_test.py
    ├── completers.py
    ├── display.py
    ├── distillation/
    │   ├── __init__.py
    │   ├── datasets.py
    │   └── train_on_policy.py
    ├── eval/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── custom_evaluators.py
    │   ├── custom_inspect_task.py
    │   ├── evaluators.py
    │   ├── inspect_evaluators.py
    │   ├── inspect_utils.py
    │   ├── inspect_utils_test.py
    │   └── run_inspect_evals.py
    ├── example_data/
    │   ├── conversations.jsonl
    │   └── multilingual.txt
    ├── exceptions.py
    ├── exceptions_test.py
    ├── hyperparam_utils.py
    ├── image_processing_utils.py
    ├── image_processing_utils_test.py
    ├── model_info.py
    ├── model_info_test.py
    ├── preference/
    │   ├── __init__.py
    │   ├── comparison_policy_evaluator.py
    │   ├── dpo_datasets.py
    │   ├── preference_datasets.py
    │   ├── train_dpo.py
    │   └── types.py
    ├── py.typed
    ├── recipes/
    │   ├── README.md
    │   ├── chat_sl/
    │   │   ├── README.md
    │   │   ├── chat_datasets.py
    │   │   └── train.py
    │   ├── code_rl/
    │   │   ├── README.md
    │   │   ├── code_env.py
    │   │   ├── code_grading.py
    │   │   ├── deepcoder_tool.py
    │   │   ├── lcb_utils.py
    │   │   ├── sandbox_config/
    │   │   │   └── local.yaml
    │   │   └── train.py
    │   ├── distillation/
    │   │   ├── README.md
    │   │   ├── harbor_multiturn.py
    │   │   ├── harbor_multiturn_test.py
    │   │   ├── off_policy_reasoning.py
    │   │   ├── on_policy_distillation.py
    │   │   ├── on_policy_distillation_harbor_multi_turn.py
    │   │   └── on_policy_multi_teacher.py
    │   ├── harbor_rl/
    │   │   ├── README.md
    │   │   ├── eval.py
    │   │   ├── harbor_env.py
    │   │   ├── harbor_tools.py
    │   │   ├── harbor_tools_test.py
    │   │   ├── scripts/
    │   │   │   ├── .gitignore
    │   │   │   ├── eval_terminal_bench.py
    │   │   │   └── train_terminal_bench.py
    │   │   └── train.py
    │   ├── math_rl/
    │   │   ├── README.md
    │   │   ├── arithmetic_env.py
    │   │   ├── math_env.py
    │   │   ├── math_env_test.py
    │   │   ├── math_grading.py
    │   │   └── train.py
    │   ├── multiplayer_rl/
    │   │   ├── README.md
    │   │   ├── guess_number/
    │   │   │   ├── README.md
    │   │   │   ├── env.py
    │   │   │   └── train.py
    │   │   ├── text_arena/
    │   │   │   ├── README.md
    │   │   │   ├── env.py
    │   │   │   └── train.py
    │   │   └── twenty_questions/
    │   │       ├── README.md
    │   │       ├── common_english_nouns.txt
    │   │       ├── env.py
    │   │       └── train.py
    │   ├── preference/
    │   │   ├── README.md
    │   │   ├── datasets.py
    │   │   ├── dpo/
    │   │   │   ├── README.md
    │   │   │   └── train.py
    │   │   ├── rlhf/
    │   │   │   ├── README.md
    │   │   │   └── rlhf_pipeline.py
    │   │   └── shorter/
    │   │       ├── README.md
    │   │       ├── env.py
    │   │       └── train.py
    │   ├── prompt_distillation/
    │   │   ├── README.md
    │   │   ├── create_data.py
    │   │   └── train.py
    │   ├── rl_basic.py
    │   ├── rl_loop.py
    │   ├── rubric/
    │   │   ├── README.md
    │   │   ├── data.py
    │   │   ├── debug_env.py
    │   │   ├── env.py
    │   │   ├── generate_data.py
    │   │   ├── prometheus_experimental.py
    │   │   └── train.py
    │   ├── search_tool/
    │   │   ├── README.md
    │   │   ├── chroma_pickle_test.py
    │   │   ├── embedding.py
    │   │   ├── offline_eval.py
    │   │   ├── search_env.py
    │   │   ├── tools.py
    │   │   └── train.py
    │   ├── sl_basic.py
    │   ├── sl_loop.py
    │   ├── verifiers_rl/
    │   │   ├── README.md
    │   │   ├── evaluate.py
    │   │   ├── tinker_openai.py
    │   │   ├── train.py
    │   │   ├── verifiers_env.py
    │   │   └── verifiers_pickle_test.py
    │   └── vlm_classifier/
    │       ├── README.md
    │       ├── data.py
    │       ├── eval.py
    │       ├── eval_sweep.py
    │       ├── sweep.py
    │       └── train.py
    ├── renderers/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── base.py
    │   ├── deepseek_v3.py
    │   ├── deepseek_v3_test.py
    │   ├── gpt_oss.py
    │   ├── gpt_oss_test.py
    │   ├── kimi_k2.py
    │   ├── kimi_k25.py
    │   ├── kimi_k25_test.py
    │   ├── kimi_k2_5_tool_declaration_ts.py
    │   ├── kimi_k2_test.py
    │   ├── kimi_k2_tool_declaration_test.py
    │   ├── llama3.py
    │   ├── nemotron3.py
    │   ├── nemotron3_test.py
    │   ├── parsing_test.py
    │   ├── qwen3.py
    │   ├── qwen3_5.py
    │   ├── qwen3_test.py
    │   ├── qwen3_tool_declaration_test.py
    │   ├── renderer_pickle_test.py
    │   ├── renderers_test.py
    │   ├── role_colon.py
    │   ├── testing_utils.py
    │   └── tool_calling_test.py
    ├── rl/
    │   ├── __init__.py
    │   ├── builder_pickle_test.py
    │   ├── data_processing.py
    │   ├── message_env.py
    │   ├── message_env_test.py
    │   ├── metric_util.py
    │   ├── metrics.py
    │   ├── multiturn_weight_assignment_test.py
    │   ├── play_w_env.py
    │   ├── preference_envs.py
    │   ├── problem_env.py
    │   ├── rollout_error_resilience_test.py
    │   ├── rollout_logging.py
    │   ├── rollout_logging_test.py
    │   ├── rollout_strategy.py
    │   ├── rollouts.py
    │   ├── shutdown_test.py
    │   ├── train.py
    │   └── types.py
    ├── sandbox/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── modal_sandbox.py
    │   ├── sandbox_interface.py
    │   └── sandboxfusion.py
    ├── scripts/
    │   ├── merge_tinker_adapter_to_hf_model.py
    │   └── test_tool_calling_e2e.py
    ├── supervised/
    │   ├── __init__.py
    │   ├── common.py
    │   ├── data.py
    │   ├── nll_evaluator.py
    │   ├── resume_test.py
    │   ├── train.py
    │   ├── types.py
    │   └── viz_sft_dataset.py
    ├── third_party/
    │   ├── __init__.py
    │   ├── litellm/
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── provider.py
    │   │   └── provider_test.py
    │   ├── openai_compat.py
    │   └── openai_compat_test.py
    ├── tokenizer_utils.py
    ├── tokenizer_utils_test.py
    ├── tool_use/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── agent_tool_message_env.py
    │   ├── agent_tool_message_env_test.py
    │   ├── tools.py
    │   └── types.py
    ├── utils/
    │   ├── __init__.py
    │   ├── code_state.py
    │   ├── deprecation.py
    │   ├── deprecation_test.py
    │   ├── file_utils.py
    │   ├── format_colorized.py
    │   ├── logtree.py
    │   ├── logtree_formatters.py
    │   ├── logtree_test.py
    │   ├── lr_scheduling.py
    │   ├── misc_utils.py
    │   ├── ml_log.py
    │   ├── ml_log_test.py
    │   ├── trace.py
    │   └── trace_test.py
    ├── weights/
    │   ├── __init__.py
    │   ├── _artifacts.py
    │   ├── _download.py
    │   ├── _export/
    │   │   ├── __init__.py
    │   │   ├── _full.py
    │   │   ├── _quantized.py
    │   │   └── _shard.py
    │   ├── _merge.py
    │   ├── _publish.py
    │   ├── artifacts_test.py
    │   ├── download_test.py
    │   ├── export_test.py
    │   ├── merge_test.py
    │   ├── publish_test.py
    │   ├── quantized_test.py
    │   └── stress_test.py
    └── xmux/
        ├── README.md
        ├── __init__.py
        ├── control.py
        ├── core.py
        ├── examples/
        │   ├── async_rl_sweep.py
        │   ├── fake_train.py
        │   └── ml_sweep.py
        ├── run_job.py
        └── utils.py
Download .txt
Showing preview only (251K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (2672 symbols across 232 files)

FILE: tests/compare_sampling_training_logprobs.py
  function get_reference_document (line 17) | def get_reference_document():
  function get_row (line 25) | async def get_row(
  class Config (line 95) | class Config:
  function main (line 105) | async def main(config: Config):

FILE: tests/conftest.py
  function pytest_collection_modifyitems (line 19) | def pytest_collection_modifyitems(config, items):

FILE: tests/downstream_compat/conftest.py
  function pytest_collection_modifyitems (line 15) | def pytest_collection_modifyitems(config, items):

FILE: tests/downstream_compat/sig_helpers.py
  function get_param_names (line 6) | def get_param_names(func) -> list[str]:
  function assert_params (line 12) | def assert_params(func, expected_params: list[str]) -> None:
  function assert_params_subset (line 20) | def assert_params_subset(func, required_params: list[str]) -> None:

FILE: tests/downstream_compat/test_checkpoint_utils.py
  class TestCheckpointRecord (line 16) | class TestCheckpointRecord:
    method test_fields (line 17) | def test_fields(self):
    method test_constructable_minimal (line 22) | def test_constructable_minimal(self):
    method test_to_dict (line 28) | def test_to_dict(self):
    method test_from_dict (line 34) | def test_from_dict(self):
    method test_roundtrip (line 40) | def test_roundtrip(self):
    method test_has_method (line 46) | def test_has_method(self):
    method test_get_method (line 51) | def test_get_method(self):
  class TestCheckpointFunctions (line 56) | class TestCheckpointFunctions:
    method test_load_checkpoints_file_signature (line 57) | def test_load_checkpoints_file_signature(self):
    method test_get_last_checkpoint_signature (line 62) | def test_get_last_checkpoint_signature(self):
    method test_save_checkpoint_signature (line 67) | def test_save_checkpoint_signature(self):
    method test_save_checkpoint_async_exists (line 72) | def test_save_checkpoint_async_exists(self):
    method test_checkpoint_record_has_signature (line 77) | def test_checkpoint_record_has_signature(self):
    method test_checkpoint_record_get_signature (line 82) | def test_checkpoint_record_get_signature(self):

FILE: tests/downstream_compat/test_cli_and_hyperparam.py
  class TestCliUtils (line 14) | class TestCliUtils:
    method test_check_log_dir_signature (line 15) | def test_check_log_dir_signature(self):
  class TestHyperparamUtils (line 21) | class TestHyperparamUtils:
    method test_get_lr_signature (line 22) | def test_get_lr_signature(self):
    method test_get_lora_lr_over_full_finetune_lr_signature (line 27) | def test_get_lora_lr_over_full_finetune_lr_signature(self):
    method test_get_lora_param_count_signature (line 32) | def test_get_lora_param_count_signature(self):
    method test_get_lr_returns_float (line 37) | def test_get_lr_returns_float(self):

FILE: tests/downstream_compat/test_completers.py
  class TestTokensWithLogprobs (line 17) | class TestTokensWithLogprobs:
    method test_fields (line 18) | def test_fields(self):
    method test_logprobs_property (line 23) | def test_logprobs_property(self):
    method test_logprobs_raises_when_none (line 27) | def test_logprobs_raises_when_none(self):
    method test_none_logprobs (line 34) | def test_none_logprobs(self):
  class TestTokenCompleter (line 39) | class TestTokenCompleter:
    method test_is_callable (line 40) | def test_is_callable(self):
    method test_call_is_async (line 43) | def test_call_is_async(self):
    method test_call_signature (line 46) | def test_call_signature(self):
  class TestMessageCompleter (line 52) | class TestMessageCompleter:
    method test_is_callable (line 53) | def test_is_callable(self):
    method test_call_is_async (line 56) | def test_call_is_async(self):
    method test_call_signature (line 59) | def test_call_signature(self):
  class TestTinkerTokenCompleter (line 65) | class TestTinkerTokenCompleter:
    method test_is_subclass_of_token_completer (line 66) | def test_is_subclass_of_token_completer(self):
    method test_has_expected_fields (line 69) | def test_has_expected_fields(self):
  class TestStopCondition (line 77) | class TestStopCondition:
    method test_is_type_alias (line 78) | def test_is_type_alias(self):

FILE: tests/downstream_compat/test_model_info.py
  class TestModelAttributes (line 16) | class TestModelAttributes:
    method test_fields (line 17) | def test_fields(self):
    method test_constructable (line 29) | def test_constructable(self):
  class TestModelInfoFunctions (line 41) | class TestModelInfoFunctions:
    method test_get_model_attributes_returns_model_attributes (line 42) | def test_get_model_attributes_returns_model_attributes(self):
    method test_get_recommended_renderer_name_returns_string (line 47) | def test_get_recommended_renderer_name_returns_string(self):
    method test_get_recommended_renderer_names_returns_list (line 52) | def test_get_recommended_renderer_names_returns_list(self):
    method test_recommended_renderer_name_is_first_of_names (line 58) | def test_recommended_renderer_name_is_first_of_names(self):
  class TestModelInfoSignatures (line 64) | class TestModelInfoSignatures:
    method test_get_model_attributes_signature (line 65) | def test_get_model_attributes_signature(self):
    method test_get_recommended_renderer_name_signature (line 70) | def test_get_recommended_renderer_name_signature(self):
    method test_get_recommended_renderer_names_signature (line 75) | def test_get_recommended_renderer_names_signature(self):

FILE: tests/downstream_compat/test_recipes.py
  class TestMathRL (line 14) | class TestMathRL:
    method test_math_env_importable (line 15) | def test_math_env_importable(self):
    method test_arithmetic_env_importable (line 20) | def test_arithmetic_env_importable(self):
    method test_get_math_dataset_builder (line 25) | def test_get_math_dataset_builder(self):
    method test_math_env_classes (line 30) | def test_math_env_classes(self):
    method test_math_grading_functions (line 39) | def test_math_grading_functions(self):
    method test_safe_grade (line 50) | def test_safe_grade(self):
  class TestCodeRL (line 61) | class TestCodeRL:
    method test_code_env_importable (line 62) | def test_code_env_importable(self):
  class TestChatSL (line 73) | class TestChatSL:
    method test_chat_datasets_importable (line 74) | def test_chat_datasets_importable(self):
    method test_tulu3_builder_exists (line 79) | def test_tulu3_builder_exists(self):
  class TestPreference (line 90) | class TestPreference:
    method test_dpo_train_importable (line 91) | def test_dpo_train_importable(self):
    method test_preference_datasets_importable (line 97) | def test_preference_datasets_importable(self):
  class TestBasicRecipes (line 108) | class TestBasicRecipes:
    method test_rl_basic_build_config (line 109) | def test_rl_basic_build_config(self):
    method test_sl_basic_build_config (line 114) | def test_sl_basic_build_config(self):
  class TestEvaluators (line 125) | class TestEvaluators:
    method test_sampling_client_evaluator_importable (line 126) | def test_sampling_client_evaluator_importable(self):
    method test_training_client_evaluator_importable (line 131) | def test_training_client_evaluator_importable(self):
    method test_evaluator_builder_importable (line 136) | def test_evaluator_builder_importable(self):
  class TestDistillation (line 147) | class TestDistillation:
    method test_prompt_only_env_importable (line 148) | def test_prompt_only_env_importable(self):
    method test_load_tulu3_prompts_importable (line 153) | def test_load_tulu3_prompts_importable(self):
  class TestPreferenceTypes (line 164) | class TestPreferenceTypes:
    method test_preference_model_builder_importable (line 165) | def test_preference_model_builder_importable(self):
  class TestSupervisedTrain (line 176) | class TestSupervisedTrain:
    method test_config_exists (line 177) | def test_config_exists(self):
    method test_main_exists (line 182) | def test_main_exists(self):
    method test_main_is_async (line 187) | def test_main_is_async(self):
  class TestLRScheduling (line 198) | class TestLRScheduling:
    method test_lr_schedule_importable (line 199) | def test_lr_schedule_importable(self):

FILE: tests/downstream_compat/test_renderers.py
  class TestTypeExports (line 51) | class TestTypeExports:
    method test_message_is_typed_dict (line 54) | def test_message_is_typed_dict(self):
    method test_text_part_constructable (line 60) | def test_text_part_constructable(self):
    method test_thinking_part_constructable (line 64) | def test_thinking_part_constructable(self):
    method test_tool_call_constructable (line 68) | def test_tool_call_constructable(self):
    method test_tool_spec_constructable (line 79) | def test_tool_spec_constructable(self):
    method test_train_on_what_has_expected_values (line 87) | def test_train_on_what_has_expected_values(self):
    method test_streaming_types_importable (line 91) | def test_streaming_types_importable(self):
    method test_content_part_types (line 98) | def test_content_part_types(self):
    method test_utf8_token_decoder_importable (line 103) | def test_utf8_token_decoder_importable(self):
    method test_render_context_importable (line 106) | def test_render_context_importable(self):
  class TestUtilityFunctions (line 115) | class TestUtilityFunctions:
    method test_ensure_text_with_string (line 116) | def test_ensure_text_with_string(self):
    method test_format_content_as_string (line 119) | def test_format_content_as_string(self):
    method test_get_text_content (line 123) | def test_get_text_content(self):
    method test_parse_content_blocks_exists (line 127) | def test_parse_content_blocks_exists(self):
    method test_ensure_list_importable (line 130) | def test_ensure_list_importable(self):
  class TestRendererRegistry (line 140) | class TestRendererRegistry:
    method test_register_and_unregister_roundtrip (line 141) | def test_register_and_unregister_roundtrip(self):
    method test_unregister_nonexistent_returns_false (line 155) | def test_unregister_nonexistent_returns_false(self):
  function test_builtin_renderer_name_resolves (line 188) | def test_builtin_renderer_name_resolves(renderer_name):
  class TestRendererInterface (line 204) | class TestRendererInterface:
    method test_build_generation_prompt_is_method (line 207) | def test_build_generation_prompt_is_method(self):
    method test_build_supervised_example_is_method (line 211) | def test_build_supervised_example_is_method(self):
    method test_build_supervised_examples_is_method (line 215) | def test_build_supervised_examples_is_method(self):
    method test_parse_response_is_method (line 219) | def test_parse_response_is_method(self):
    method test_get_stop_sequences_is_abstract (line 222) | def test_get_stop_sequences_is_abstract(self):
    method test_has_extension_property (line 225) | def test_has_extension_property(self):
    method test_tokenizer_attribute (line 228) | def test_tokenizer_attribute(self):
    method test_pickle_metadata_attributes (line 232) | def test_pickle_metadata_attributes(self):
  class TestSignatures (line 244) | class TestSignatures:
    method test_get_renderer_signature (line 247) | def test_get_renderer_signature(self):
    method test_register_renderer_signature (line 254) | def test_register_renderer_signature(self):
    method test_unregister_renderer_signature (line 259) | def test_unregister_renderer_signature(self):
    method test_build_generation_prompt_signature (line 264) | def test_build_generation_prompt_signature(self):
    method test_build_supervised_example_signature (line 269) | def test_build_supervised_example_signature(self):
    method test_parse_response_signature (line 274) | def test_parse_response_signature(self):
    method test_ensure_text_signature (line 279) | def test_ensure_text_signature(self):
    method test_format_content_as_string_signature (line 284) | def test_format_content_as_string_signature(self):
    method test_get_text_content_signature (line 289) | def test_get_text_content_signature(self):
  class TestRendererClasses (line 300) | class TestRendererClasses:
    method test_deepseekv3_thinking_renderer_importable (line 301) | def test_deepseekv3_thinking_renderer_importable(self):
    method test_qwen3_renderer_importable (line 304) | def test_qwen3_renderer_importable(self):
    method test_gpt_oss_renderer_importable (line 307) | def test_gpt_oss_renderer_importable(self):
    method test_kimi_k2_renderer_importable (line 310) | def test_kimi_k2_renderer_importable(self):
    method test_kimi_k25_renderer_importable (line 313) | def test_kimi_k25_renderer_importable(self):

FILE: tests/downstream_compat/test_rl_train.py
  class TestRLTrainConfig (line 20) | class TestRLTrainConfig:
    method test_config_exists (line 21) | def test_config_exists(self):
    method test_main_exists (line 24) | def test_main_exists(self):
    method test_main_is_async (line 27) | def test_main_is_async(self):
    method test_main_signature (line 30) | def test_main_signature(self):
  class TestRLDataProcessing (line 41) | class TestRLDataProcessing:
    method test_compute_advantages_signature (line 42) | def test_compute_advantages_signature(self):
    method test_trajectory_to_data_signature (line 47) | def test_trajectory_to_data_signature(self):
    method test_assemble_training_data_signature (line 52) | def test_assemble_training_data_signature(self):
  class TestRLMetrics (line 63) | class TestRLMetrics:
    method test_metrics_importable (line 64) | def test_metrics_importable(self):
    method test_metric_util_importable (line 73) | def test_metric_util_importable(self):
  class TestRLRollouts (line 85) | class TestRLRollouts:
    method test_do_single_rollout_importable (line 86) | def test_do_single_rollout_importable(self):

FILE: tests/downstream_compat/test_rl_types.py
  class TestTypeAliases (line 31) | class TestTypeAliases:
    method test_action_is_list_int (line 32) | def test_action_is_list_int(self):
    method test_metrics_is_dict (line 36) | def test_metrics_is_dict(self):
    method test_logs_is_dict (line 40) | def test_logs_is_dict(self):
    method test_observation_alias_exists (line 44) | def test_observation_alias_exists(self):
  class TestStepResult (line 53) | class TestStepResult:
    method test_fields (line 54) | def test_fields(self):
    method test_metrics_defaults_to_empty (line 66) | def test_metrics_defaults_to_empty(self):
  class TestTransition (line 82) | class TestTransition:
    method test_fields (line 83) | def test_fields(self):
    method test_constructable (line 88) | def test_constructable(self):
  class TestEnv (line 103) | class TestEnv:
    method test_is_abstract (line 104) | def test_is_abstract(self):
    method test_has_initial_observation (line 107) | def test_has_initial_observation(self):
    method test_initial_observation_signature (line 111) | def test_initial_observation_signature(self):
    method test_has_step (line 116) | def test_has_step(self):
    method test_step_signature (line 120) | def test_step_signature(self):
  class TestTrajectory (line 131) | class TestTrajectory:
    method test_fields (line 132) | def test_fields(self):
    method test_frozen (line 137) | def test_frozen(self):
  class TestTrajectoryGroup (line 141) | class TestTrajectoryGroup:
    method test_has_trajectories_field (line 142) | def test_has_trajectories_field(self):
    method test_get_total_rewards_method (line 147) | def test_get_total_rewards_method(self):
  class TestEnvGroupBuilder (line 157) | class TestEnvGroupBuilder:
    method test_is_abstract (line 158) | def test_is_abstract(self):
    method test_make_envs_signature (line 161) | def test_make_envs_signature(self):
    method test_compute_group_rewards_signature (line 168) | def test_compute_group_rewards_signature(self):
    method test_logging_tags_signature (line 175) | def test_logging_tags_signature(self):
  class TestRLDataset (line 187) | class TestRLDataset:
    method test_is_abstract (line 188) | def test_is_abstract(self):
    method test_get_batch_signature (line 191) | def test_get_batch_signature(self):
    method test_has_len (line 197) | def test_has_len(self):
  class TestRLDatasetBuilder (line 201) | class TestRLDatasetBuilder:
    method test_has_call (line 202) | def test_has_call(self):

FILE: tests/downstream_compat/test_supervised.py
  class TestSupervisedTypes (line 20) | class TestSupervisedTypes:
    method test_supervised_dataset_has_get_batch (line 21) | def test_supervised_dataset_has_get_batch(self):
    method test_supervised_dataset_has_len (line 24) | def test_supervised_dataset_has_len(self):
    method test_supervised_dataset_builder_is_callable (line 27) | def test_supervised_dataset_builder_is_callable(self):
    method test_chat_dataset_builder_is_subclass (line 30) | def test_chat_dataset_builder_is_subclass(self):
    method test_chat_dataset_builder_common_config_exists (line 33) | def test_chat_dataset_builder_common_config_exists(self):
  class TestSupervisedData (line 37) | class TestSupervisedData:
    method test_conversation_to_datum_signature (line 38) | def test_conversation_to_datum_signature(self):
    method test_from_conversation_file_builder_exists (line 45) | def test_from_conversation_file_builder_exists(self):
    method test_supervised_dataset_from_hf_exists (line 48) | def test_supervised_dataset_from_hf_exists(self):
    method test_streaming_supervised_dataset_from_hf_exists (line 51) | def test_streaming_supervised_dataset_from_hf_exists(self):

FILE: tests/downstream_compat/test_tokenizer_utils.py
  class TestTokenizerRegistryAPI (line 16) | class TestTokenizerRegistryAPI:
    method test_get_tokenizer_signature (line 17) | def test_get_tokenizer_signature(self):
    method test_register_tokenizer_signature (line 22) | def test_register_tokenizer_signature(self):
    method test_unregister_tokenizer_signature (line 27) | def test_unregister_tokenizer_signature(self):
    method test_is_tokenizer_registered_signature (line 32) | def test_is_tokenizer_registered_signature(self):
    method test_get_registered_tokenizer_names_callable (line 37) | def test_get_registered_tokenizer_names_callable(self):
    method test_tokenizer_type_alias_exists (line 40) | def test_tokenizer_type_alias_exists(self):
    method test_register_and_unregister_roundtrip (line 43) | def test_register_and_unregister_roundtrip(self):

FILE: tests/downstream_compat/test_utils.py
  class TestLoggerHierarchy (line 32) | class TestLoggerHierarchy:
    method test_logger_is_abstract (line 33) | def test_logger_is_abstract(self):
    method test_json_logger_is_subclass (line 36) | def test_json_logger_is_subclass(self):
    method test_pretty_print_logger_is_subclass (line 39) | def test_pretty_print_logger_is_subclass(self):
    method test_multiplex_logger_is_subclass (line 42) | def test_multiplex_logger_is_subclass(self):
    method test_setup_logging_signature (line 45) | def test_setup_logging_signature(self):
    method test_configure_logging_module_signature (line 53) | def test_configure_logging_module_signature(self):
    method test_dump_config_callable (line 58) | def test_dump_config_callable(self):
  class TestMiscUtils (line 67) | class TestMiscUtils:
    method test_dict_mean (line 68) | def test_dict_mean(self):
    method test_all_same_true (line 73) | def test_all_same_true(self):
    method test_all_same_false (line 76) | def test_all_same_false(self):
    method test_split_list (line 79) | def test_split_list(self):
    method test_concat_lists (line 83) | def test_concat_lists(self):
    method test_not_none (line 87) | def test_not_none(self):
    method test_safezip (line 90) | def test_safezip(self):
    method test_timed_is_context_manager (line 94) | def test_timed_is_context_manager(self):
  class TestTraceImports (line 103) | class TestTraceImports:
    method test_trace_importable (line 104) | def test_trace_importable(self):
    method test_logtree_importable (line 111) | def test_logtree_importable(self):

FILE: tests/helpers.py
  function run_recipe (line 17) | def run_recipe(

FILE: tests/recipes/test_recipe_chat_sl.py
  function test_chat_sl (line 10) | def test_chat_sl():
  function test_chat_sl_resume (line 23) | def test_chat_sl_resume():

FILE: tests/recipes/test_recipe_dpo.py
  function test_dpo (line 7) | def test_dpo():

FILE: tests/recipes/test_recipe_guess_number.py
  function test_guess_number (line 7) | def test_guess_number():

FILE: tests/recipes/test_recipe_math_rl.py
  function test_math_rl_sync (line 9) | def test_math_rl_sync():
  function test_math_rl_async (line 23) | def test_math_rl_async():
  function test_math_rl_stream_minibatch (line 38) | def test_math_rl_stream_minibatch():

FILE: tests/recipes/test_recipe_off_policy_reasoning.py
  function test_off_policy_reasoning (line 7) | def test_off_policy_reasoning():

FILE: tests/recipes/test_recipe_on_policy_distillation.py
  function test_on_policy_distillation (line 7) | def test_on_policy_distillation():

FILE: tests/recipes/test_recipe_on_policy_multi_teacher.py
  function test_on_policy_multi_teacher (line 7) | def test_on_policy_multi_teacher():

FILE: tests/recipes/test_recipe_rlhf_pipeline.py
  function test_rlhf_pipeline (line 7) | def test_rlhf_pipeline():

FILE: tests/recipes/test_recipe_shorter.py
  function test_shorter (line 7) | def test_shorter():

FILE: tests/recipes/test_recipe_text_arena.py
  function test_text_arena (line 7) | def test_text_arena():

FILE: tests/recipes/test_recipe_twenty_questions.py
  function test_twenty_questions (line 7) | def test_twenty_questions():

FILE: tests/recipes/test_recipe_vlm_classifier.py
  function test_vlm_classifier (line 7) | def test_vlm_classifier():

FILE: tests/test_inspect_eval.py
  function _create_api (line 42) | def _create_api(
  function _generate (line 55) | async def _generate(api: InspectAPIFromTinkerSampling) -> InspectAIModel...
  function _log_response (line 59) | def _log_response(result: InspectAIModelOutput) -> None:
  function test_thinking_model_include_reasoning (line 79) | def test_thinking_model_include_reasoning():
  function test_thinking_model_exclude_reasoning (line 96) | def test_thinking_model_exclude_reasoning():
  function test_non_thinking_model_include_reasoning (line 108) | def test_non_thinking_model_include_reasoning():
  function test_non_thinking_model_exclude_reasoning (line 124) | def test_non_thinking_model_exclude_reasoning():

FILE: tests/test_modal_sandbox.py
  function sandbox (line 34) | async def sandbox():
  function _timed (line 41) | async def _timed(coro):
  function test_write_file_latency (line 51) | async def test_write_file_latency(sandbox):
  function test_write_file_binary (line 81) | async def test_write_file_binary(sandbox):
  function test_cleanup_after_timeout (line 105) | async def test_cleanup_after_timeout():
  function test_cleanup_after_terminate (line 120) | async def test_cleanup_after_terminate():
  function test_cleanup_after_command_timeout (line 134) | async def test_cleanup_after_command_timeout():

FILE: tests/third_party/test_litellm.py
  function tinker_provider (line 18) | def tinker_provider():
  function test_acompletion_basic (line 40) | async def test_acompletion_basic(tinker_provider) -> None:
  function test_acompletion_with_system_message (line 71) | async def test_acompletion_with_system_message(tinker_provider) -> None:
  function test_acompletion_multi_turn (line 89) | async def test_acompletion_multi_turn(tinker_provider) -> None:
  function test_completion_sync (line 107) | def test_completion_sync(tinker_provider) -> None:
  function test_set_client_with_finetuned_checkpoint (line 130) | async def test_set_client_with_finetuned_checkpoint(tinker_provider) -> ...
  function test_register_idempotent (line 166) | def test_register_idempotent(tinker_provider) -> None:

FILE: tests/validate_temperature_logprobs.py
  function _default_temperatures (line 21) | def _default_temperatures() -> list[float]:
  class Config (line 26) | class Config:
  function _sample_next_token (line 42) | async def _sample_next_token(
  function _collect_sampled_token_logprobs (line 65) | async def _collect_sampled_token_logprobs(
  function _compute_logp1_for_tokens (line 93) | async def _compute_logp1_for_tokens(
  function _pairwise_ratio_metrics (line 112) | def _pairwise_ratio_metrics(
  function _sample_sequence_oneshot (line 152) | async def _sample_sequence_oneshot(
  function _resample_tokens_individually (line 177) | async def _resample_tokens_individually(
  function _compare_logprobs (line 221) | def _compare_logprobs(
  function validate_sequence_consistency (line 243) | async def validate_sequence_consistency(
  function main (line 308) | async def main(cfg: Config) -> None:

FILE: tests/weights/test_download.py
  class TestDownloadIntegration (line 16) | class TestDownloadIntegration:
    method _get_test_tinker_path (line 19) | def _get_test_tinker_path(self) -> str:
    method test_download_and_extract (line 32) | def test_download_and_extract(self):

FILE: tests/weights/test_export.py
  function _save_model_to_disk (line 41) | def _save_model_to_disk(
  function _save_expert_adapter (line 55) | def _save_expert_adapter(
  function _save_dense_adapter (line 78) | def _save_dense_adapter(
  function _run_build_and_reload (line 98) | def _run_build_and_reload(
  function _make_tiny_gpt_oss_config (line 121) | def _make_tiny_gpt_oss_config() -> PretrainedConfig:
  class TestGptOssFusedInterleaved (line 135) | class TestGptOssFusedInterleaved:
    method test_gate_and_up_deltas_in_correct_interleaved_slots (line 140) | def test_gate_and_up_deltas_in_correct_interleaved_slots(self):
    method test_up_only_does_not_modify_gate_slots (line 170) | def test_up_only_does_not_modify_gate_slots(self):
  function _make_tiny_qwen3_vl_moe_config (line 214) | def _make_tiny_qwen3_vl_moe_config() -> PretrainedConfig:
  class TestQwen3VlMoeFusedConcatenated (line 231) | class TestQwen3VlMoeFusedConcatenated:
    method test_gate_and_up_deltas_in_correct_halves (line 239) | def test_gate_and_up_deltas_in_correct_halves(self):
    method test_up_only_does_not_modify_gate_half (line 280) | def test_up_only_does_not_modify_gate_half(self):
  function _make_tiny_qwen3_moe_config (line 332) | def _make_tiny_qwen3_moe_config() -> PretrainedConfig:
  class TestQwen3MoeSeparateExperts (line 344) | class TestQwen3MoeSeparateExperts:
    method test_per_expert_weights_updated (line 347) | def test_per_expert_weights_updated(self):
  function _make_tiny_deepseek_v31_config (line 389) | def _make_tiny_deepseek_v31_config() -> PretrainedConfig:
  function _copy_hf_files (line 407) | def _copy_hf_files(repo_id: str, output_path: Path, file_names: tuple[st...
  function _save_mixed_deepseek_adapter (line 414) | def _save_mixed_deepseek_adapter(
  function _reshard_saved_model (line 454) | def _reshard_saved_model(
  function _load_saved_state_dict (line 481) | def _load_saved_state_dict(output_path: Path) -> dict[str, torch.Tensor]:
  class TestDeepSeekV31FP8Export (line 489) | class TestDeepSeekV31FP8Export:
    method test_dense_weights_change_but_only_routed_experts_are_quantized_to_fp8 (line 495) | def test_dense_weights_change_but_only_routed_experts_are_quantized_to...
  function _make_tiny_qwen3_dense_config (line 642) | def _make_tiny_qwen3_dense_config() -> PretrainedConfig:
  class TestQwen3Dense (line 654) | class TestQwen3Dense:
    method test_dense_linear_merge (line 657) | def test_dense_linear_merge(self):

FILE: tests/weights/test_lifecycle.py
  class TestFullLifecycle (line 29) | class TestFullLifecycle:
    method _train_and_save (line 38) | def _train_and_save(self, log_path: str) -> str:
    method test_train_download_build (line 76) | def test_train_download_build(self):

FILE: tests/weights/test_publish.py
  function _hf_username (line 21) | def _hf_username() -> str:
  function _create_dummy_model_dir (line 34) | def _create_dummy_model_dir(path: Path) -> None:
  class TestPublishToHfHubIntegration (line 42) | class TestPublishToHfHubIntegration:
    method test_upload_and_verify (line 43) | def test_upload_and_verify(self):

FILE: tests/weights/test_quantized.py
  function _deepseek_config (line 29) | def _deepseek_config() -> dict:
  function _deepseek_state_dict (line 44) | def _deepseek_state_dict() -> dict[str, torch.Tensor]:
  function _split_into_shards (line 95) | def _split_into_shards(
  function _create_deepseek_model (line 114) | def _create_deepseek_model(model_dir: Path, shards: dict[str, dict[str, ...
  function _create_deepseek_adapter (line 149) | def _create_deepseek_adapter(adapter_dir: Path) -> None:
  function deepseek_model (line 177) | def deepseek_model(tmp_path: Path):
  function _load_output (line 190) | def _load_output(output_dir: Path) -> dict[str, torch.Tensor]:
  class TestDenseWeights (line 210) | class TestDenseWeights:
    method test_dense_weights_change_after_merge (line 211) | def test_dense_weights_change_after_merge(self, tmp_path: Path, deepse...
    method test_dense_weights_stay_bf16 (line 228) | def test_dense_weights_stay_bf16(self, tmp_path: Path, deepseek_model):
    method test_untargeted_dense_unchanged (line 243) | def test_untargeted_dense_unchanged(self, tmp_path: Path, deepseek_mod...
  class TestRoutedExperts (line 265) | class TestRoutedExperts:
    method test_routed_experts_change_after_merge (line 266) | def test_routed_experts_change_after_merge(self, tmp_path: Path, deeps...
    method test_routed_experts_quantized_to_fp8 (line 287) | def test_routed_experts_quantized_to_fp8(self, tmp_path: Path, deepsee...
    method test_expert_has_float32_scale (line 303) | def test_expert_has_float32_scale(self, tmp_path: Path, deepseek_model):
    method test_scale_shape_matches_block_structure (line 320) | def test_scale_shape_matches_block_structure(self, tmp_path: Path, dee...
  class TestSharedExperts (line 345) | class TestSharedExperts:
    method test_shared_experts_stay_bf16 (line 346) | def test_shared_experts_stay_bf16(self, tmp_path: Path, deepseek_model):
    method test_shared_experts_no_scale (line 362) | def test_shared_experts_no_scale(self, tmp_path: Path, deepseek_model):
  class TestShardLayout (line 383) | class TestShardLayout:
    method test_two_shard_input_produces_two_shard_output (line 384) | def test_two_shard_input_produces_two_shard_output(self, tmp_path: Pat...
    method test_index_consistent (line 403) | def test_index_consistent(self, tmp_path: Path, deepseek_model):
  class TestConfigMetadata (line 434) | class TestConfigMetadata:
    method test_compression_config_present (line 435) | def test_compression_config_present(self, tmp_path: Path, deepseek_mod...
    method test_quantization_config_absent (line 453) | def test_quantization_config_absent(self, tmp_path: Path, deepseek_mod...
    method test_ignore_list_correct (line 472) | def test_ignore_list_correct(self, tmp_path: Path, deepseek_model):
    method test_model_code_files_copied (line 494) | def test_model_code_files_copied(self, tmp_path: Path, deepseek_model):
  class TestResume (line 515) | class TestResume:
    method test_crash_after_shard_1_shows_in_progress (line 516) | def test_crash_after_shard_1_shows_in_progress(self, tmp_path: Path, d...
    method test_resume_completes (line 552) | def test_resume_completes(self, tmp_path: Path, deepseek_model):
  class TestApiValidation (line 601) | class TestApiValidation:
    method test_quantize_none_does_standard_merge (line 602) | def test_quantize_none_does_standard_merge(self, tmp_path: Path, deeps...
    method test_quantize_without_serving_format_raises (line 621) | def test_quantize_without_serving_format_raises(self, tmp_path: Path):
    method test_serving_format_without_quantize_raises (line 630) | def test_serving_format_without_quantize_raises(self, tmp_path: Path):
    method test_quantize_with_wrong_dtype_raises (line 639) | def test_quantize_with_wrong_dtype_raises(self, tmp_path: Path):
    method test_unknown_quantize_raises (line 650) | def test_unknown_quantize_raises(self, tmp_path: Path):
    method test_unknown_serving_format_raises (line 660) | def test_unknown_serving_format_raises(self, tmp_path: Path):

FILE: tests/weights/test_quantized_equivalence.py
  function _create_test_model (line 49) | def _create_test_model(model_dir: Path) -> dict[str, torch.Tensor]:
  function _create_test_adapter (line 161) | def _create_test_adapter(adapter_dir: Path) -> None:
  function _load_all_output_tensors (line 195) | def _load_all_output_tensors(output_dir: Path) -> dict[str, torch.Tensor]:
  function equivalence_model (line 204) | def equivalence_model(tmp_path: Path):
  class TestScaleTensorNaming (line 241) | class TestScaleTensorNaming:
    method test_routed_expert_scales_use_weight_scale_name (line 242) | def test_routed_expert_scales_use_weight_scale_name(self, equivalence_...
    method test_no_weight_scale_inv_in_output (line 253) | def test_no_weight_scale_inv_in_output(self, equivalence_model):
  class TestCompressedTensorsConfig (line 267) | class TestCompressedTensorsConfig:
    method test_weights_strategy_is_block (line 268) | def test_weights_strategy_is_block(self, equivalence_model):
    method test_block_structure_present (line 276) | def test_block_structure_present(self, equivalence_model):
    method test_input_activations_dynamic (line 284) | def test_input_activations_dynamic(self, equivalence_model):
    method test_quant_method_compressed_tensors (line 293) | def test_quant_method_compressed_tensors(self, equivalence_model):
    method test_format_float_quantized (line 297) | def test_format_float_quantized(self, equivalence_model):
    method test_quantization_status_compressed (line 301) | def test_quantization_status_compressed(self, equivalence_model):
    method test_quantization_config_absent (line 305) | def test_quantization_config_absent(self, equivalence_model):
  class TestIgnoreList (line 314) | class TestIgnoreList:
    method test_dense_projections_in_ignore (line 315) | def test_dense_projections_in_ignore(self, equivalence_model):
    method test_shared_experts_in_ignore (line 323) | def test_shared_experts_in_ignore(self, equivalence_model):
    method test_routed_experts_not_in_ignore (line 329) | def test_routed_experts_not_in_ignore(self, equivalence_model):
    method test_lm_head_in_ignore (line 335) | def test_lm_head_in_ignore(self, equivalence_model):
  class TestWeightDtypes (line 346) | class TestWeightDtypes:
    method test_dense_weights_bf16 (line 347) | def test_dense_weights_bf16(self, equivalence_model):
    method test_routed_experts_fp8 (line 351) | def test_routed_experts_fp8(self, equivalence_model):
    method test_routed_expert_scales_float32 (line 358) | def test_routed_expert_scales_float32(self, equivalence_model):
    method test_shared_experts_bf16 (line 365) | def test_shared_experts_bf16(self, equivalence_model):
    method test_dense_weight_changed_after_merge (line 371) | def test_dense_weight_changed_after_merge(self, equivalence_model):
    method test_untargeted_embedding_unchanged (line 378) | def test_untargeted_embedding_unchanged(self, equivalence_model):
  class TestScaleShapes (line 389) | class TestScaleShapes:
    method test_scale_shape_matches_blockwise_quantization (line 390) | def test_scale_shape_matches_blockwise_quantization(self, equivalence_...
    method test_down_proj_scale_shape (line 398) | def test_down_proj_scale_shape(self, equivalence_model):
  class TestShardConsistency (line 411) | class TestShardConsistency:
    method test_two_shard_output (line 412) | def test_two_shard_output(self, equivalence_model):
    method test_index_covers_all_tensors (line 416) | def test_index_covers_all_tensors(self, equivalence_model):
    method test_all_shards_referenced_and_exist (line 422) | def test_all_shards_referenced_and_exist(self, equivalence_model):
    method test_scale_tensors_in_same_shard_as_weights (line 428) | def test_scale_tensors_in_same_shard_as_weights(self, equivalence_model):
  class TestCustomFiles (line 446) | class TestCustomFiles:
    method test_configuration_deepseek_copied (line 447) | def test_configuration_deepseek_copied(self, equivalence_model):
    method test_modeling_deepseek_copied (line 450) | def test_modeling_deepseek_copied(self, equivalence_model):

FILE: tests/weights/test_strategy_consistency.py
  function _make_tiny_qwen3_dense_config (line 23) | def _make_tiny_qwen3_dense_config() -> PretrainedConfig:
  function _save_model_to_disk (line 35) | def _save_model_to_disk(config: PretrainedConfig, path: Path) -> None:
  function _save_adapter (line 45) | def _save_adapter(path: Path, *, model_path: Path) -> None:
  function _load_all_tensors (line 75) | def _load_all_tensors(output_dir: Path) -> dict[str, torch.Tensor]:
  class TestStrategyConsistency (line 89) | class TestStrategyConsistency:
    method test_full_and_shard_produce_identical_weights (line 92) | def test_full_and_shard_produce_identical_weights(self):

FILE: tinker_cookbook/chat_app/tinker_chat_cli.py
  class Config (line 28) | class Config:
  class ChatSession (line 37) | class ChatSession:
    method __init__ (line 40) | def __init__(
    method add_user_message (line 55) | def add_user_message(self, content: str):
    method add_assistant_message (line 59) | def add_assistant_message(self, content: str):
    method clear_history (line 63) | def clear_history(self):
    method generate_response (line 67) | async def generate_response(self) -> str:
  function main (line 101) | async def main(config: Config):

FILE: tinker_cookbook/checkpoint_utils.py
  class CheckpointRecord (line 25) | class CheckpointRecord:
    method __post_init__ (line 45) | def __post_init__(self) -> None:
    method to_dict (line 53) | def to_dict(self) -> dict[str, Any]:
    method from_dict (line 70) | def from_dict(cls, d: dict[str, Any]) -> "CheckpointRecord":
    method has (line 86) | def has(self, key: str) -> bool:
    method get (line 92) | def get(self, key: str, default: Any = _MISSING) -> Any:
  function add_renderer_name_to_user_metadata (line 116) | def add_renderer_name_to_user_metadata(
  function _handle_checkpoint_renderer_check_result (line 124) | def _handle_checkpoint_renderer_check_result(
  function get_renderer_name_from_checkpoint (line 147) | def get_renderer_name_from_checkpoint(
  function get_renderer_name_from_checkpoint_async (line 164) | async def get_renderer_name_from_checkpoint_async(
  function resolve_renderer_name_from_checkpoint_or_default (line 181) | def resolve_renderer_name_from_checkpoint_or_default(
  function resolve_renderer_name_from_checkpoint_or_default_async (line 213) | async def resolve_renderer_name_from_checkpoint_or_default_async(
  function check_renderer_name_for_checkpoint (line 242) | def check_renderer_name_for_checkpoint(
  function check_renderer_name_for_checkpoint_async (line 262) | async def check_renderer_name_for_checkpoint_async(
  function load_checkpoints_file (line 291) | def load_checkpoints_file(log_dir: str) -> list[CheckpointRecord]:
  function get_last_checkpoint (line 303) | def get_last_checkpoint(log_dir: str, required_key: str = "state_path") ...
  function save_checkpoint_async (line 330) | async def save_checkpoint_async(
  function save_checkpoint (line 373) | def save_checkpoint(

FILE: tinker_cookbook/checkpoint_utils_test.py
  function _write_checkpoints_jsonl (line 14) | def _write_checkpoints_jsonl(log_dir: str, records: list[dict]) -> None:
  function test_load_checkpoints_file_missing_dir (line 21) | def test_load_checkpoints_file_missing_dir():
  function test_load_checkpoints_file_missing_file (line 27) | def test_load_checkpoints_file_missing_file():
  function test_load_checkpoints_file_reads_records (line 34) | def test_load_checkpoints_file_reads_records():
  function test_get_last_checkpoint_returns_last (line 51) | def test_get_last_checkpoint_returns_last():
  function test_get_last_checkpoint_returns_none_when_empty (line 67) | def test_get_last_checkpoint_returns_none_when_empty():
  function test_get_last_checkpoint_returns_none_when_key_missing (line 74) | def test_get_last_checkpoint_returns_none_when_key_missing():
  function test_load_checkpoints_file_without_batch (line 85) | def test_load_checkpoints_file_without_batch():
  function test_checkpoint_record_extra_round_trips (line 102) | def test_checkpoint_record_extra_round_trips():
  function test_checkpoint_record_name_only (line 115) | def test_checkpoint_record_name_only():
  function test_checkpoint_record_get_known_field (line 122) | def test_checkpoint_record_get_known_field():
  function test_checkpoint_record_get_extra_field (line 133) | def test_checkpoint_record_get_extra_field():
  function test_checkpoint_record_has_extra_field (line 142) | def test_checkpoint_record_has_extra_field():
  function test_checkpoint_record_extra_overlap_with_known_keys (line 151) | def test_checkpoint_record_extra_overlap_with_known_keys():

FILE: tinker_cookbook/cli_utils.py
  function check_log_dir (line 13) | def check_log_dir(log_dir: str, behavior_if_exists: LogdirBehavior):

FILE: tinker_cookbook/cli_utils_test.py
  function test_check_log_dir_nonexistent_is_noop (line 11) | def test_check_log_dir_nonexistent_is_noop():
  function test_check_log_dir_resume_keeps_directory (line 16) | def test_check_log_dir_resume_keeps_directory():
  function test_check_log_dir_delete_removes_directory (line 25) | def test_check_log_dir_delete_removes_directory():
  function test_check_log_dir_raise_raises (line 35) | def test_check_log_dir_raise_raises():

FILE: tinker_cookbook/completers.py
  class TokensWithLogprobs (line 22) | class TokensWithLogprobs:
    method logprobs (line 27) | def logprobs(self) -> list[float]:
  class TokenCompleter (line 33) | class TokenCompleter:
    method __call__ (line 34) | async def __call__(
  class MessageCompleter (line 40) | class MessageCompleter:
    method __call__ (line 42) | async def __call__(self, messages: list[renderers.Message]) -> rendere...
  class TinkerTokenCompleter (line 50) | class TinkerTokenCompleter(TokenCompleter):
    method __call__ (line 59) | async def __call__(
  class TinkerMessageCompleter (line 82) | class TinkerMessageCompleter(MessageCompleter):
    method __init__ (line 85) | def __init__(
    method __call__ (line 102) | async def __call__(self, messages: list[renderers.Message]) -> rendere...

FILE: tinker_cookbook/display.py
  function to_ints (line 11) | def to_ints(chunk: tinker.ModelInputChunk, tokenizer: Tokenizer):
  function colorize_example (line 19) | def colorize_example(datum: tinker.Datum, tokenizer: Tokenizer, key: str...
  function format_trajectory (line 27) | def format_trajectory(

FILE: tinker_cookbook/distillation/datasets.py
  class TeacherConfig (line 32) | class TeacherConfig:
  class DistillationDatasetConfig (line 40) | class DistillationDatasetConfig:
  class CompositeDataset (line 48) | class CompositeDataset:
    method __init__ (line 51) | def __init__(self, datasets: list[RLDataset], groups_per_batch_list: l...
    method __len__ (line 60) | def __len__(self) -> int:
    method get_batch (line 63) | def get_batch(self, i_batch: int) -> tuple[list[EnvGroupBuilder], list...
  class PromptOnlyEnv (line 84) | class PromptOnlyEnv(ProblemEnv):
    method __init__ (line 87) | def __init__(
    method get_question (line 97) | def get_question(self) -> str:
    method check_format (line 100) | def check_format(self, sample_str: str) -> bool:
    method check_answer (line 104) | def check_answer(self, sample_str: str) -> bool:
    method get_reference_answer (line 108) | def get_reference_answer(self) -> str:
    method step (line 112) | async def step(self, action: Action) -> StepResult:
  class PromptOnlyDataset (line 124) | class PromptOnlyDataset(RLDataset):
    method __init__ (line 127) | def __init__(
    method _truncate_prompt (line 147) | def _truncate_prompt(self, prompt: str) -> str:
    method get_batch (line 158) | def get_batch(self, index: int) -> Sequence[EnvGroupBuilder]:
    method __len__ (line 176) | def __len__(self) -> int:
  function load_deepmath_prompts (line 180) | def load_deepmath_prompts(split: Literal["train", "test"] = "train") -> ...
  function load_tulu3_prompts (line 192) | def load_tulu3_prompts() -> list[str] | None:
  class PromptOnlyDatasetBuilder (line 222) | class PromptOnlyDatasetBuilder(RLDatasetBuilder):
    method __call__ (line 233) | async def __call__(self) -> tuple[PromptOnlyDataset, PromptOnlyDataset...

FILE: tinker_cookbook/distillation/train_on_policy.py
  function incorporate_kl_penalty (line 50) | async def incorporate_kl_penalty(
  class Config (line 130) | class Config:
  function prepare_minibatch (line 172) | async def prepare_minibatch(
  function do_train_step_and_get_sampling_client (line 227) | async def do_train_step_and_get_sampling_client(
  function do_sync_training (line 279) | async def do_sync_training(
  function main (line 366) | async def main(

FILE: tinker_cookbook/eval/custom_evaluators.py
  class CustomEvaluator (line 13) | class CustomEvaluator(SamplingClientEvaluator):
    method __init__ (line 18) | def __init__(
    method __call__ (line 36) | async def __call__(self, sampling_client: tinker.SamplingClient) -> di...
  function grader_fn (line 82) | def grader_fn(response: str, target: str) -> bool:
  function main (line 99) | async def main():

FILE: tinker_cookbook/eval/custom_inspect_task.py
  function example_lm_as_judge (line 52) | def example_lm_as_judge() -> Task:

FILE: tinker_cookbook/eval/evaluators.py
  class TrainingClientEvaluator (line 10) | class TrainingClientEvaluator:
    method __call__ (line 15) | async def __call__(self, training_client: tinker.TrainingClient) -> di...
  class SamplingClientEvaluator (line 19) | class SamplingClientEvaluator:
    method __call__ (line 24) | async def __call__(self, sampling_client: tinker.SamplingClient) -> di...

FILE: tinker_cookbook/eval/inspect_evaluators.py
  class InspectEvaluatorBuilder (line 19) | class InspectEvaluatorBuilder:
    method __call__ (line 60) | def __call__(self) -> SamplingClientEvaluator:
  class InspectEvaluator (line 64) | class InspectEvaluator(SamplingClientEvaluator):
    method __init__ (line 69) | def __init__(self, config: InspectEvaluatorBuilder):
    method __call__ (line 77) | async def __call__(self, sampling_client: tinker.SamplingClient) -> di...

FILE: tinker_cookbook/eval/inspect_utils.py
  function get_model_usage (line 36) | def get_model_usage(
  function convert_inspect_messages (line 51) | def convert_inspect_messages(messages: list[InspectAIChatMessage]) -> li...
  function _message_to_inspect_content (line 81) | def _message_to_inspect_content(
  class InspectAPIFromTinkerSampling (line 97) | class InspectAPIFromTinkerSampling(InspectAIModelAPI):
    method __init__ (line 105) | def __init__(
    method generate (line 143) | async def generate(

FILE: tinker_cookbook/eval/inspect_utils_test.py
  function test_message_to_inspect_content_with_thinking (line 19) | def test_message_to_inspect_content_with_thinking():
  function test_message_to_inspect_content_string_content (line 35) | def test_message_to_inspect_content_string_content():
  function test_message_to_inspect_content_text_only_parts (line 43) | def test_message_to_inspect_content_text_only_parts():
  function test_message_to_inspect_content_empty_thinking (line 54) | def test_message_to_inspect_content_empty_thinking():
  function test_convert_inspect_messages_string_content (line 71) | def test_convert_inspect_messages_string_content():
  function test_convert_inspect_messages_structured_assistant (line 84) | def test_convert_inspect_messages_structured_assistant():
  function test_convert_inspect_messages_structured_non_assistant_flattens (line 104) | def test_convert_inspect_messages_structured_non_assistant_flattens():

FILE: tinker_cookbook/eval/run_inspect_evals.py
  class Config (line 15) | class Config(InspectEvaluatorBuilder):
  function main (line 19) | async def main(config: Config):

FILE: tinker_cookbook/exceptions.py
  class TinkerCookbookError (line 51) | class TinkerCookbookError(Exception):
  class ConfigurationError (line 64) | class ConfigurationError(TinkerCookbookError, ValueError):
  class DataError (line 84) | class DataError(TinkerCookbookError, ValueError):
  class DataFormatError (line 92) | class DataFormatError(DataError):
  class DataValidationError (line 101) | class DataValidationError(DataError):
  class RendererError (line 115) | class RendererError(TinkerCookbookError, ValueError):
  class TrainingError (line 129) | class TrainingError(TinkerCookbookError, RuntimeError):
  class CheckpointError (line 138) | class CheckpointError(TrainingError):
  class AllTrajectoriesFailedError (line 146) | class AllTrajectoriesFailedError(TrainingError):
  class WeightsError (line 159) | class WeightsError(TinkerCookbookError):
  class WeightsDownloadError (line 168) | class WeightsDownloadError(WeightsError, RuntimeError):
  class WeightsMergeError (line 177) | class WeightsMergeError(WeightsError, ValueError):
  class SandboxError (line 192) | class SandboxError(TinkerCookbookError, RuntimeError):

FILE: tinker_cookbook/exceptions_test.py
  function test_all_exceptions_are_tinker_cookbook_errors (line 47) | def test_all_exceptions_are_tinker_cookbook_errors(exc_cls: type[Excepti...
  function test_stdlib_backward_compatibility (line 75) | def test_stdlib_backward_compatibility(exc_cls: type[Exception], stdlib_...
  function test_data_subtypes (line 86) | def test_data_subtypes():
  function test_training_subtypes (line 91) | def test_training_subtypes():
  function test_weights_subtypes (line 95) | def test_weights_subtypes():
  function test_sandbox_terminated_error_is_sandbox_error (line 105) | def test_sandbox_terminated_error_is_sandbox_error():
  function test_exceptions_are_picklable (line 124) | def test_exceptions_are_picklable(exc_cls: type[Exception]):
  function test_exceptions_all_is_complete (line 138) | def test_exceptions_all_is_complete():

FILE: tinker_cookbook/hyperparam_utils.py
  function _list_param_shapes_from_safetensors_remote (line 17) | def _list_param_shapes_from_safetensors_remote(
  function get_lora_lr_over_full_finetune_lr (line 66) | def get_lora_lr_over_full_finetune_lr(model_name: str, lora_alpha: int =...
  function _get_hidden_size (line 75) | def _get_hidden_size(model_name: str) -> int:
  function get_lora_param_count (line 100) | def get_lora_param_count(
  function get_lr (line 154) | def get_lr(model_name: str, is_lora: bool = True) -> float:
  function get_full_finetune_param_count (line 184) | def get_full_finetune_param_count(model_name: str) -> float:
  function get_full_finetune_lr_multiplier (line 191) | def get_full_finetune_lr_multiplier(model_name: str):
  function get_lora_lr_multiplier (line 195) | def get_lora_lr_multiplier(model_name: str):

FILE: tinker_cookbook/image_processing_utils.py
  function get_image_processor (line 27) | def get_image_processor(model_name: str) -> ImageProcessor:
  function resize_image (line 44) | def resize_image(image: Image.Image, max_size: int) -> Image.Image:

FILE: tinker_cookbook/image_processing_utils_test.py
  function _clear_cache (line 9) | def _clear_cache() -> None:
  function test_kimi_k25_trusts_remote_code_without_env (line 15) | def test_kimi_k25_trusts_remote_code_without_env(
  function test_no_trust_remote_code_by_default (line 30) | def test_no_trust_remote_code_by_default(
  function test_env_var_enables_trust_remote_code (line 44) | def test_env_var_enables_trust_remote_code(

FILE: tinker_cookbook/model_info.py
  class ModelAttributes (line 33) | class ModelAttributes:
  function get_llama_info (line 43) | def get_llama_info() -> dict[str, ModelAttributes]:
  function get_qwen_info (line 58) | def get_qwen_info() -> dict[str, ModelAttributes]:
  function get_deepseek_info (line 91) | def get_deepseek_info() -> dict[str, ModelAttributes]:
  function get_gpt_oss_info (line 100) | def get_gpt_oss_info() -> dict[str, ModelAttributes]:
  function get_moonshot_info (line 109) | def get_moonshot_info() -> dict[str, ModelAttributes]:
  function get_nvidia_info (line 118) | def get_nvidia_info() -> dict[str, ModelAttributes]:
  function get_model_attributes (line 130) | def get_model_attributes(model_name: str) -> ModelAttributes:
  function get_recommended_renderer_names (line 150) | def get_recommended_renderer_names(model_name: str) -> list[str]:
  function get_recommended_renderer_name (line 159) | def get_recommended_renderer_name(model_name: str) -> str:
  function warn_if_renderer_not_recommended (line 166) | def warn_if_renderer_not_recommended(model_name: str, renderer_name: str...

FILE: tinker_cookbook/model_info_test.py
  class TestWarnIfRendererNotRecommended (line 8) | class TestWarnIfRendererNotRecommended:
    method test_no_warning_when_renderer_is_none (line 9) | def test_no_warning_when_renderer_is_none(self, caplog: pytest.LogCapt...
    method test_no_warning_when_renderer_is_recommended (line 14) | def test_no_warning_when_renderer_is_recommended(self, caplog: pytest....
    method test_warning_when_renderer_not_recommended (line 19) | def test_warning_when_renderer_not_recommended(self, caplog: pytest.Lo...
    method test_no_warning_for_unknown_model (line 28) | def test_no_warning_for_unknown_model(self, caplog: pytest.LogCaptureF...
    method test_warning_for_thinking_renderer_on_thinking_model_alt (line 33) | def test_warning_for_thinking_renderer_on_thinking_model_alt(
    method test_warning_for_wrong_family (line 41) | def test_warning_for_wrong_family(self, caplog: pytest.LogCaptureFixtu...

FILE: tinker_cookbook/preference/comparison_policy_evaluator.py
  class ComparisonEvaluator (line 18) | class ComparisonEvaluator(SamplingClientEvaluator):
    method __init__ (line 23) | def __init__(
    method __call__ (line 43) | async def __call__(self, sampling_client: tinker.SamplingClient) -> di...

FILE: tinker_cookbook/preference/dpo_datasets.py
  class DPODatasetBuilderFromComparisons (line 16) | class DPODatasetBuilderFromComparisons(ChatDatasetBuilder):
    method __call__ (line 24) | def __call__(self) -> tuple[SupervisedDataset, SupervisedDataset | None]:

FILE: tinker_cookbook/preference/preference_datasets.py
  class ComparisonDatasetBuilder (line 27) | class ComparisonDatasetBuilder:
    method get_train_and_test_datasets (line 35) | def get_train_and_test_datasets(self) -> tuple[datasets.Dataset, datas...
    method example_to_labeled_comparison (line 39) | def example_to_labeled_comparison(self, example: dict) -> LabeledCompa...
    method get_labeled_comparisons (line 43) | def get_labeled_comparisons(
  class ChatDatasetBuilderFromComparisons (line 71) | class ChatDatasetBuilderFromComparisons(ChatDatasetBuilder):
    method comparison_renderer (line 81) | def comparison_renderer(self) -> ComparisonRenderer:
    method __call__ (line 84) | def __call__(self) -> tuple[SupervisedDataset, SupervisedDataset | None]:
  class ComparisonBuilderFromJsonl (line 126) | class ComparisonBuilderFromJsonl(ComparisonDatasetBuilder):
    method get_train_and_test_datasets (line 132) | def get_train_and_test_datasets(self) -> tuple[datasets.Dataset, datas...
    method example_to_labeled_comparison (line 157) | def example_to_labeled_comparison(self, example: dict) -> LabeledCompa...

FILE: tinker_cookbook/preference/train_dpo.py
  class Config (line 28) | class Config:
  function create_dpo_clients (line 80) | def create_dpo_clients(
  function compute_dpo_loss (line 129) | def compute_dpo_loss(
  function do_update (line 177) | def do_update(
  function main (line 346) | def main(config: Config):
  function print_example (line 444) | def print_example(datum: tinker.Datum, tokenizer: Tokenizer, label: str ...

FILE: tinker_cookbook/preference/types.py
  class Comparison (line 24) | class Comparison:
    method swap (line 29) | def swap(self) -> "Comparison":
  class LabeledComparison (line 38) | class LabeledComparison:
    method swap (line 42) | def swap(self) -> "LabeledComparison":
  class ComparisonRenderer (line 49) | class ComparisonRenderer:
    method build_generation_prompt (line 50) | def build_generation_prompt(self, comparison: Comparison) -> types.Mod...
    method to_model_input_weights (line 53) | def to_model_input_weights(
    method tokenizer (line 59) | def tokenizer(self) -> Tokenizer:
  class ComparisonRendererFromChatRenderer (line 63) | class ComparisonRendererFromChatRenderer(ComparisonRenderer):
    method __init__ (line 65) | def __init__(self, convo_renderer: renderers.Renderer):
    method _comparison_to_convo (line 68) | def _comparison_to_convo(self, comparison: Comparison) -> list[rendere...
    method build_generation_prompt (line 78) | def build_generation_prompt(self, comparison: Comparison) -> types.Mod...
    method to_model_input_weights (line 81) | def to_model_input_weights(
    method tokenizer (line 99) | def tokenizer(self) -> Tokenizer:
  class PreferenceModel (line 103) | class PreferenceModel:
    method __call__ (line 104) | async def __call__(self, comparison: Comparison) -> float:
  class PreferenceModelBuilder (line 113) | class PreferenceModelBuilder:
    method __call__ (line 114) | def __call__(self) -> PreferenceModel:
  class PreferenceModelFromChatRenderer (line 118) | class PreferenceModelFromChatRenderer(PreferenceModel):
    method __init__ (line 119) | def __init__(self, convo_renderer: renderers.Renderer, sampling_client...
    method __call__ (line 123) | async def __call__(self, comparison: Comparison) -> float:
  class PreferenceModelBuilderFromChatRenderer (line 146) | class PreferenceModelBuilderFromChatRenderer(PreferenceModelBuilder):
    method __call__ (line 152) | def __call__(self) -> PreferenceModel:

FILE: tinker_cookbook/recipes/chat_sl/chat_datasets.py
  class Tulu3Builder (line 24) | class Tulu3Builder(ChatDatasetBuilder):
    method __call__ (line 25) | def __call__(self) -> tuple[SupervisedDataset, SupervisedDataset]:
  class NoRobotsBuilder (line 54) | class NoRobotsBuilder(ChatDatasetBuilder):
    method __call__ (line 55) | def __call__(self) -> tuple[SupervisedDataset, SupervisedDataset]:

FILE: tinker_cookbook/recipes/chat_sl/train.py
  class CLIConfig (line 21) | class CLIConfig:
  function get_dataset_builder (line 60) | def get_dataset_builder(
  function get_infrequent_evaluator_builders (line 91) | def get_infrequent_evaluator_builders(
  function cli_main (line 116) | def cli_main(cli_config: CLIConfig):

FILE: tinker_cookbook/recipes/code_rl/code_env.py
  function _load_deepcoder_split (line 28) | def _load_deepcoder_split(split: Literal["train", "test"]) -> Dataset:
  function _ensure_dict (line 44) | def _ensure_dict(metadata: Any) -> dict[str, Any]:
  function _normalize_tests (line 56) | def _normalize_tests(raw_tests: Any, metadata: dict[str, Any]) -> list[d...
  function _build_question (line 91) | def _build_question(example: dict[str, Any]) -> str | None:
  function load_deepcoder_tasks (line 102) | def load_deepcoder_tasks(
  function _initial_messages (line 154) | def _initial_messages(
  class DeepcoderEnvGroupBuilder (line 171) | class DeepcoderEnvGroupBuilder(EnvGroupBuilder):
    method make_envs (line 184) | async def make_envs(self) -> Sequence[Env]:
    method logging_tags (line 212) | def logging_tags(self) -> list[str]:
  class DeepcoderDataset (line 216) | class DeepcoderDataset(RLDataset):
    method __init__ (line 219) | def __init__(
    method get_batch (line 227) | def get_batch(self, index: int) -> Sequence[EnvGroupBuilder]:
    method __len__ (line 232) | def __len__(self) -> int:
  class DeepcoderDatasetBuilder (line 237) | class DeepcoderDatasetBuilder(RLDatasetBuilder):
    method __call__ (line 250) | async def __call__(self) -> tuple[RLDataset, RLDataset | None]:

FILE: tinker_cookbook/recipes/code_rl/code_grading.py
  function _get_sandboxfusion_client (line 23) | def _get_sandboxfusion_client() -> SandboxFusionClient:
  function _get_modal_pool (line 31) | def _get_modal_pool():
  function extract_code_from_model (line 44) | def extract_code_from_model(model_response: str) -> str | None:
  function postprocess_lcb_sample (line 52) | def postprocess_lcb_sample(sample: list[dict[str, Any]]) -> dict[str, str]:
  function _check_with_sandboxfusion (line 74) | async def _check_with_sandboxfusion(
  function _check_with_modal (line 94) | async def _check_with_modal(
  function sandbox_check_correctness (line 119) | async def sandbox_check_correctness(
  function taco_to_lcb_format (line 158) | def taco_to_lcb_format(tests: dict[str, Any]) -> list[dict[str, Any]]:

FILE: tinker_cookbook/recipes/code_rl/deepcoder_tool.py
  class DeepcoderTask (line 19) | class DeepcoderTask:
  class DeepcoderTool (line 27) | class DeepcoderTool:
    method __init__ (line 33) | def __init__(
    method check_solution (line 44) | async def check_solution(
  class DeepcoderReward (line 69) | class DeepcoderReward:
    method __call__ (line 85) | async def __call__(self, history: list[Message]) -> tuple[float, dict[...

FILE: tinker_cookbook/recipes/code_rl/lcb_utils.py
  function fetch_live_code_bench_system_prompt (line 809) | def fetch_live_code_bench_system_prompt(prompt: str, starter_code: str |...

FILE: tinker_cookbook/recipes/code_rl/train.py
  class CLIConfig (line 17) | class CLIConfig:
  function cli_main (line 64) | async def cli_main(cli_config: CLIConfig) -> None:

FILE: tinker_cookbook/recipes/distillation/harbor_multiturn.py
  function zero_reward (line 21) | async def zero_reward(history: list[Message]) -> tuple[float, dict[str, ...
  class HarborDistillationDatasetBuilder (line 27) | class HarborDistillationDatasetBuilder(HarborDatasetBuilder):
    method __call__ (line 30) | async def __call__(self) -> tuple[RLDataset, RLDataset | None]:

FILE: tinker_cookbook/recipes/distillation/harbor_multiturn_test.py
  class TestZeroReward (line 11) | class TestZeroReward:
    method test_zero_reward_returns_zero (line 12) | def test_zero_reward_returns_zero(self):
    method test_zero_reward_ignores_history_content (line 20) | def test_zero_reward_ignores_history_content(self):
    method test_env_group_builder_compute_group_rewards_returns_zeros (line 29) | def test_env_group_builder_compute_group_rewards_returns_zeros(self):

FILE: tinker_cookbook/recipes/distillation/off_policy_reasoning.py
  class OpenThoughts3Builder (line 44) | class OpenThoughts3Builder(ChatDatasetBuilder):
    method __call__ (line 50) | def __call__(self) -> tuple[SupervisedDataset, SupervisedDataset | None]:
  class CLIConfig (line 91) | class CLIConfig:
  function cli_main (line 128) | def cli_main(cli_config: CLIConfig):

FILE: tinker_cookbook/recipes/distillation/on_policy_distillation.py
  class CLIConfig (line 49) | class CLIConfig:
  function cli_main (line 100) | async def cli_main(cli_config: CLIConfig):

FILE: tinker_cookbook/recipes/distillation/on_policy_distillation_harbor_multi_turn.py
  class CLIConfig (line 56) | class CLIConfig:
  function cli_main (line 109) | async def cli_main(cli_config: CLIConfig, tasks: list[HarborTask]):

FILE: tinker_cookbook/recipes/distillation/on_policy_multi_teacher.py
  class CLIConfig (line 41) | class CLIConfig:
  function cli_main (line 94) | async def cli_main(cli_config: CLIConfig):

FILE: tinker_cookbook/recipes/harbor_rl/eval.py
  class EvalConfig (line 44) | class EvalConfig:
  class TaskResult (line 62) | class TaskResult:
  function evaluate_task (line 72) | async def evaluate_task(
  function run_eval (line 169) | async def run_eval(

FILE: tinker_cookbook/recipes/harbor_rl/harbor_env.py
  function default_sandbox_factory (line 37) | async def default_sandbox_factory(image: modal.Image, timeout: int) -> S...
  class HarborTask (line 42) | class HarborTask:
  function load_harbor_tasks (line 51) | def load_harbor_tasks(dataset: str) -> list[HarborTask]:
  function _initial_messages (line 69) | def _initial_messages(
  class HarborEnvGroupBuilder (line 83) | class HarborEnvGroupBuilder(EnvGroupBuilder):
    method __init__ (line 86) | def __init__(
    method make_envs (line 113) | async def make_envs(self) -> Sequence[Env]:
    method cleanup (line 153) | async def cleanup(self) -> None:
    method logging_tags (line 161) | def logging_tags(self) -> list[str]:
  class HarborDataset (line 165) | class HarborDataset(RLDataset):
    method __init__ (line 168) | def __init__(
    method get_batch (line 176) | def get_batch(self, index: int) -> Sequence[EnvGroupBuilder]:
    method __len__ (line 181) | def __len__(self) -> int:
  class HarborDatasetBuilder (line 186) | class HarborDatasetBuilder(RLDatasetBuilder):
    method _make_env_group_builders (line 202) | def _make_env_group_builders(self, group_size: int) -> list[HarborEnvG...
    method __call__ (line 220) | async def __call__(self) -> tuple[RLDataset, RLDataset | None]:

FILE: tinker_cookbook/recipes/harbor_rl/harbor_tools.py
  class HarborBashTool (line 20) | class HarborBashTool:
    method __init__ (line 26) | def __init__(self, sandbox: SandboxInterface, command_timeout: int = 1...
    method bash (line 31) | async def bash(
  class HarborReward (line 49) | class HarborReward:
    method __call__ (line 62) | async def __call__(self, history: list[Message]) -> tuple[float, dict[...
    method _upload_tests (line 90) | async def _upload_tests(self) -> None:
    method _parse_reward (line 100) | async def _parse_reward(self) -> float:

FILE: tinker_cookbook/recipes/harbor_rl/harbor_tools_test.py
  class FakeSandbox (line 18) | class FakeSandbox:
    method __init__ (line 21) | def __init__(self) -> None:
    method sandbox_id (line 29) | def sandbox_id(self) -> str:
    method set_command_result (line 32) | def set_command_result(self, command: str, result: SandboxResult) -> N...
    method send_heartbeat (line 35) | async def send_heartbeat(self) -> None:
    method run_command (line 38) | async def run_command(
    method read_file (line 50) | async def read_file(
    method write_file (line 57) | async def write_file(
    method cleanup (line 65) | async def cleanup(self) -> None:
  class TestHarborReward (line 74) | class TestHarborReward:
    method _make_reward (line 75) | def _make_reward(self, tmp_path: Path, sandbox: FakeSandbox, **kwargs)...
    method test_reward_from_txt (line 78) | def test_reward_from_txt(self, tmp_path: Path) -> None:
    method test_reward_from_json (line 87) | def test_reward_from_json(self, tmp_path: Path) -> None:
    method test_no_reward_file (line 96) | def test_no_reward_file(self, tmp_path: Path) -> None:
    method test_zero_reward (line 104) | def test_zero_reward(self, tmp_path: Path) -> None:
    method test_grading_error (line 113) | def test_grading_error(self, tmp_path: Path) -> None:
    method test_upload_tests (line 133) | def test_upload_tests(self, tmp_path: Path) -> None:
  class TestHarborBashTool (line 158) | class TestHarborBashTool:
    method test_bash_tool_basic (line 159) | def test_bash_tool_basic(self) -> None:
    method test_bash_tool_truncation (line 175) | def test_bash_tool_truncation(self) -> None:
  class TestHarborEnvGroupBuilderPickle (line 199) | class TestHarborEnvGroupBuilderPickle:
    method _make_task (line 200) | def _make_task(self, tmp_path: Path) -> HarborTask:
    method test_pickle_roundtrip (line 208) | def test_pickle_roundtrip(self, tmp_path: Path) -> None:
    method test_pickle_with_custom_params (line 231) | def test_pickle_with_custom_params(self, tmp_path: Path) -> None:

FILE: tinker_cookbook/recipes/harbor_rl/train.py
  class CLIConfig (line 20) | class CLIConfig:
  function cli_main (line 61) | async def cli_main(

FILE: tinker_cookbook/recipes/math_rl/arithmetic_env.py
  class ArithmeticEnv (line 13) | class ArithmeticEnv(ProblemEnv):
    method __init__ (line 18) | def __init__(
    method get_question (line 29) | def get_question(self) -> str:
    method check_answer (line 32) | def check_answer(self, sample_str: str) -> bool:
    method check_format (line 40) | def check_format(self, sample_str: str) -> bool:
    method get_reference_answer (line 43) | def get_reference_answer(self) -> str:
    method standard_fewshot_prefix (line 47) | def standard_fewshot_prefix() -> list[renderers.Message]:
  class ArithmeticDataset (line 54) | class ArithmeticDataset(RLDataset):
    method __init__ (line 55) | def __init__(
    method get_batch (line 70) | def get_batch(self, index: int) -> Sequence[EnvGroupBuilder]:
    method _make_env_group_builder (line 74) | def _make_env_group_builder(self, rng: np.random.RandomState) -> Probl...
    method __len__ (line 85) | def __len__(self) -> int:
  class ArithmeticDatasetBuilder (line 90) | class ArithmeticDatasetBuilder(RLDatasetBuilder):
    method __call__ (line 98) | async def __call__(self) -> tuple[ArithmeticDataset, None]:

FILE: tinker_cookbook/recipes/math_rl/math_env.py
  class MathEnv (line 22) | class MathEnv(ProblemEnv):
    method __init__ (line 23) | def __init__(
    method question_suffix (line 39) | def question_suffix(cls) -> str:
    method get_question (line 42) | def get_question(self) -> str:
    method check_format (line 45) | def check_format(self, sample_str: str) -> bool:
    method check_answer (line 52) | def check_answer(self, sample_str: str) -> bool:
    method get_reference_answer (line 59) | def get_reference_answer(self) -> str:
    method standard_fewshot_prefix (line 63) | def standard_fewshot_prefix() -> list[renderers.Message]:
  function safe_grade (line 76) | def safe_grade(given_answer: str, ground_truth: str, grader: str = "symp...
  function extract_gsm8k_final_answer (line 92) | def extract_gsm8k_final_answer(text: str) -> str:
  function _get_hendrycks_math_test (line 113) | def _get_hendrycks_math_test() -> Dataset:
  function _get_hendrycks_math_train (line 118) | def _get_hendrycks_math_train() -> Dataset:
  class MathDataset (line 143) | class MathDataset(RLDataset):
    method __init__ (line 144) | def __init__(
    method get_batch (line 162) | def get_batch(self, index: int) -> Sequence[EnvGroupBuilder]:
    method __len__ (line 172) | def __len__(self) -> int:
    method _make_env_group_builder (line 175) | def _make_env_group_builder(
  class MathDatasetBuilder (line 192) | class MathDatasetBuilder(RLDatasetBuilder):
    method __call__ (line 200) | async def __call__(self) -> tuple[MathDataset, MathDataset]:
  class PolarisDataset (line 221) | class PolarisDataset(MathDataset):
    method __init__ (line 222) | def __init__(
    method _make_env_group_builder (line 239) | def _make_env_group_builder(
  class PolarisDatasetBuilder (line 257) | class PolarisDatasetBuilder(RLDatasetBuilder):
    method __call__ (line 264) | async def __call__(self) -> tuple[PolarisDataset, None]:
  class DeepMathDataset (line 274) | class DeepMathDataset(MathDataset):
    method __init__ (line 275) | def __init__(
    method _make_env_group_builder (line 290) | def _make_env_group_builder(
  class DeepMathDatasetBuilder (line 308) | class DeepMathDatasetBuilder(RLDatasetBuilder):
    method __call__ (line 315) | async def __call__(self) -> tuple[DeepMathDataset, None]:
  class Gsm8kDataset (line 325) | class Gsm8kDataset(RLDataset):
    method __init__ (line 326) | def __init__(
    method question_suffix (line 346) | def question_suffix(cls) -> str:
    method get_batch (line 349) | def get_batch(self, index: int) -> Sequence[EnvGroupBuilder]:
    method __len__ (line 359) | def __len__(self) -> int:
    method _make_env_group_builder (line 362) | def _make_env_group_builder(
  class Gsm8kDatasetBuilder (line 380) | class Gsm8kDatasetBuilder(RLDatasetBuilder):
    method __call__ (line 388) | async def __call__(self) -> tuple[Gsm8kDataset, Gsm8kDataset]:
  function get_math_dataset_builder (line 418) | def get_math_dataset_builder(

FILE: tinker_cookbook/recipes/math_rl/math_env_test.py
  function test_math_dataset_builder (line 6) | def test_math_dataset_builder():

FILE: tinker_cookbook/recipes/math_rl/math_grading.py
  function normalize_answer (line 36) | def normalize_answer(answer: str | None) -> str | None:
  function _fix_fracs (line 50) | def _fix_fracs(string: str) -> str:
  function _fix_a_slash_b (line 82) | def _fix_a_slash_b(string: str) -> str:
  function _remove_right_units (line 97) | def _remove_right_units(string: str) -> str:
  function _fix_sqrt (line 107) | def _fix_sqrt(string: str) -> str:
  function _strip_string (line 122) | def _strip_string(string: str) -> str:
  function extract_boxed (line 191) | def extract_boxed(text: str) -> str:
  function _sympy_parse (line 229) | def _sympy_parse(expr: str):
  function _parse_latex (line 241) | def _parse_latex(expr: str) -> str:
  function _is_float (line 259) | def _is_float(num: str) -> bool:
  function _is_int (line 267) | def _is_int(x: float) -> bool:
  function _is_frac (line 274) | def _is_frac(expr: str) -> bool:
  function _str_is_int (line 278) | def _str_is_int(x_str: str) -> bool:
  function _str_to_int (line 287) | def _str_to_int(x_str: str) -> int:
  function _inject_implicit_mixed_number (line 293) | def _inject_implicit_mixed_number(step: str):
  function _strip_properly_formatted_commas (line 303) | def _strip_properly_formatted_commas(expr: str):
  function _normalize (line 314) | def _normalize(expr: str) -> str:
  function count_unknown_letters_in_expr (line 385) | def count_unknown_letters_in_expr(expr: str):
  function should_allow_eval (line 392) | def should_allow_eval(expr: str):
  function are_equal_under_sympy (line 404) | def are_equal_under_sympy(ground_truth_normalized: str, given_normalized...
  function split_tuple (line 418) | def split_tuple(expr: str):
  function grade_answer (line 437) | def grade_answer(given_answer: str, ground_truth: str) -> bool:
  function grade_answer_math_verify (line 496) | def grade_answer_math_verify(given_answer: str, ground_truth: str) -> bool:
  class TimeoutException (line 522) | class TimeoutException(Exception):
  function run_with_timeout_signal (line 526) | def run_with_timeout_signal(

FILE: tinker_cookbook/recipes/math_rl/train.py
  class CLIConfig (line 21) | class CLIConfig:
  function get_dataset_builder (line 77) | def get_dataset_builder(
  function cli_main (line 107) | async def cli_main(cli_config: CLIConfig):

FILE: tinker_cookbook/recipes/multiplayer_rl/guess_number/env.py
  class GuessNumberEnv (line 32) | class GuessNumberEnv(Env):
    method __init__ (line 33) | def __init__(self, gold_answer: int, renderer: Renderer):
    method stop_condition (line 40) | def stop_condition(self) -> StopCondition:
    method _obs (line 44) | def _obs(self) -> ModelInput:
    method initial_observation (line 49) | async def initial_observation(self) -> tuple[ModelInput, StopCondition]:
    method _get_user_turn (line 52) | def _get_user_turn(self, action_text: str) -> tuple[Message, float]:
    method step (line 71) | async def step(self, action: Action) -> StepResult:
  class GuessNumberEnvGroupBuilder (line 103) | class GuessNumberEnvGroupBuilder(EnvGroupBuilder):
    method make_envs (line 108) | async def make_envs(self) -> Sequence[Env]:
  class GuessNumberDataset (line 116) | class GuessNumberDataset(RLDataset):
    method get_batch (line 122) | def get_batch(self, index: int) -> Sequence[EnvGroupBuilder]:
    method __len__ (line 132) | def __len__(self) -> int:
  class GuessNumberDatasetBuilder (line 137) | class GuessNumberDatasetBuilder(RLDatasetBuilder):
    method __call__ (line 146) | async def __call__(self) -> tuple[RLDataset, RLDataset]:
    method _get_train_and_test_numbers (line 164) | def _get_train_and_test_numbers(self) -> tuple[list[int], list[int]]:

FILE: tinker_cookbook/recipes/multiplayer_rl/guess_number/train.py
  class CLIConfig (line 12) | class CLIConfig:
  function build_config (line 30) | def build_config(cli_config: CLIConfig) -> train.Config:

FILE: tinker_cookbook/recipes/multiplayer_rl/text_arena/env.py
  class TwoPlayerCoordinator (line 38) | class TwoPlayerCoordinator:
    method __init__ (line 41) | def __init__(self, shared_env: ta.Env):
    method state (line 47) | def state(self) -> ta.State:
    method current_player_id (line 51) | def current_player_id(self) -> int:
    method game_done (line 56) | def game_done(self) -> bool:
    method rewards (line 61) | def rewards(self) -> dict | None:
    method wait_across_env (line 65) | async def wait_across_env(self, player_id: int) -> None:
    method make_move (line 72) | async def make_move(self, player_id: int, move: str) -> None:
  class TwoPlayerEnv (line 97) | class TwoPlayerEnv(Env):
    method __post_init__ (line 106) | def __post_init__(self):
    method stop_condition (line 112) | def stop_condition(self) -> StopCondition:
    method wait_for_turn (line 115) | async def wait_for_turn(self) -> None:
    method initial_observation (line 123) | async def initial_observation(self) -> tuple[Observation, StopCondition]:
    method opponent_step (line 128) | async def opponent_step(self) -> None:
    method step (line 141) | async def step(self, action: Action) -> StepResult:
    method get_done_step (line 164) | def get_done_step(self) -> StepResult:
    method compute_reward (line 173) | def compute_reward(self) -> float:
    method get_observation (line 180) | def get_observation(self) -> types.ModelInput:
  class TwoPlayerEnvGroupBuilder (line 191) | class TwoPlayerEnvGroupBuilder(EnvGroupBuilder):
    method make_envs (line 201) | async def make_envs(self) -> Sequence[Env]:
  class TwoPlayerTextArenaDataset (line 235) | class TwoPlayerTextArenaDataset(RLDataset):
    method __init__ (line 238) | def __init__(self, batch_size: int, builder: TwoPlayerEnvGroupBuilder,...
    method get_batch (line 246) | def get_batch(self, index: int) -> Sequence[EnvGroupBuilder]:
    method __len__ (line 253) | def __len__(self) -> int:
  class TwoPlayerTextArenaDatasetBuilder (line 258) | class TwoPlayerTextArenaDatasetBuilder(RLDatasetBuilder):
    method _construct_opponent_policy (line 267) | def _construct_opponent_policy(self, renderer: Renderer) -> TinkerMess...
    method __call__ (line 278) | async def __call__(self) -> tuple[TwoPlayerTextArenaDataset, TwoPlayer...

FILE: tinker_cookbook/recipes/multiplayer_rl/text_arena/train.py
  class CLIConfig (line 12) | class CLIConfig:
  function build_config (line 32) | def build_config(cli_config: CLIConfig) -> train.Config:

FILE: tinker_cookbook/recipes/multiplayer_rl/twenty_questions/env.py
  class TwentyQuestionsEnv (line 40) | class TwentyQuestionsEnv(Env):
    method __init__ (line 41) | def __init__(self, answerer: MessageCompleter, answer: str, renderer: ...
    method stop_condition (line 56) | def stop_condition(self) -> StopCondition:
    method _convo_for_player (line 59) | def _convo_for_player(self) -> list[Message]:
    method _get_obs (line 67) | def _get_obs(self) -> ModelInput:
    method _convo_for_answerer (line 71) | def _convo_for_answerer(self) -> list[Message]:
    method initial_observation (line 84) | async def initial_observation(self) -> tuple[ModelInput, StopCondition]:
    method _compute_reward (line 87) | def _compute_reward(self, content: str) -> float:
    method step (line 98) | async def step(self, action: Action) -> StepResult:
  function _load_words_from_file (line 145) | def _load_words_from_file() -> list[str]:
  class TwentyQuestionsEnvGroupBuilder (line 157) | class TwentyQuestionsEnvGroupBuilder(EnvGroupBuilder):
    method make_envs (line 163) | async def make_envs(self) -> Sequence[Env]:
  class TwentyQuestionsDataset (line 174) | class TwentyQuestionsDataset(RLDataset):
    method get_batch (line 181) | def get_batch(self, index: int) -> Sequence[EnvGroupBuilder]:
    method __len__ (line 192) | def __len__(self) -> int:
  class TwentyQuestionsDatasetBuilder (line 197) | class TwentyQuestionsDatasetBuilder(RLDatasetBuilder):
    method __call__ (line 207) | async def __call__(self) -> tuple[RLDataset, RLDataset]:
    method _construct_answer_completer (line 231) | def _construct_answer_completer(self, service_client: tinker.ServiceCl...
    method _get_train_and_test_words (line 248) | def _get_train_and_test_words(self) -> tuple[list[str], list[str]]:
  function construct_minimal_20q_env (line 257) | def construct_minimal_20q_env(answer: str) -> TwentyQuestionsEnv:

FILE: tinker_cookbook/recipes/multiplayer_rl/twenty_questions/train.py
  class CLIConfig (line 14) | class CLIConfig:
  function build_config (line 33) | def build_config(cli_config: CLIConfig) -> train.Config:

FILE: tinker_cookbook/recipes/preference/datasets.py
  function _hhh_parse_conversation (line 24) | def _hhh_parse_conversation(text: str) -> list[renderers.Message]:
  function hhh_example_to_comparison (line 49) | def hhh_example_to_comparison(example: dict[str, str]) -> LabeledCompari...
  function _arena_parse_conversation (line 71) | def _arena_parse_conversation(conversation: list) -> list[renderers.Mess...
  class Tulu38BComparisonBuilder (line 105) | class Tulu38BComparisonBuilder(ComparisonDatasetBuilder):
    method get_train_and_test_datasets (line 108) | def get_train_and_test_datasets(self) -> tuple[datasets.Dataset, datas...
    method example_to_labeled_comparison (line 118) | def example_to_labeled_comparison(self, example: dict) -> LabeledCompa...
  class HHHComparisonBuilder (line 134) | class HHHComparisonBuilder(ComparisonDatasetBuilder):
    method get_train_and_test_datasets (line 139) | def get_train_and_test_datasets(self) -> tuple[datasets.Dataset, datas...
    method example_to_labeled_comparison (line 146) | def example_to_labeled_comparison(self, example: dict) -> LabeledCompa...
  class HelpSteer3ComparisonBuilder (line 151) | class HelpSteer3ComparisonBuilder(ComparisonDatasetBuilder):
    method get_train_and_test_datasets (line 154) | def get_train_and_test_datasets(self) -> tuple[datasets.Dataset, datas...
    method example_to_labeled_comparison (line 161) | def example_to_labeled_comparison(self, example: dict) -> LabeledCompa...
  class UltraFeedbackComparisonBuilder (line 183) | class UltraFeedbackComparisonBuilder(ComparisonDatasetBuilder):
    method get_train_and_test_datasets (line 186) | def get_train_and_test_datasets(self) -> tuple[datasets.Dataset, datas...
    method example_to_labeled_comparison (line 196) | def example_to_labeled_comparison(self, example: dict) -> LabeledCompa...
  class ArenaComparisonBuilder (line 212) | class ArenaComparisonBuilder(ComparisonDatasetBuilder):
    method get_train_and_test_datasets (line 215) | def get_train_and_test_datasets(self) -> tuple[datasets.Dataset, datas...
    method example_to_labeled_comparison (line 224) | def example_to_labeled_comparison(self, example: dict) -> LabeledCompa...
  class HelpSteer2ComparisonBuilder (line 261) | class HelpSteer2ComparisonBuilder(ComparisonDatasetBuilder):
    method get_train_and_test_datasets (line 264) | def get_train_and_test_datasets(self) -> tuple[datasets.Dataset, datas...
    method example_to_labeled_comparison (line 304) | def example_to_labeled_comparison(self, example: dict) -> LabeledCompa...

FILE: tinker_cookbook/recipes/preference/dpo/train.py
  class CLIConfig (line 24) | class CLIConfig:
  function get_dataset_builder (line 53) | def get_dataset_builder(
  function cli_main (line 84) | def cli_main(cli_config: CLIConfig):

FILE: tinker_cookbook/recipes/preference/rlhf/rlhf_pipeline.py
  class CLIConfig (line 22) | class CLIConfig:
  function sft_stage (line 49) | def sft_stage(
  function train_rm (line 102) | def train_rm(
  function train_rl (line 157) | async def train_rl(
  function cli_main (line 243) | def cli_main(cli_config: CLIConfig):

FILE: tinker_cookbook/recipes/preference/shorter/env.py
  class PreferenceModelShorter (line 28) | class PreferenceModelShorter(PreferenceModel):
    method _get_completion_length (line 33) | def _get_completion_length(self, completion: list[Message]) -> int:
    method __call__ (line 39) | async def __call__(self, comparison: Comparison) -> float:
  class ShorterComparisonBuilder (line 51) | class ShorterComparisonBuilder(ComparisonDatasetBuilder):
    method get_train_and_test_datasets (line 52) | def get_train_and_test_datasets(self) -> tuple[Dataset, Dataset | None]:
    method example_to_labeled_comparison (line 55) | def example_to_labeled_comparison(self, example: dict) -> LabeledCompa...
  class ShorterPreferenceModelBuilder (line 60) | class ShorterPreferenceModelBuilder(PreferenceModelBuilder):
    method __call__ (line 61) | def __call__(self) -> PreferenceModel:

FILE: tinker_cookbook/recipes/preference/shorter/train.py
  class CLIConfig (line 16) | class CLIConfig:
  function cli_main (line 37) | def cli_main(cli_config: CLIConfig):

FILE: tinker_cookbook/recipes/prompt_distillation/create_data.py
  class Config (line 79) | class Config:
  function setup_clients (line 83) | def setup_clients():
  function create_data_async (line 99) | async def create_data_async(cfg: Config, sampling_client: Any, tokenizer...
  function main (line 156) | def main(cfg: Config):

FILE: tinker_cookbook/recipes/prompt_distillation/train.py
  class CLIConfig (line 20) | class CLIConfig:
  function cli_main (line 57) | def cli_main(cli_config: CLIConfig):

FILE: tinker_cookbook/recipes/rl_basic.py
  function build_config_blueprint (line 11) | def build_config_blueprint() -> chz.Blueprint[train.Config]:
  function main (line 34) | def main(config: train.Config):

FILE: tinker_cookbook/recipes/rl_loop.py
  class Config (line 37) | class Config:
  function get_reward (line 50) | def get_reward(response: str, answer: str) -> float:
  function main (line 59) | def main(config: Config):

FILE: tinker_cookbook/recipes/rubric/data.py
  class Rubric (line 19) | class Rubric:
    method _convert_role (line 30) | def _convert_role(self, role: Role) -> str:
    method _flatten_convo (line 33) | def _flatten_convo(self, convo: Conversation) -> str:
    method get_grader_prompt (line 45) | def get_grader_prompt(self, convo: Conversation) -> Conversation:
    method extract_score (line 80) | def extract_score(self, response: str) -> float:
    method to_dict (line 92) | def to_dict(self) -> dict[str, str]:
    method to_json (line 99) | def to_json(self) -> str:
    method from_dict (line 103) | def from_dict(d: dict[str, str]) -> "Rubric":
    method from_json (line 111) | def from_json(json_str: str) -> "Rubric":
  class RubricBasedDatapoint (line 116) | class RubricBasedDatapoint:
    method to_json (line 125) | def to_json(self) -> str:
    method from_json (line 134) | def from_json(json_str: str) -> "RubricBasedDatapoint":
  class RubricDatapointListBuilder (line 143) | class RubricDatapointListBuilder:
    method __call__ (line 144) | def __call__(self) -> Sequence[RubricBasedDatapoint]:
  class RubricDatapointListBuilderFromJsonl (line 150) | class RubricDatapointListBuilderFromJsonl(RubricDatapointListBuilder):
    method __call__ (line 153) | def __call__(self) -> Sequence[RubricBasedDatapoint]:
  class PrometheusDatapointListBuilder (line 168) | class PrometheusDatapointListBuilder(RubricDatapointListBuilder):
    method __call__ (line 171) | def __call__(self) -> Sequence[RubricBasedDatapoint]:
    method build_rubric_datapoint (line 177) | def build_rubric_datapoint(self, item: dict[str, Any]) -> RubricBasedD...

FILE: tinker_cookbook/recipes/rubric/debug_env.py
  function get_addition_datapoint (line 13) | def get_addition_datapoint() -> RubricBasedDatapoint:
  function get_prometheus_datapoint (line 29) | def get_prometheus_datapoint() -> RubricBasedDatapoint:
  function main (line 37) | async def main(datapoint: RubricBasedDatapoint):

FILE: tinker_cookbook/recipes/rubric/env.py
  class RubricGradedEnv (line 33) | class RubricGradedEnv(Env):
    method __init__ (line 34) | def __init__(
    method rubric_items (line 53) | def rubric_items(self) -> Sequence[Rubric]:
    method convo (line 57) | def convo(self) -> Conversation:
    method stop_condition (line 61) | def stop_condition(self) -> StopCondition:
    method initial_observation (line 64) | async def initial_observation(self) -> tuple[ModelInput, StopCondition]:
    method _grade_with_rubric (line 67) | async def _grade_with_rubric(self, convo: Conversation, rubric: Rubric...
    method step (line 90) | async def step(self, action: Action) -> StepResult:
  class RubricGradedEnvGroupBuilder (line 173) | class RubricGradedEnvGroupBuilder(EnvGroupBuilder):
    method make_envs (line 179) | async def make_envs(self) -> Sequence[RubricGradedEnv]:
  class RubricGradedDataset (line 191) | class RubricGradedDataset(RLDataset):
    method get_batch (line 198) | def get_batch(self, index: int) -> Sequence[RubricGradedEnvGroupBuilder]:
    method __len__ (line 210) | def __len__(self) -> int:
  class RubricGradedDatasetBuilder (line 215) | class RubricGradedDatasetBuilder(RLDatasetBuilder):
    method _get_grader_llm (line 228) | def _get_grader_llm(self) -> MessageCompleter:
    method __call__ (line 238) | async def __call__(self) -> tuple[RubricGradedDataset, RubricGradedDat...

FILE: tinker_cookbook/recipes/rubric/generate_data.py
  function generate_one (line 7) | def generate_one(rng: random.Random) -> RubricBasedDatapoint:
  function generate_dataset (line 19) | def generate_dataset(

FILE: tinker_cookbook/recipes/rubric/prometheus_experimental.py
  class CLIConfig (line 15) | class CLIConfig:
  function get_dataset_builder (line 62) | def get_dataset_builder(
  function cli_main (line 82) | async def cli_main(cli_config: CLIConfig):

FILE: tinker_cookbook/recipes/rubric/train.py
  class CLIConfig (line 15) | class CLIConfig:
  function get_dataset_builder (line 65) | def get_dataset_builder(
  function cli_main (line 91) | async def cli_main(cli_config: CLIConfig):

FILE: tinker_cookbook/recipes/search_tool/chroma_pickle_test.py
  class TestChromatoolPickle (line 14) | class TestChromatoolPickle:
    method test_pickle_excludes_clients (line 15) | def test_pickle_excludes_clients(self) -> None:

FILE: tinker_cookbook/recipes/search_tool/embedding.py
  function get_gemini_client (line 20) | def get_gemini_client(
  function get_gemini_embedding (line 52) | async def get_gemini_embedding(

FILE: tinker_cookbook/recipes/search_tool/offline_eval.py
  class CLIConfig (line 31) | class CLIConfig:
  class EvaluationResult (line 45) | class EvaluationResult(TypedDict):
  function split_data_by_source (line 51) | def split_data_by_source(data: list[SearchR1Datum]) -> dict[str, list[Se...
  function sample_k_from_each_source (line 59) | def sample_k_from_each_source(
  function evaluate_single_item (line 79) | async def evaluate_single_item(
  function evaluate_one_dataset (line 109) | async def evaluate_one_dataset(data: list[SearchR1Datum], config: CLICon...
  function cli_main (line 158) | async def cli_main(config: CLIConfig):

FILE: tinker_cookbook/recipes/search_tool/search_env.py
  class SearchR1Datum (line 44) | class SearchR1Datum(TypedDict):
  function process_single_row (line 50) | def process_single_row(row_series: pd.Series) -> SearchR1Datum:
  function download_search_r1_dataset (line 90) | def download_search_r1_dataset(split: Literal["train", "test"]) -> list[...
  function _initial_messages (line 111) | def _initial_messages(
  class SearchEnvGroupBuilder (line 125) | class SearchEnvGroupBuilder(EnvGroupBuilder):
    method __init__ (line 128) | def __init__(
    method make_envs (line 148) | async def make_envs(self) -> Sequence[Env]:
    method logging_tags (line 173) | def logging_tags(self) -> list[str]:
  class SearchRLDataset (line 177) | class SearchRLDataset(RLDataset):
    method __init__ (line 180) | def __init__(
    method get_batch (line 188) | def get_batch(self, index: int) -> Sequence[EnvGroupBuilder]:
    method __len__ (line 193) | def __len__(self) -> int:
  class SearchR1DatasetBuilder (line 198) | class SearchR1DatasetBuilder(RLDatasetBuilder):
    method __call__ (line 216) | async def __call__(self) -> tuple[RLDataset, RLDataset | None]:

FILE: tinker_cookbook/recipes/search_tool/tools.py
  function normalize_answer (line 27) | def normalize_answer(s: str) -> str:
  class EmbeddingConfig (line 54) | class EmbeddingConfig:
  class RetrievalConfig (line 61) | class RetrievalConfig:
  class ChromaTool (line 66) | class ChromaTool:
    method __init__ (line 75) | def __init__(
    method __getstate__ (line 97) | def __getstate__(self) -> dict:
    method _ensure_clients (line 104) | async def _ensure_clients(self) -> tuple[AsyncClientAPI, genai.Client]:
    method build (line 122) | async def build(
    method _get_embeddings_with_retry (line 164) | async def _get_embeddings_with_retry(
    method _query_chroma_with_retry (line 176) | async def _query_chroma_with_retry(
    method search (line 201) | async def search(
  class TextAnswerReward (line 227) | class TextAnswerReward:
    method __call__ (line 236) | async def __call__(self, history: list[Message]) -> tuple[float, dict[...
    method _extract_answer (line 257) | def _extract_answer(self, text: str) -> str | None:
    method _check_answer (line 265) | def _check_answer(self, text: str) -> bool:

FILE: tinker_cookbook/recipes/search_tool/train.py
  class CLIConfig (line 19) | class CLIConfig:
  function cli_main (line 60) | async def cli_main(cli_config: CLIConfig) -> None:

FILE: tinker_cookbook/recipes/sl_basic.py
  function build_config_blueprint (line 14) | def build_config_blueprint() -> chz.Blueprint[train.Config]:
  function main (line 45) | def main(config: train.Config):

FILE: tinker_cookbook/recipes/sl_loop.py
  class Config (line 24) | class Config:
  function main (line 37) | def main(config: Config):

FILE: tinker_cookbook/recipes/verifiers_rl/evaluate.py
  function log_results (line 18) | def log_results(
  function evaluate (line 64) | async def evaluate(
  class CLIConfig (line 136) | class CLIConfig:
  function cli_main (line 148) | async def cli_main(cfg: CLIConfig):

FILE: tinker_cookbook/recipes/verifiers_rl/tinker_openai.py
  class TinkerAsyncOpenAIClient (line 29) | class TinkerAsyncOpenAIClient(AsyncOpenAI):
    method __init__ (line 34) | def __init__(
    method set_sampling_client (line 45) | def set_sampling_client(self, sampling_client: tinker.SamplingClient) ...
    method chat (line 49) | def chat(self) -> OpenAIAsyncChat:
    method completions (line 53) | def completions(self) -> OpenAIAsyncCompletions:
  class TinkerChatCompletions (line 57) | class TinkerChatCompletions(OpenAIAsyncChatCompletions):
    method __init__ (line 58) | def __init__(self, parent: TinkerAsyncOpenAIClient) -> None:
    method create (line 62) | async def create(
    method create (line 67) | async def create(
    method create (line 72) | async def create(self, *args: Any, stream: bool, **kwargs: Any) -> Cha...
    method create (line 74) | async def create(self, *args: Any, **kwargs: Any) -> ChatCompletion | ...
  class TinkerCompletions (line 160) | class TinkerCompletions(OpenAIAsyncCompletions):
    method __init__ (line 161) | def __init__(self, parent: TinkerAsyncOpenAIClient) -> None:
    method create (line 165) | async def create(
    method create (line 170) | async def create(
    method create (line 175) | async def create(
    method create (line 179) | async def create(self, *args: Any, **kwargs: Any) -> Completion | Asyn...
  class TinkerAsyncChat (line 236) | class TinkerAsyncChat(OpenAIAsyncChat):
    method __init__ (line 237) | def __init__(self, parent: TinkerAsyncOpenAIClient) -> None:
    method completions (line 241) | def completions(self) -> OpenAIAsyncChatCompletions:
  class TinkerAsyncCompletionStream (line 245) | class TinkerAsyncCompletionStream(AsyncStream[Completion]):
    method __init__ (line 246) | def __init__(self, final: Completion) -> None:
    method __aiter__ (line 249) | def __aiter__(self):
    method __anext__ (line 253) | async def __anext__(self) -> Completion:
    method __await__ (line 256) | def __await__(self):
    method get_final_response (line 262) | async def get_final_response(self) -> Completion:

FILE: tinker_cookbook/recipes/verifiers_rl/train.py
  class CLIConfig (line 28) | class CLIConfig:
  function cli_main (line 61) | async def cli_main(cli_config: CLIConfig, env: Any | None):

FILE: tinker_cookbook/recipes/verifiers_rl/verifiers_env.py
  function set_vf_env (line 23) | def set_vf_env(env: vf.Environment) -> None:
  function get_vf_env (line 28) | def get_vf_env() -> vf.Environment | None:
  function convert_states_to_trajectory_group (line 33) | def convert_states_to_trajectory_group(states: list[vf.State]) -> Trajec...
  class VerifiersRLDataset (line 80) | class VerifiersRLDataset(RLDataset):
    method __init__ (line 81) | def __init__(
    method __len__ (line 91) | def __len__(self) -> int:
    method get_batch (line 94) | def get_batch(self, index: int) -> Sequence[EnvGroupBuilder]:
  class VerifiersRLDatasetBuilder (line 114) | class VerifiersRLDatasetBuilder(RLDatasetBuilder):
    method __call__ (line 121) | async def __call__(self) -> tuple[RLDataset, RLDataset | None]:
  class VerifiersEnvGroupBuilder (line 140) | class VerifiersEnvGroupBuilder(EnvGroupBuilder):
    method __init__ (line 151) | def __init__(
    method __getstate__ (line 167) | def __getstate__(self) -> dict:
    method __setstate__ (line 173) | def __setstate__(self, state: dict) -> None:
    method get_rollout_inputs (line 190) | def get_rollout_inputs(self, group_size: int) -> list[vf.RolloutInput]:
    method make_envs (line 202) | async def make_envs(self):
    method logging_tags (line 205) | def logging_tags(self) -> list[str]:

FILE: tinker_cookbook/recipes/verifiers_rl/verifiers_pickle_test.py
  class TestVerifiersEnvGroupBuilderPickle (line 14) | class TestVerifiersEnvGroupBuilderPickle:
    method test_pickle_excludes_vf_env (line 15) | def test_pickle_excludes_vf_env(self) -> None:

FILE: tinker_cookbook/recipes/vlm_classifier/data.py
  class ClassifierDatasetConfig (line 36) | class ClassifierDatasetConfig:
  class ClassifierDataset (line 64) | class ClassifierDataset(SupervisedDataset):
    method __init__ (line 65) | def __init__(self, config: ClassifierDatasetConfig):
    method get_shuffled_indices (line 90) | def get_shuffled_indices(self, seed: int = 0) -> list[int]:
    method _sample_per_class (line 108) | def _sample_per_class(self, dataset: datasets.Dataset) -> datasets.Dat...
    method get_class_name (line 135) | def get_class_name(self, label: str) -> str:
    method build_supervised_example (line 142) | def build_supervised_example(
    method get_batch (line 191) | def get_batch(self, index: int) -> list[tinker.Datum]:
    method __len__ (line 207) | def __len__(self) -> int:
    method set_epoch (line 214) | def set_epoch(self, seed: int = 0):
  class Caltech101DatasetBuilder (line 223) | class Caltech101DatasetBuilder(SupervisedDatasetBuilder):
    method __call__ (line 245) | def __call__(self) -> tuple[SupervisedDataset, SupervisedDataset | None]:
  class Flowers102DatasetBuilder (line 289) | class Flowers102DatasetBuilder(SupervisedDatasetBuilder):
    method __call__ (line 311) | def __call__(self) -> tuple[SupervisedDataset, SupervisedDataset | None]:
  class OxfordPetsDatasetBuilder (line 355) | class OxfordPetsDatasetBuilder(SupervisedDatasetBuilder):
    method __call__ (line 377) | def __call__(self) -> tuple[SupervisedDataset, SupervisedDataset | None]:
  class StanfordCarsDatasetBuilder (line 421) | class StanfordCarsDatasetBuilder(SupervisedDatasetBuilder):
    method __call__ (line 443) | def __call__(self) -> tuple[SupervisedDataset, SupervisedDataset | None]:
  function get_dataset_builder (line 494) | def get_dataset_builder(

FILE: tinker_cookbook/recipes/vlm_classifier/eval.py
  class ClassifierEvaluatorConfig (line 25) | class ClassifierEvaluatorConfig:
  class ClassifierOutput (line 50) | class ClassifierOutput(TypedDict):
  class ClassifierEvaluator (line 58) | class ClassifierEvaluator(SamplingClientEvaluator):
    method __init__ (line 63) | def __init__(
    method get_class_name (line 89) | def get_class_name(self, label: str) -> str:
    method build_generation_prompt (line 96) | def build_generation_prompt(
    method generate_output (line 132) | async def generate_output(
    method get_metrics_for_output (line 153) | def get_metrics_for_output(
    method __call__ (line 166) | async def __call__(self, sampling_client: tinker.SamplingClient) -> di...
  class Caltech101EvaluatorBuilder (line 237) | class Caltech101EvaluatorBuilder:
    method __call__ (line 255) | def __call__(self) -> ClassifierEvaluator:
  class Flowers102EvaluatorBuilder (line 276) | class Flowers102EvaluatorBuilder:
    method __call__ (line 294) | def __call__(self) -> ClassifierEvaluator:
  class OxfordPetsEvaluatorBuilder (line 315) | class OxfordPetsEvaluatorBuilder:
    method __call__ (line 333) | def __call__(self) -> ClassifierEvaluator:
  class StanfordCarsEvaluatorBuilder (line 354) | class StanfordCarsEvaluatorBuilder:
    method __call__ (line 372) | def __call__(self) -> ClassifierEvaluator:
  function get_evaluator_builder (line 400) | def get_evaluator_builder(
  class EvalConfig (line 430) | class EvalConfig:
  function run_eval (line 455) | def run_eval(eval_config: EvalConfig):

FILE: tinker_cookbook/recipes/vlm_classifier/eval_sweep.py
  function get_checkpoint_at_step (line 42) | def get_checkpoint_at_step(
  function parse_hyperparams_from_experiment_name (line 67) | def parse_hyperparams_from_experiment_name(experiment_name: str) -> dict...
  class EvalConfig (line 110) | class EvalConfig:
  function evaluate_experiment (line 140) | async def evaluate_experiment(
  function evaluate_sweep (line 202) | async def evaluate_sweep(
  function run_eval_sweep (line 235) | def run_eval_sweep(eval_config: EvalConfig):

FILE: tinker_cookbook/recipes/vlm_classifier/sweep.py
  class ExperimentConfig (line 30) | class ExperimentConfig:
  function run_experiment (line 82) | def run_experiment(experiment_config: ExperimentConfig):
  class SweepConfig (line 154) | class SweepConfig:
  function run_sweep (line 191) | def run_sweep(sweep_config: SweepConfig):

FILE: tinker_cookbook/recipes/vlm_classifier/train.py
  class ExperimentConfig (line 29) | class ExperimentConfig:
  function run_experiment (line 85) | def run_experiment(experiment_config: ExperimentConfig):

FILE: tinker_cookbook/renderers/__init__.py
  function register_renderer (line 52) | def register_renderer(
  function get_registered_renderer_names (line 71) | def get_registered_renderer_names() -> list[str]:
  function is_renderer_registered (line 76) | def is_renderer_registered(name: str) -> bool:
  function unregister_renderer (line 81) | def unregister_renderer(name: str) -> bool:
  function get_renderer (line 96) | def get_renderer(

FILE: tinker_cookbook/renderers/base.py
  class StrictBase (line 40) | class StrictBase(pydantic.BaseModel):
    method __str__ (line 47) | def __str__(self) -> str:
  class ToolCall (line 51) | class ToolCall(StrictBase):
    class FunctionBody (line 68) | class FunctionBody(pydantic.BaseModel):
  class UnparsedToolCall (line 91) | class UnparsedToolCall(StrictBase):
  class TextPart (line 113) | class TextPart(TypedDict):
  class ImagePart (line 121) | class ImagePart(TypedDict):
  class ThinkingPart (line 130) | class ThinkingPart(TypedDict):
  class StreamingMessageHeader (line 146) | class StreamingMessageHeader:
  class StreamingTextDelta (line 157) | class StreamingTextDelta:
  class StreamingThinkingDelta (line 170) | class StreamingThinkingDelta:
  class Utf8TokenDecoder (line 194) | class Utf8TokenDecoder:
    method __post_init__ (line 214) | def __post_init__(self) -> None:
    method _is_valid_decode (line 223) | def _is_valid_decode(self, text: str) -> bool:
    method decode (line 232) | def decode(self, tokens: list[int]) -> str | None:
    method flush (line 274) | def flush(self) -> str:
    method reset (line 291) | def reset(self) -> None:
    method has_pending (line 295) | def has_pending(self) -> bool:
  function _longest_matching_suffix_prefix (line 305) | def _longest_matching_suffix_prefix(text: str, tag: str) -> int:
  class StreamingParser (line 337) | class StreamingParser:
    method __post_init__ (line 371) | def __post_init__(self) -> None:
    method feed (line 380) | def feed(self, token: int) -> Iterator["MessageDelta"]:
    method _emit_deltas (line 403) | def _emit_deltas(self) -> Iterator["MessageDelta"]:
    method _emit_remaining (line 417) | def _emit_remaining(self) -> Iterator["MessageDelta"]:
    method finish (line 430) | def finish(self) -> Iterator["MessageDelta"]:
    method reset (line 448) | def reset(self) -> None:
  class ReasoningStreamingParser (line 465) | class ReasoningStreamingParser(StreamingParser):
    method __post_init__ (line 479) | def __post_init__(self) -> None:
    method _emit_deltas (line 483) | def _emit_deltas(self) -> Iterator["MessageDelta"]:
    method _emit_remaining (line 549) | def _emit_remaining(self) -> Iterator["MessageDelta"]:
    method reset (line 564) | def reset(self) -> None:
  class Message (line 578) | class Message(TypedDict):
  class RenderContext (line 621) | class RenderContext:
  class ToolSpec (line 648) | class ToolSpec(TypedDict):
  function ensure_text (line 677) | def ensure_text(content: Content) -> str:
  function ensure_list (line 692) | def ensure_list(content: Content) -> list[ContentPart]:
  function content_to_jsonable (line 699) | def content_to_jsonable(content: Content) -> str | list[dict[str, Any]]:
  function message_to_jsonable (line 721) | def message_to_jsonable(message: Message) -> dict[str, Any]:
  function remove_thinking (line 742) | def remove_thinking(parts: list[ContentPart]) -> list[ContentPart]:
  function get_text_content (line 747) | def get_text_content(message: Message) -> str:
  function format_content_as_string (line 759) | def format_content_as_string(content: Content, separator: str = "\n") ->...
  function _parse_tool_call_json (line 791) | def _parse_tool_call_json(tool_call_str: str, raw_text: str) -> ToolCall...
  function parse_content_blocks (line 823) | def parse_content_blocks(
  function parse_think_blocks (line 891) | def parse_think_blocks(content: str) -> list[ContentPart] | None:
  function _tool_call_payload (line 931) | def _tool_call_payload(tool_call: ToolCall) -> dict[str, object]:
  class RenderedMessage (line 941) | class RenderedMessage:
  class TrainOnWhat (line 985) | class TrainOnWhat(StrEnum):
  function _unpickle_renderer (line 995) | def _unpickle_renderer(
  class Renderer (line 1015) | class Renderer(ABC):
    method __init__ (line 1045) | def __init__(self, tokenizer: Tokenizer):
    method __reduce__ (line 1048) | def __reduce__(self) -> tuple:
    method has_extension_property (line 1069) | def has_extension_property(self) -> bool:
    method _bos_tokens (line 1088) | def _bos_tokens(self) -> list[int]:
    method get_stop_sequences (line 1092) | def get_stop_sequences(self) -> list[str] | list[int]:
    method render_message (line 1097) | def render_message(self, message: Message, ctx: RenderContext) -> Rend...
    method parse_response (line 1117) | def parse_response(self, response: list[int]) -> tuple[Message, bool]:
    method _normalize_response_tokens (line 1138) | def _normalize_response_tokens(self, response: list[int]) -> list[int]:
    method _end_message_token (line 1150) | def _end_message_token(self) -> int:
    method _parse_response_for_streaming (line 1159) | def _parse_response_for_streaming(self, response: list[int]) -> tuple[...
    method parse_response_streaming (line 1172) | def parse_response_streaming(self, response: list[int]) -> Iterator[Me...
    method to_openai_message (line 1205) | def to_openai_message(self, message: Message) -> dict:
    method create_conversation_prefix_with_tools (line 1277) | def create_conversation_prefix_with_tools(
    method _get_generation_suffix (line 1298) | def _get_generation_suffix(self, role: Role, ctx: RenderContext) -> li...
    method build_generation_prompt (line 1322) | def build_generation_prompt(
    method build_supervised_examples (line 1378) | def build_supervised_examples(
    method build_supervised_example (line 1400) | def build_supervised_example(
  function tokens_weights_from_strings_weights (line 1526) | def tokens_weights_from_strings_weights(
  function parse_response_for_stop_token (line 1540) | def parse_response_for_stop_token(
  class ImageProcessorProtocol (line 1567) | class ImageProcessorProtocol(Protocol):
    method get_number_of_image_patches (line 1571) | def get_number_of_image_patches(
    method get_resize_config (line 1576) | def get_resize_config(self, image_data: dict[str, Any]) -> dict[str, A...
  function image_to_chunk (line 1580) | def image_to_chunk(

FILE: tinker_cookbook/renderers/deepseek_v3.py
  class _DeepSeekV3BaseRenderer (line 32) | class _DeepSeekV3BaseRenderer(Renderer):
    method __init__ (line 49) | def __init__(
    method has_extension_property (line 69) | def has_extension_property(self) -> bool:
    method render_message (line 80) | def render_message(self, message: Message, ctx: RenderContext) -> Rend...
    method _get_special_token (line 193) | def _get_special_token(self, name: str) -> int:
    method _bos_tokens (line 201) | def _bos_tokens(self) -> list[int]:
    method _end_message_token (line 205) | def _end_message_token(self) -> int:
    method get_stop_sequences (line 208) | def get_stop_sequences(self) -> list[int]:
    method _parse_deepseek_tool_calls (line 211) | def _parse_deepseek_tool_calls(
    method _parse_response_content (line 254) | def _parse_response_content(
    method parse_response (line 297) | def parse_response(self, response: list[int]) -> tuple[Message, bool]:
    method _parse_response_for_streaming (line 301) | def _parse_response_for_streaming(self, response: list[int]) -> tuple[...
    method to_openai_message (line 313) | def to_openai_message(self, message: Message) -> dict:
    method create_conversation_prefix_with_tools (line 360) | def create_conversation_prefix_with_tools(
  class DeepSeekV3ThinkingRenderer (line 404) | class DeepSeekV3ThinkingRenderer(_DeepSeekV3BaseRenderer):
    method render_message (line 420) | def render_message(self, message: Message, ctx: RenderContext) -> Rend...
    method build_generation_prompt (line 448) | def build_generation_prompt(
    method _normalize_response_tokens (line 467) | def _normalize_response_tokens(self, response: list[int]) -> list[int]:
  class DeepSeekV3DisableThinkingRenderer (line 483) | class DeepSeekV3DisableThinkingRenderer(_DeepSeekV3BaseRenderer):
    method has_extension_property (line 497) | def has_extension_property(self) -> bool:
    method render_message (line 501) | def render_message(self, message: Message, ctx: RenderContext) -> Rend...

FILE: tinker_cookbook/renderers/deepseek_v3_test.py
  function test_deepseek_parse_response_extracts_thinking (line 29) | def test_deepseek_parse_response_extracts_thinking():
  function test_deepseek_parse_response_no_thinking_returns_string (line 53) | def test_deepseek_parse_response_no_thinking_returns_string():
  function test_deepseek_parse_response_multiple_think_blocks (line 68) | def test_deepseek_parse_response_multiple_think_blocks():
  function test_deepseek_thinking_preserved_with_tool_calls (line 94) | def test_deepseek_thinking_preserved_with_tool_calls():
  function test_deepseek_post_tool_formatting (line 135) | def test_deepseek_post_tool_formatting():
  function _is_message (line 197) | def _is_message(obj) -> bool:
  function _assert_deepseek_streaming_matches_batch (line 201) | def _assert_deepseek_streaming_matches_batch(renderer, response_str: str):
  class TestDeepSeekStreamingBatchEquivalence (line 220) | class TestDeepSeekStreamingBatchEquivalence:
    method thinking_renderer (line 224) | def thinking_renderer(self):
    method non_thinking_renderer (line 229) | def non_thinking_renderer(self):
    method test_simple_text (line 233) | def test_simple_text(self, thinking_renderer):
    method test_thinking_then_text (line 238) | def test_thinking_then_text(self, thinking_renderer):
    method test_multiple_think_blocks (line 244) | def test_multiple_think_blocks(self, thinking_renderer):
    method test_empty_response (line 250) | def test_empty_response(self, thinking_renderer):
    method test_non_thinking_renderer (line 253) | def test_non_thinking_renderer(self, non_thinking_renderer):
    method test_no_end_token (line 258) | def test_no_end_token(self, thinking_renderer):

FILE: tinker_cookbook/renderers/gpt_oss.py
  function _json_type_to_typescript (line 35) | def _json_type_to_typescript(schema: dict) -> str:
  function _json_schema_to_typescript (line 68) | def _json_schema_to_typescript(schema: dict) -> str:
  function _schema_comments (line 85) | def _schema_comments(schema: dict) -> list[str]:
  function _format_parameters_block (line 103) | def _format_parameters_block(schema: dict) -> str:
  function _format_tool_definition (line 132) | def _format_tool_definition(tool: ToolSpec) -> str:
  class GptOssRenderer (line 144) | class GptOssRenderer(Renderer):
    method __init__ (line 193) | def __init__(
    method render_message (line 211) | def render_message(self, message: Message, ctx: RenderContext) -> Rend...
    method _render_tool_calls (line 285) | def _render_tool_calls(self, tool_calls: list[ToolCall]) -> str:
    method _render_tool_result_message (line 305) | def _render_tool_result_message(self, message: Message, ctx: RenderCon...
    method _get_system_message (line 351) | def _get_system_message(self) -> Message | None:
    method _bos_tokens (line 370) | def _bos_tokens(self) -> list[int]:
    method _warn_if_user_system_message (line 374) | def _warn_if_user_system_message(self, messages: list[Message]) -> None:
    method build_generation_prompt (line 385) | def build_generation_prompt(
    method build_supervised_example (line 395) | def build_supervised_example(
    method _return_token (line 408) | def _return_token(self) -> int:
    method _call_token (line 414) | def _call_token(self) -> int:
    method get_stop_sequences (line 419) | def get_stop_sequences(self) -> list[int]:
    method parse_response (line 424) | def parse_response(self, response: list[int]) -> tuple[Message, bool]:
    method to_openai_message (line 460) | def to_openai_message(self, message: Message) -> dict:
    method _parse_harmony_output (line 508) | def _parse_harmony_output(
    method _parse_harmony_messages (line 549) | def _parse_harmony_messages(self, content: str) -> list[dict[str, str ...
    method create_conversation_prefix_with_tools (line 613) | def create_conversation_prefix_with_tools(

FILE: tinker_cookbook/renderers/gpt_oss_test.py
  function test_gptoss_parse_response_extracts_thinking (line 15) | def test_gptoss_parse_response_extracts_thinking():
  function test_gptoss_parse_response_multiple_analysis (line 39) | def test_gptoss_parse_response_multiple_analysis():
  function test_gptoss_parse_response_final_only (line 59) | def test_gptoss_parse_response_final_only():
  function test_gptoss_parse_response_no_channels (line 76) | def test_gptoss_parse_response_no_channels():
  function test_gptoss_parse_response_tool_call (line 92) | def test_gptoss_parse_response_tool_call():
  function test_gptoss_parse_response_tool_call_with_analysis (line 110) | def test_gptoss_parse_response_tool_call_with_analysis():
  function test_gptoss_parse_response_invalid_tool_call_json (line 135) | def test_gptoss_parse_response_invalid_tool_call_json():
  function test_gptoss_parse_response_tool_call_recipient_before_channel (line 151) | def test_gptoss_parse_response_tool_call_recipient_before_channel():
  function test_gptoss_parse_response_commentary_preamble (line 167) | def test_gptoss_parse_response_commentary_preamble():

FILE: tinker_cookbook/renderers/kimi_k2.py
  function _split_tool_calls_section (line 41) | def _split_tool_calls_section(content: str) -> tuple[str, str | None]:
  function _extract_tool_name (line 49) | def _extract_tool_name(tool_id: str) -> str:
  function _parse_tool_calls_section (line 58) | def _parse_tool_calls_section(
  class KimiK2Renderer (line 86) | class KimiK2Renderer(Renderer):
    method __init__ (line 105) | def __init__(self, tokenizer: Tokenizer, strip_thinking_from_history: ...
    method _ensure_system_message (line 109) | def _ensure_system_message(self, messages: list[Message]) -> list[Mess...
    method render_message (line 139) | def render_message(self, message: Message, ctx: RenderContext) -> Rend...
    method _encode_multipart_content (line 229) | def _encode_multipart_content(self, content: list[ContentPart]) -> lis...
    method build_generation_prompt (line 234) | def build_generation_prompt(
    method build_supervised_examples (line 274) | def build_supervised_examples(
    method build_supervised_example (line 321) | def build_supervised_example(
    method _end_message_token (line 410) | def _end_message_token(self) -> int:
    method get_stop_sequences (line 415) | def get_stop_sequences(self) -> list[int]:
    method parse_response (line 418) | def parse_response(self, response: list[int]) -> tuple[Message, bool]:
    method _parse_response_for_streaming (line 443) | def _parse_response_for_streaming(self, response: list[int]) -> tuple[...
    method to_openai_message (line 470) | def to_openai_message(self, message: Message) -> dict:
    method create_conversation_prefix_with_tools (line 517) | def create_conversation_prefix_with_tools(

FILE: tinker_cookbook/renderers/kimi_k25.py
  class KimiK25Renderer (line 22) | class KimiK25Renderer(KimiK2Renderer):
    method __init__ (line 43) | def __init__(
    method _encode_multipart_content (line 54) | def _encode_multipart_content(self, content: list[ContentPart]) -> lis...
    method _image_prefix (line 81) | def _image_prefix(self) -> str:
    method _image_suffix (line 85) | def _image_suffix(self) -> str:
    method build_generation_prompt (line 88) | def build_generation_prompt(
    method _normalize_response_tokens (line 97) | def _normalize_response_tokens(self, response: list[int]) -> list[int]:
    method create_conversation_prefix_with_tools (line 107) | def create_conversation_prefix_with_tools(
  class KimiK25DisableThinkingRenderer (line 133) | class KimiK25DisableThinkingRenderer(KimiK25Renderer):
    method build_generation_prompt (line 145) | def build_generation_prompt(

FILE: tinker_cookbook/renderers/kimi_k25_test.py
  function kimi_tokenizer (line 41) | def kimi_tokenizer():
  function kimi_renderer (line 52) | def kimi_renderer(kimi_tokenizer):
  function kimi_renderer_disable_thinking (line 58) | def kimi_renderer_disable_thinking(kimi_tokenizer):
  function hf_generation_prompt_length (line 64) | def hf_generation_prompt_length(kimi_tokenizer):
  function get_hf_tokens (line 84) | def get_hf_tokens(
  function get_tool_spec (line 111) | def get_tool_spec() -> ToolSpec:
  function get_basic_conversation_for_generation (line 139) | def get_basic_conversation_for_generation() -> list[Message]:
  function get_basic_conversation_for_supervised (line 149) | def get_basic_conversation_for_supervised() -> list[Message]:
  function get_tool_call_conversation_for_generation (line 158) | def get_tool_call_conversation_for_generation() -> tuple[list[Message], ...
  function get_tool_call_conversation_for_supervised (line 189) | def get_tool_call_conversation_for_supervised() -> tuple[list[Message], ...
  function get_multi_tool_call_conversation_for_generation (line 227) | def get_multi_tool_call_conversation_for_generation() -> tuple[list[Mess...
  function get_multi_step_tool_conversation_for_generation (line 273) | def get_multi_step_tool_conversation_for_generation() -> tuple[list[Mess...
  function test_typescript_tool_declaration_basic (line 332) | def test_typescript_tool_declaration_basic():
  function test_typescript_tool_declaration_with_enum (line 346) | def test_typescript_tool_declaration_with_enum():
  function test_typescript_tool_declaration_description (line 355) | def test_typescript_tool_declaration_description():
  function test_typescript_tool_declaration_empty (line 363) | def test_typescript_tool_declaration_empty():
  function test_typescript_tool_declaration_multiple_tools (line 369) | def test_typescript_tool_declaration_multiple_tools():
  function test_kimi_k25_generation_prompt_has_think_prefill (line 410) | def test_kimi_k25_generation_prompt_has_think_prefill(kimi_tokenizer, ki...
  function test_kimi_k25_disable_thinking_generation_prompt (line 419) | def test_kimi_k25_disable_thinking_generation_prompt(
  function test_kimi_k25_custom_prefill_overrides_default (line 430) | def test_kimi_k25_custom_prefill_overrides_default(kimi_tokenizer, kimi_...
  function test_kimi_k25_basic_conversation_matches_hf (line 446) | def test_kimi_k25_basic_conversation_matches_hf(
  function test_kimi_k25_tool_call_conversation_matches_hf (line 464) | def test_kimi_k25_tool_call_conversation_matches_hf(
  function test_kimi_k25_multi_tool_calls_matches_hf (line 494) | def test_kimi_k25_multi_tool_calls_matches_hf(
  function test_kimi_k25_multi_step_tool_calls_matches_hf (line 524) | def test_kimi_k25_multi_step_tool_calls_matches_hf(
  function test_kimi_k25_tool_declaration_is_typescript (line 559) | def test_kimi_k25_tool_declaration_is_typescript(kimi_renderer):
  function test_kimi_k25_tool_declaration_matches_hf (line 578) | def test_kimi_k25_tool_declaration_matches_hf(
  function test_kimi_k25_thinking_preserved_in_suffix (line 625) | def test_kimi_k25_thinking_preserved_in_suffix(build_mode: str, kimi_tok...
  function test_kimi_k25_thinking_stripped_in_history (line 662) | def test_kimi_k25_thinking_stripped_in_history(build_mode: str, kimi_tok...
  function test_kimi_k25_eot_parsing (line 708) | def test_kimi_k25_eot_parsing(kimi_tokenizer, kimi_renderer):
  function test_kimi_k25_parse_response_restores_prefilled_think_tag (line 729) | def test_kimi_k25_parse_response_restores_prefilled_think_tag(kimi_token...
  function test_kimi_k25_parse_response_streaming_restores_prefilled_think_tag (line 744) | def test_kimi_k25_parse_response_streaming_restores_prefilled_think_tag(
  function test_kimi_k25_image_content (line 775) | def test_kimi_k25_image_content(image_dimensions_and_expected_tokens: tu...

FILE: tinker_cookbook/renderers/kimi_k2_5_tool_declaration_ts.py
  class _SchemaRegistry (line 19) | class _SchemaRegistry:
    method __init__ (line 22) | def __init__(self):
    method register_definitions (line 26) | def register_definitions(self, defs: dict[str, Any]):
    method resolve_ref (line 33) | def resolve_ref(self, ref: str) -> dict[str, Any]:
  function _format_description (line 47) | def _format_description(description: str, indent: str = "") -> str:
  class _BaseType (line 51) | class _BaseType:
    method __init__ (line 55) | def __init__(
    method to_typescript_style (line 64) | def to_typescript_style(self, indent: str = "") -> str:
    method format_docstring (line 67) | def format_docstring(self, indent: str) -> str:
  class _ParameterTypeScalar (line 80) | class _ParameterTypeScalar(_BaseType):
    method __init__ (line 83) | def __init__(self, type: str, extra_props: dict[str, Any] | None = None):
    method to_typescript_style (line 94) | def to_typescript_style(self, indent: str = "") -> str:
  class _ParameterTypeObject (line 101) | class _ParameterTypeObject(_BaseType):
    method __init__ (line 105) | def __init__(self, json_schema_object: dict[str, Any], registry: _Sche...
    method to_typescript_style (line 139) | def to_typescript_style(self, indent: str = "") -> str:
  class _ParameterTypeArray (line 177) | class _ParameterTypeArray(_BaseType):
    method __init__ (line 180) | def __init__(self, json_schema_object: dict[str, Any], registry: _Sche...
    method to_typescript_style (line 187) | def to_typescript_style(self, indent: str = "") -> str:
  class _ParameterTypeEnum (line 204) | class _ParameterTypeEnum(_BaseType):
    method __init__ (line 208) | def __init__(self, json_schema_object: dict[str, Any]):
    method to_typescript_style (line 237) | def to_typescript_style(self, indent: str = "") -> str:
  class _ParameterTypeAnyOf (line 241) | class _ParameterTypeAnyOf(_BaseType):
    method __init__ (line 244) | def __init__(
    method to_typescript_style (line 252) | def to_typescript_style(self, indent: str = "") -> str:
  class _ParameterTypeUnion (line 256) | class _ParameterTypeUnion(_BaseType):
    method __init__ (line 259) | def __init__(self, json_schema_object: dict[str, Any]):
    method to_typescript_style (line 273) | def to_typescript_style(self, indent: str = "") -> str:
  class _ParameterTypeRef (line 277) | class _ParameterTypeRef(_BaseType):
    method __init__ (line 281) | def __init__(self, json_schema_object: dict[str, Any], registry: _Sche...
    method to_typescript_style (line 293) | def to_typescript_style(self, indent: str = "") -> str:
  class _Parameter (line 309) | class _Parameter:
    method parse_extended (line 321) | def parse_extended(cls, attributes: dict[str, Any]) -> "_Parameter":
    method to_typescript_style (line 332) | def to_typescript_style(self, indent: str = "") -> str:
  function _parse_parameter_type (line 349) | def _parse_parameter_type(
  function _openai_function_to_typescript_style (line 384) | def _openai_function_to_typescript_style(
  function encode_tools_to_typescript_style (line 436) | def encode_tools_to_typescript_style(

FILE: tinker_cookbook/renderers/kimi_k2_test.py
  function _is_message (line 23) | def _is_message(obj) -> TypeGuard[Message]:
  function _get_basic_4turn (line 33) | def _get_basic_4turn() -> list[Message]:
  function _get_tool_call_conversation (line 42) | def _get_tool_call_conversation() -> list[Message]:
  function test_kimi_streaming_simple_text (line 73) | def test_kimi_streaming_simple_text():
  function test_kimi_streaming_with_thinking (line 93) | def test_kimi_streaming_with_thinking():
  function test_kimi_streaming_matches_batch (line 116) | def test_kimi_streaming_matches_batch():
  function test_kimi_streaming_content_index_increments (line 135) | def test_kimi_streaming_content_index_increments():
  function test_kimi_streaming_multiple_think_blocks (line 152) | def test_kimi_streaming_multiple_think_blocks():
  function test_kimi_streaming_empty_response (line 177) | def test_kimi_streaming_empty_response():
  function test_kimi_streaming_no_unnecessary_buffering (line 191) | def test_kimi_streaming_no_unnecessary_buffering():
  function test_kimi_streaming_with_emoji (line 205) | def test_kimi_streaming_with_emoji():
  function _assert_streaming_matches_batch (line 245) | def _assert_streaming_matches_batch(renderer, response_str: str):
  class TestKimiK2StreamingBatchEquivalence (line 285) | class TestKimiK2StreamingBatchEquivalence:
    method renderer (line 289) | def renderer(self):
    method test_simple_text (line 293) | def test_simple_text(self, renderer):
    method test_thinking_then_text (line 296) | def test_thinking_then_text(self, renderer):
    method test_empty_thinking (line 303) | def test_empty_thinking(self, renderer):
    method test_long_thinking (line 306) | def test_long_thinking(self, renderer):
    method test_multiple_think_blocks (line 316) | def test_multiple_think_blocks(self, renderer):
    method test_empty_response (line 322) | def test_empty_response(self, renderer):
    method test_whitespace_only (line 325) | def test_whitespace_only(self, renderer):
    method test_special_characters (line 328) | def test_special_characters(self, renderer):
    method test_emoji (line 334) | def test_emoji(self, renderer):
    method test_code_blocks (line 339) | def test_code_blocks(self, renderer):
    method test_html_like_content (line 346) | def test_html_like_content(self, renderer):
    method test_tool_call_with_thinking (line 352) | def test_tool_call_with_thinking(self, renderer):
    method test_tool_call_without_thinking (line 363) | def test_tool_call_without_thinking(self, renderer):
    method test_text_then_tool_call (line 373) | def test_text_then_tool_call(self, renderer):
    method test_multiple_tool_calls (line 384) | def test_multiple_tool_calls(self, renderer):
    method test_multiline_thinking (line 398) | def test_multiline_thinking(self, renderer):
    method test_no_end_token (line 404) | def test_no_end_token(self, renderer):
    method test_content_index_ordering (line 422) | def test_content_index_ordering(self, renderer):
  function test_kimi_k2_thinking_stripped_when_no_suffix_messages (line 448) | def test_kimi_k2_thinking_stripped_when_no_suffix_messages():
  function test_kimi_k2_thinking_preserved_in_suffix_after_last_non_tool_call (line 499) | def test_kimi_k2_thinking_preserved_in_suffix_after_last_non_tool_call():
  function test_kimi_k2_thinking_preserved_when_no_non_tool_call_assistant (line 553) | def test_kimi_k2_thinking_preserved_when_no_non_tool_call_assistant():
  function test_kimi_k2_build_supervised_examples_last_assistant_matches (line 592) | def test_kimi_k2_build_supervised_examples_last_assistant_matches():
  function test_kimi_k2_build_supervised_examples_all_assistant_matches (line 610) | def test_kimi_k2_build_supervised_examples_all_assistant_matches():
  function test_kimi_k2_build_supervised_examples_warns_on_non_assistant_mode (line 652) | def test_kimi_k2_build_supervised_examples_warns_on_non_assistant_mode():
  function test_kimi_k2_build_supervised_examples_all_assistant_matches_with_tool_calls (line 683) | def test_kimi_k2_build_supervised_examples_all_assistant_matches_with_to...

FILE: tinker_cookbook/renderers/kimi_k2_tool_declaration_test.py
  function test_tool_declaration_message_order (line 50) | def test_tool_declaration_message_order(tools, expected_order):
  function test_tool_declaration_no_duplicate_system (line 64) | def test_tool_declaration_no_duplicate_system():
  function test_tool_json_keys_are_sorted (line 87) | def test_tool_json_keys_are_sorted():
  function test_tool_declaration_matches_hf_tokens (line 135) | def test_tool_declaration_matches_hf_tokens():
  function test_tool_declaration_string_matches_hf (line 192) | def test_tool_declaration_string_matches_hf():
  function test_empty_tools_list (line 228) | def test_empty_tools_list():
  function test_custom_system_prompt_with_tools (line 240) | def test_custom_system_prompt_with_tools():
  function _find_first_diff_index (line 259) | def _find_first_diff_index(list1, list2):

FILE: tinker_cookbook/renderers/llama3.py
  class Llama3Renderer (line 15) | class Llama3Renderer(Renderer):
    method has_extension_property (line 38) | def has_extension_property(self) -> bool:
    method render_message (line 42) | def render_message(self, message: Message, ctx: RenderContext) -> Rend...
    method _bos_tokens (line 58) | def _bos_tokens(self) -> list[int]:
    method _end_message_token (line 62) | def _end_message_token(self) -> int:
    method get_stop_sequences (line 66) | def get_stop_sequences(self) -> list[int]:
    method parse_response (line 69) | def parse_response(self, response: list[int]) -> tuple[Message, bool]:

FILE: tinker_cookbook/renderers/nemotron3.py
  function _render_extra_keys (line 49) | def _render_extra_keys(obj: Mapping[str, object], handled_keys: set[str]...
  function _format_nemotron3_tool_declaration (line 65) | def _format_nemotron3_tool_declaration(tool: ToolSpec) -> str:
  class Nemotron3Renderer (line 102) | class Nemotron3Renderer(Qwen3_5Renderer):
    method _normalize_messages (line 122) | def _normalize_messages(self, messages: list[Message]) -> list[Message]:
    method build_generation_prompt (line 133) | def build_generation_prompt(self, messages: list[Message], *args: obje...
    method build_supervised_example (line 136) | def build_supervised_example(self, messages: list[Message], *args: obj...
    method _assistant_header_suffix (line 139) | def _assistant_header_suffix(self, message: Message, ctx: RenderContex...
    method render_message (line 177) | def render_message(self, message: Message, ctx: RenderContext) -> Rend...
    method _format_thinking_text (line 191) | def _format_thinking_text(self, thinking: str) -> str:
    method _wrap_qwen_tool_response_chunks (line 195) | def _wrap_qwen_tool_response_chunks(
    method _format_tool_calls_chunks (line 209) | def _format_tool_calls_chunks(self, message: Message) -> list[ImagePar...
    method _postprocess_parsed_message (line 233) | def _postprocess_parsed_message(self, message: Message) -> None:
    method create_conversation_prefix_with_tools (line 256) | def create_conversation_prefix_with_tools(
  class Nemotron3DisableThinkingRenderer (line 317) | class Nemotron3DisableThinkingRenderer(Nemotron3Renderer):
    method _get_generation_suffix (line 325) | def _get_generation_suffix(self, role: Role, ctx: RenderContext) -> li...

FILE: tinker_cookbook/renderers/nemotron3_test.py
  function nemotron_tokenizer (line 36) | def nemotron_tokenizer():
  function nemotron_renderer (line 41) | def nemotron_renderer(nemotron_tokenizer):
  function nemotron_renderer_disable_thinking (line 46) | def nemotron_renderer_disable_thinking(nemotron_tokenizer):
  function _hf_generation_tokens (line 50) | def _hf_generation_tokens(tokenizer, hf_messages, tools=None, enable_thi...
  function _hf_supervised_tokens (line 62) | def _hf_supervised_tokens(tokenizer, hf_messages, tools=None, enable_thi...
  function get_basic_conversation_for_generation (line 79) | def get_basic_conversation_for_generation() -> list[Message]:
  function get_basic_conversation_for_supervised (line 89) | def get_basic_conversation_for_supervised() -> list[Message]:
  function get_thinking_conversation_for_supervised (line 98) | def get_thinking_conversation_for_supervised() -> list[Message]:
  function get_multiturn_thinking_conversation (line 113) | def get_multiturn_thinking_conversation() -> list[Message]:
  function get_tool_spec (line 136) | def get_tool_spec() -> ToolSpec:
  function get_rich_tool_spec (line 158) | def get_rich_tool_spec() -> ToolSpec:
  function get_tool_call_conversation_for_generation (line 189) | def get_tool_call_conversation_for_generation() -> tuple[list[Message], ...
  function get_historical_tool_call_with_nonempty_text_conversation (line 218) | def get_historical_tool_call_with_nonempty_text_conversation() -> tuple[
  function get_tool_call_conversation_for_supervised (line 270) | def get_tool_call_conversation_for_supervised() -> tuple[list[Message], ...
  function test_tool_declaration_xml_format (line 311) | def test_tool_declaration_xml_format():
  function test_tool_declaration_not_json_per_line (line 331) | def test_tool_declaration_not_json_per_line():
  function test_tool_declaration_minimal_tool (line 339) | def test_tool_declaration_minimal_tool():
  function test_tool_declaration_extra_schema_keys_match_hf (line 349) | def test_tool_declaration_extra_schema_keys_match_hf(nemotron_tokenizer,...
  function test_create_conversation_prefix_system_before_tools (line 377) | def test_create_conversation_prefix_system_before_tools(nemotron_renderer):
  function test_create_conversation_prefix_without_system_prompt (line 393) | def test_create_conversation_prefix_without_system_prompt(nemotron_rende...
  function test_create_conversation_prefix_xml_tool_format (line 402) | def test_create_conversation_prefix_xml_tool_format(nemotron_renderer):
  function test_create_conversation_prefix_no_tools (line 415) | def test_create_conversation_prefix_no_tools(nemotron_renderer):
  function test_generation_prompt_ends_with_think (line 428) | def test_generation_prompt_ends_with_think(nemotron_tokenizer, nemotron_...
  function test_disable_thinking_generation_prompt (line 437) | def test_disable_thinking_generation_prompt(nemotron_tokenizer, nemotron...
  function test_custom_prefill_overrides_think (line 446) | def test_custom_prefill_overrides_think(nemotron_tokenizer, nemotron_ren...
  function test_basic_conversation_generation_matches_hf (line 461) | def test_basic_conversation_generation_matches_hf(nemotron_tokenizer, ne...
  function test_disable_thinking_generation_matches_hf (line 474) | def test_disable_thinking_generation_matches_hf(
  function test_basic_conversation_supervised_matches_hf (line 501) | def test_basic_conversation_supervised_matches_hf(nemotron_tokenizer, ne...
  function test_thinking_conversation_supervised_matches_hf (line 514) | def test_thinking_conversation_supervised_matches_hf(nemotron_tokenizer,...
  function test_multiturn_thinking_supervised_matches_hf (line 527) | def test_multiturn_thinking_supervised_matches_hf(nemotron_tokenizer, ne...
  function test_think_block_added_to_all_assistant_history (line 544) | def test_think_block_added_to_all_assistant_history(nemotron_tokenizer, ...
  function test_tool_declaration_matches_hf (line 560) | def test_tool_declaration_matches_hf(build_mode: str, nemotron_tokenizer...
  function test_tool_call_conversation_generation_matches_hf (line 592) | def test_tool_call_conversation_generation_matches_hf(nemotron_tokenizer...
  function test_tool_call_conversation_supervised_matches_hf (line 614) | def test_tool_call_conversation_supervised_matches_hf(nemotron_tokenizer...
  function test_historical_tool_call_with_nonempty_text_generation_matches_hf (line 636) | def test_historical_tool_call_with_nonempty_text_generation_matches_hf(
  function test_historical_tool_call_with_nonempty_text_supervised_matches_hf (line 666) | def test_historical_tool_call_with_nonempty_text_supervised_matches_hf(
  function test_parse_response_plain_text (line 695) | def test_parse_response_plain_text(nemotron_tokenizer, nemotron_renderer):
  function test_parse_response_with_thinking (line 705) | def test_parse_response_with_thinking(nemotron_tokenizer, nemotron_rende...
  function test_parse_response_for_streaming_with_thinking (line 723) | def test_parse_response_for_streaming_with_thinking(nemotron_tokenizer, ...
  function test_parse_response_tool_call (line 752) | def test_parse_response_tool_call(nemotron_tokenizer, nemotron_renderer):
  function test_renderer_types (line 780) | def test_renderer_types(nemotron_renderer, nemotron_renderer_disable_thi...
  function test_renderer_is_not_qwen35 (line 785) | def test_renderer_is_not_qwen35(nemotron_renderer):

FILE: tinker_cookbook/renderers/parsing_test.py
  function test_parse_content_blocks_no_special_tags (line 36) | def test_parse_content_blocks_no_special_tags():
  function test_parse_content_blocks_single_think_block (line 42) | def test_parse_content_blocks_single_think_block():
  function test_parse_content_blocks_multiple_think_blocks (line 56) | def test_parse_content_blocks_multiple_think_blocks():
  function test_parse_content_blocks_empty_blocks_omitted (line 71) | def test_parse_content_blocks_empty_blocks_omitted():
  function test_parse_content_blocks_whitespace_handling (line 83) | def test_parse_content_blocks_whitespace_handling():
  function test_parse_content_blocks_tool_call_only (line 96) | def test_parse_content_blocks_tool_call_only():
  function test_parse_content_blocks_interleaved (line 110) | def test_parse_content_blocks_interleaved():
  function test_parse_content_blocks_invalid_tool_call (line 129) | def test_parse_content_blocks_invalid_tool_call():
  function test_format_content_as_string_roundtrip (line 146) | def test_format_content_as_string_roundtrip():
  function test_longest_matching_suffix_prefix (line 166) | def test_longest_matching_suffix_prefix():
  function test_utf8_decoder_non_monotonic_decodability (line 200) | def test_utf8_decoder_non_monotonic_decodability():
  function test_utf8_decoder_with_real_tokenizer_ascii (line 244) | def test_utf8_decoder_with_real_tokenizer_ascii():
  function test_utf8_decoder_handles_replacement_chars (line 278) | def test_utf8_decoder_handles_replacement_chars():
  function test_utf8_decoder_mixed_ascii_and_emoji (line 317) | def test_utf8_decoder_mixed_ascii_and_emoji():
  function test_thinking_generation_parse_correspondence (line 364) | def test_thinking_generation_parse_correspondence(model_name, renderer_c...

FILE: tinker_cookbook/renderers/qwen3.py
  function _merge_consecutive_text_parts (line 38) | def _merge_consecutive_text_parts(
  class Qwen3Renderer (line 60) | class Qwen3Renderer(Renderer):
    method __init__ (line 88) | def __init__(self, tokenizer: Tokenizer, strip_thinking_from_history: ...
    method has_extension_property (line 107) | def has_extension_property(self) -> bool:
    method _get_qwen_role_for_message (line 118) | def _get_qwen_role_for_message(self, message: Message) -> str:
    method _wrap_qwen_tool_response (line 128) | def _wrap_qwen_tool_response(self, content: str) -> str:
    method render_message (line 132) | def render_message(self, message: Message, ctx: RenderContext) -> Rend...
    method _end_message_token (line 191) | def _end_message_token(self) -> int:
    method get_stop_sequences (line 196) | def get_stop_sequences(self) -> list[int]:
    method parse_response (line 199) | def parse_response(self, response: list[int]) -> tuple[Message, bool]:
    method _parse_response_for_streaming (line 233) | def _parse_response_for_streaming(self, response: list[int]) -> tuple[...
    method to_openai_message (line 269) | def to_openai_message(self, message: Message) -> dict:
    method _to_openai_tool_arguments (line 321) | def _to_openai_tool_arguments(self, arguments: str) -> str | dict:
    method create_conversation_prefix_with_tools (line 329) | def create_conversation_prefix_with_tools(
  class Qwen3DisableThinkingRenderer (line 375) | class Qwen3DisableThinkingRenderer(Qwen3Renderer):
    method render_message (line 388) | def render_message(self, message: Message, ctx: RenderContext) -> Rend...
  class Qwen3InstructRenderer (line 414) | class Qwen3InstructRenderer(Qwen3Renderer):
    method has_extension_property (line 424) | def has_extension_property(self) -> bool:
  class Qwen3VLRenderer (line 433) | class Qwen3VLRenderer(Qwen3Renderer):
    method __init__ (line 453) | def __init__(
    method _format_thinking_text (line 465) | def _format_thinking_text(self, thinking: str) -> str:
    method _assistant_header_suffix (line 469) | def _assistant_header_suffix(self, message: Message, ctx: RenderContex...
    method _preprocess_message_parts (line 473) | def _preprocess_message_parts(
    method _wrap_qwen_tool_response_chunks (line 512) | def _wrap_qwen_tool_response_chunks(
    method _format_tool_calls_chunks (line 522) | def _format_tool_calls_chunks(self, message: Message) -> list[ImagePar...
    method render_message (line 539) | def render_message(self, message: Message, ctx: RenderContext) -> Rend...
  class Qwen3VLInstructRenderer (line 589) | class Qwen3VLInstructRenderer(Qwen3VLRenderer):

FILE: tinker_cookbook/renderers/qwen3_5.py
  class Qwen3_5Renderer (line 45) | class Qwen3_5Renderer(Qwen3VLRenderer):
    method _get_generation_suffix (line 57) | def _get_generation_suffix(self, role: Role, ctx: RenderContext) -> li...
    method _assistant_header_suffix (line 67) | def _assistant_header_suffix(self, message: Message, ctx: RenderContex...
    method _format_thinking_text (line 81) | def _format_thinking_text(self, thinking: str) -> str:
    method _to_openai_tool_arguments (line 85) | def _to_openai_tool_arguments(self, arguments: str) -> str | dict:
    method _parse_qwen3_5_tool_call_xml (line 89) | def _parse_qwen3_5_tool_call_xml(self, raw_text: str) -> ToolCall | Un...
    method _normalize_response_tokens (line 135) | def _normalize_response_tokens(self, response: list[int]) -> list[int]:
    method _postprocess_parsed_message (line 154) | def _postprocess_parsed_message(self, message: Message) -> None:
    method parse_response (line 199) | def parse_response(self, response: list[int]) -> tuple[Message, bool]:
    method _parse_response_for_streaming (line 208) | def _parse_response_for_streaming(self, response: list[int]) -> tuple[...
    method _format_tool_call_xml (line 214) | def _format_tool_call_xml(self, tool_call: ToolCall) -> str:
    method _format_tool_calls_chunks (line 227) | def _format_tool_calls_chunks(self, message: Message) -> list[ImagePar...
    method create_conversation_prefix_with_tools (line 238) | def create_conversation_prefix_with_tools(
  class Qwen3_5DisableThinkingRenderer (line 291) | class Qwen3_5DisableThinkingRenderer(Qwen3_5Renderer):
    method _get_generation_suffix (line 300) | def _get_generation_suffix(self, role: Role, ctx: RenderContext) -> li...

FILE: tinker_cookbook/renderers/qwen3_test.py
  function _is_message (line 28) | def _is_message(obj) -> TypeGuard[Message]:
  function test_qwen3_parse_response_extracts_thinking (line 38) | def test_qwen3_parse_response_extracts_thinking():
  function test_qwen3_parse_response_multiple_think_blocks (line 64) | def test_qwen3_parse_response_multiple_think_blocks():
  function test_qwen3_parse_response_no_thinking_returns_string (line 85) | def test_qwen3_parse_response_no_thinking_returns_string():
  function test_qwen3_parse_response_with_tool_calls (line 101) | def test_qwen3_parse_response_with_tool_calls():
  function test_qwen3_parse_response_tool_call_only (line 128) | def test_qwen3_parse_response_tool_call_only():
  function _get_basic_2turn (line 156) | def _get_basic_2turn() -> list[Message]:
  function _get_basic_3turn (line 163) | def _get_basic_3turn() -> list[Message]:
  function _get_basic_4turn (line 171) | def _get_basic_4turn() -> list[Message]:
  function test_qwen3_disable_thinking_supervised (line 180) | def test_qwen3_disable_thinking_supervised():
  function test_qwen3_disable_thinking_generation (line 213) | def test_qwen3_disable_thinking_generation():
  function test_qwen3_disable_thinking_4turn (line 238) | def test_qwen3_disable_thinking_4turn():
  function test_qwen3_streaming_simple_text (line 271) | def test_qwen3_streaming_simple_text():
  function test_qwen3_streaming_with_thinking (line 291) | def test_qwen3_streaming_with_thinking():
  function test_qwen3_streaming_matches_batch (line 314) | def test_qwen3_streaming_matches_batch():
  function test_qwen3_streaming_content_index_increments (line 333) | def test_qwen3_streaming_content_index_increments():
  function test_qwen3_streaming_empty_response (line 350) | def test_qwen3_streaming_empty_response():
  function test_qwen3_streaming_multiple_think_blocks (line 364) | def test_qwen3_streaming_multiple_think_blocks():
  function test_qwen3_streaming_no_unnecessary_buffering (line 389) | def test_qwen3_streaming_no_unnecessary_buffering():
  function test_qwen3_streaming_with_emoji (line 403) | def test_qwen3_streaming_with_emoji():
  function test_qwen3_streaming_supported_by_text_variants (line 427) | def test_qwen3_streaming_supported_by_text_variants(renderer_name):
  function _assert_streaming_matches_batch (line 446) | def _assert_streaming_matches_batch(renderer, response_str: str):
  class TestQwen3StreamingBatchEquivalence (line 486) | class TestQwen3StreamingBatchEquivalence:
    method renderer (line 490) | def renderer(self):
    method test_simple_text (line 494) | def test_simple_text(self, renderer):
    method test_thinking_then_text (line 497) | def test_thinking_then_text(self, renderer):
    method test_empty_thinking (line 504) | def test_empty_thinking(self, renderer):
    method test_long_thinking (line 507) | def test_long_thinking(self, renderer):
    method test_multiple_think_blocks (line 517) | def test_multiple_think_blocks(self, renderer):
    method test_empty_response (line 523) | def test_empty_response(self, renderer):
    method test_whitespace_only (line 526) | def test_whitespace_only(self, renderer):
    method test_special_characters (line 529) | def test_special_characters(self, renderer):
    method test_emoji (line 535) | def test_emoji(self, renderer):
    method test_code_blocks (line 540) | def test_code_blocks(self, renderer):
    method test_html_like_content (line 547) | def test_html_like_content(self, renderer):
    method test_tool_call_with_thinking (line 553) | def test_tool_call_with_thinking(self, renderer):
    method test_tool_call_without_thinking (line 561) | def test_tool_call_without_thinking(self, renderer):
    method test_multiline_thinking (line 568) | def test_multiline_thinking(self, renderer):
    method test_no_end_token (line 574) | def test_no_end_token(self, renderer):
    method test_content_index_ordering (line 591) | def test_content_index_ordering(self, renderer):
  function qwen3_5_tokenizer (line 622) | def qwen3_5_tokenizer():
  function qwen3_5_renderer (line 627) | def qwen3_5_renderer(qwen3_5_tokenizer):
  function test_qwen3_5_parse_response_restores_prefilled_think_tag (line 631) | def test_qwen3_5_parse_response_restores_prefilled_think_tag(qwen3_5_tok...
  function test_qwen3_5_parse_response_streaming_restores_prefilled_think_tag (line 649) | def test_qwen3_5_parse_response_streaming_restores_prefilled_think_tag(
  function test_qwen3_5_streaming_matches_batch_with_prefilled_think (line 675) | def test_qwen3_5_streaming_matches_batch_with_prefilled_think(qwen3_5_to...
  function test_qwen3_5_normalize_noop_when_think_present (line 693) | def test_qwen3_5_normalize_noop_when_think_present(qwen3_5_tokenizer, qw...

FILE: tinker_cookbook/renderers/qwen3_tool_declaration_test.py
  function _hf_tools_for_model (line 33) | def _hf_tools_for_model(
  function _hf_template_kwargs (line 42) | def _hf_template_kwargs(renderer_name: str) -> dict:
  function test_qwen3_tool_json_formatting (line 50) | def test_qwen3_tool_json_formatting(model_name: str, renderer_name: str):
  function test_qwen3_tool_declaration_matches_hf_tokens (line 112) | def test_qwen3_tool_declaration_matches_hf_tokens(model_name: str, rende...
  function test_qwen3_tool_declaration_string_matches_hf (line 163) | def test_qwen3_tool_declaration_string_matches_hf(model_name: str, rende...
  function test_qwen3_multiple_tools (line 208) | def test_qwen3_multiple_tools(model_name: str, renderer_name: str):
  function test_qwen3_empty_tools_list (line 253) | def test_qwen3_empty_tools_list(model_name: str, renderer_name: str):
  function test_qwen3_custom_system_prompt_with_tools (line 268) | def test_qwen3_custom_system_prompt_with_tools(model_name: str, renderer...
  function test_qwen3_preserves_insertion_order (line 313) | def test_qwen3_preserves_insertion_order(model_name: str, renderer_name:...

FILE: tinker_cookbook/renderers/renderer_pickle_test.py
  function renderer_and_model (line 31) | def renderer_and_model(request: pytest.FixtureRequest) -> tuple[str, str]:
  class TestRendererPickle (line 35) | class TestRendererPickle:
    method test_pickle_roundtrip (line 36) | def test_pickle_roundtrip(self, renderer_and_model: tuple[str, str]) -...
    method test_pickle_metadata_set (line 49) | def test_pickle_metadata_set(self, renderer_and_model: tuple[str, str]...
    method test_pickle_without_metadata_raises (line 59) | def test_pickle_without_metadata_raises(self) -> None:
    method test_pickle_with_manual_metadata (line 70) | def test_pickle_with_manual_metadata(self) -> None:
    method test_pickle_without_metadata_vl_renderer (line 85) | def test_pickle_without_metadata_vl_renderer(self) -> None:
    method test_pickle_with_explicit_model_name (line 97) | def test_pickle_with_explicit_model_name(self) -> None:
    method test_pickle_custom_registered_renderer (line 109) | def test_pickle_custom_registered_renderer(self) -> None:
  class TestMessageCompleterPickle (line 130) | class TestMessageCompleterPickle:
    method test_tinker_message_completer_pickle_structure (line 131) | def test_tinker_message_completer_pickle_structure(self) -> None:

FILE: tinker_cookbook/renderers/renderers_test.py
  function _rand_str (line 71) | def _rand_str(rng: random.Random, length: int = 8) -> str:
  function _rand_tool_call (line 75) | def _rand_tool_call(rng: random.Random) -> ToolCall:
  function generate_conversation (line 85) | def generate_conversation(
  function get_basic_3turn_conversation (line 147) | def get_basic_3turn_conversation() -> list[Message]:
  function get_basic_2turn_conversation (line 159) | def get_basic_2turn_conversation() -> list[Message]:
  function get_system_message_3turn_conversation (line 170) | def get_system_message_3turn_conversation() -> list[Message]:
  function get_system_message_2turn_conversation (line 186) | def get_system_message_2turn_conversation() -> list[Message]:
  function get_basic_4turn_conversation (line 201) | def get_basic_4turn_conversation() -> list[Message]:
  function get_tool_call_conversation (line 214) | def get_tool_call_conversation() -> list[Message]:
  function get_tool_call_gen_conversation (line 245) | def get_tool_call_gen_conversation() -> list[Message]:
  function get_4turn_thinking_conversation (line 253) | def get_4turn_thinking_conversation() -> list[Message]:
  function get_thinking_with_whitespace_conversation (line 278) | def get_thinking_with_whitespace_conversation() -> list[Message]:
  function get_multiturn_thinking_conversation (line 292) | def get_multiturn_thinking_conversation() -> list[Message]:
  function get_multiturn_tool_conversation (line 317) | def get_multiturn_tool_conversation() -> list[Message]:
  function get_multiturn_thinking_and_tool_conversation (line 368) | def get_multiturn_thinking_and_tool_conversation() -> list[Message]:
  function _conversation_has_tools (line 518) | def _conversation_has_tools(messages: list[Message]) -> bool:
  function _add_llama3_date_prefix (line 523) | def _add_llama3_date_prefix(messages: list[Message]) -> list[Message]:
  function test_generation_against_hf_chat_templates (line 538) | def test_generation_against_hf_chat_templates(
  function test_supervised_example_against_hf_chat_templates (line 632) | def test_supervised_example_against_hf_chat_templates(
  function test_tokenization_boundary_with_whitespace (line 707) | def test_tokenization_boundary_with_whitespace(model_name: str):
  function test_tool_call_supervised_rendering (line 760) | def test_tool_call_supervised_rendering(model_name: str):
  function test_strip_thinking_from_history_default (line 819) | def test_strip_thinking_from_history_default(model_name: str, renderer_c...
  function test_strip_thinking_from_history_false (line 851) | def test_strip_thinking_from_history_false(model_name: str, renderer_cla...
  function _split_by_weights (line 876) | def _split_by_weights(tokens: list[int], weights: list[float]) -> tuple[...
  function get_2turn_with_thinking (line 909) | def get_2turn_with_thinking() -> list[Message]:
  function test_supervised_generation_parse_consistency (line 975) | def test_supervised_generation_parse_consistency(
  function test_eot_parsing (line 1112) | def test_eot_parsing(model_name: str, renderer_name: str):
  function test_supervised_example_no_user_messages (line 1181) | def test_supervised_example_no_user_messages(model_name: str, renderer_n...
  function _verify_extension_property (line 1218) | def _verify_extension_property(renderer, messages: list[Message], tokeni...
  function test_extension_property_holds (line 1345) | def test_extension_property_holds(
  function test_extension_property_breaks_when_expected (line 1368) | def test_extension_property_breaks_when_expected():
  function cleanup_custom_renderer (line 1386) | def cleanup_custom_renderer():
  function test_register_and_get_custom_renderer (line 1395) | def test_register_and_get_custom_renderer(cleanup_custom_renderer):
  function cleanup_custom_tokenizer (line 1428) | def cleanup_custom_tokenizer():
  function test_register_and_get_custom_tokenizer (line 1437) | def test_register_and_get_custom_tokenizer(cleanup_custom_tokenizer):

FILE: tinker_cookbook/renderers/role_colon.py
  class RoleColonRenderer (line 15) | class RoleColonRenderer(Renderer):
    method has_extension_property (line 29) | def has_extension_property(self) -> bool:
    method render_message (line 33) | def render_message(self, message: Message, ctx: RenderContext) -> Rend...
    method get_stop_sequences (line 52) | def get_stop_sequences(self) -> list[str]:
    method parse_response (line 55) | def parse_response(self, response: list[int]) -> tuple[Message, bool]:
    method _bos_tokens (line 87) | def _bos_tokens(self) -> list[int]:
    method create_conversation_prefix_with_tools (line 94) | def create_conversation_prefix_with_tools(

FILE: tinker_cookbook/renderers/testing_utils.py
  function extract_token_ids (line 11) | def extract_token_ids(result: Any) -> list[int]:
  function skip_if_deepseek_tokenizer_bug (line 35) | def skip_if_deepseek_tokenizer_bug(model_name: str) -> None:

FILE: tinker_cookbook/renderers/tool_calling_test.py
  function test_qwen3_tool_response_rendering (line 34) | def test_qwen3_tool_response_rendering(model_name: str, renderer_name: s...
  function test_qwen3_parse_single_tool_call (line 82) | def test_qwen3_parse_single_tool_call(model_name: str, renderer_name: str):
  function test_qwen3_parse_multiple_tool_calls (line 123) | def test_qwen3_parse_multiple_tool_calls(model_name: str, renderer_name:...
  function test_kimi_k2_parse_tool_call (line 169) | def test_kimi_k2_parse_tool_call():
  function test_deepseek_parse_tool_call (line 195) | def test_deepseek_parse_tool_call():
  function test_qwen3_parse_invalid_tool_call_json (line 225) | def test_qwen3_parse_invalid_tool_call_json():
  function test_qwen3_mixed_valid_invalid_tool_calls (line 249) | def test_qwen3_mixed_valid_invalid_tool_calls():
  function test_deepseek_parse_invalid_tool_call_json (line 282) | def test_deepseek_parse_invalid_tool_call_json():
  function test_kimi_k2_parse_invalid_tool_call_json (line 301) | def test_kimi_k2_parse_invalid_tool_call_json():

FILE: tinker_cookbook/rl/builder_pickle_test.py
  class TestProblemGroupBuilderPickle (line 17) | class TestProblemGroupBuilderPickle:
    method test_pickle_roundtrip (line 18) | def test_pickle_roundtrip(self) -> None:
    method test_pickle_with_convo_prefix (line 41) | def test_pickle_with_convo_prefix(self) -> None:
  class TestRolloutTask (line 58) | class TestRolloutTask:
    method test_pickle_roundtrip (line 59) | def test_pickle_roundtrip(self) -> None:
  class TestRolloutExecutorContextVar (line 90) | class TestRolloutExecutorContextVar:
    method test_default_is_none (line 91) | def test_default_is_none(self) -> None:
    method test_set_and_get (line 95) | def test_set_and_get(self) -> None:

FILE: tinker_cookbook/rl/data_processing.py
  function compute_advantages (line 23) | def compute_advantages(trajectory_groups_P: list[TrajectoryGroup]) -> li...
  function _is_prefix (line 40) | def _is_prefix(seq1: FlatOb, seq2: FlatOb) -> bool:
  function _flat_ob_token_len (line 47) | def _flat_ob_token_len(flat_ob: FlatOb) -> int:
  function _flat_ob_to_model_input (line 57) | def _flat_ob_to_model_input(flat_ob: FlatOb) -> tinker.ModelInput:
  function _flatten_chunks (line 76) | def _flatten_chunks(chunks: list[tinker.ModelInputChunk]) -> FlatOb:
  function trajectory_to_data (line 86) | def trajectory_to_data(traj: Trajectory, traj_advantage: float) -> list[...
  function assemble_training_data (line 174) | def assemble_training_data(
  function remove_constant_reward_groups (line 196) | def remove_constant_reward_groups(

FILE: tinker_cookbook/rl/message_env.py
  class MessageStepResult (line 24) | class MessageStepResult:
  class MessageEnv (line 35) | class MessageEnv(ABC):
    method initial_observation (line 39) | async def initial_observation(self) -> list[Message]:
    method step (line 44) | async def step(self, message: Message) -> MessageStepResult:
  class EnvFromMessageEnv (line 49) | class EnvFromMessageEnv(types.Env):
    method __init__ (line 56) | def __init__(
    method _render_in_thread (line 71) | async def _render_in_thread(self, messages: list[Message], **kwargs) -...
    method initial_observation (line 80) | async def initial_observation(self) -> tuple[tinker.ModelInput, StopCo...
    method step (line 85) | async def step(self, action: types.Action) -> types.StepResult:

FILE: tinker_cookbook/rl/message_env_test.py
  function _make_model_input (line 24) | def _make_model_input(tokens: list[int]) -> tinker.ModelInput:
  class StubMessageEnv (line 28) | class StubMessageEnv(MessageEnv):
    method __init__ (line 31) | def __init__(
    method initial_observation (line 40) | async def initial_observation(self) -> list[Message]:
    method step (line 43) | async def step(self, message: Message) -> MessageStepResult:
  function _make_renderer (line 48) | def _make_renderer(
  class TestInitialObservation (line 74) | class TestInitialObservation:
    method test_returns_rendered_prompt_and_stop_condition (line 75) | def test_returns_rendered_prompt_and_stop_condition(self):
    method test_render_runs_in_thread (line 91) | def test_render_runs_in_thread(self):
  class TestStepParseFailure (line 109) | class TestStepParseFailure:
    method test_parse_failure_returns_failed_reward (line 110) | def test_parse_failure_returns_failed_reward(self):
    method test_parse_failure_no_terminate (line 133) | def test_parse_failure_no_terminate(self):
  class TestStepSuccess (line 153) | class TestStepSuccess:
    method test_delegates_to_message_env_and_renders (line 154) | def test_delegates_to_message_env_and_renders(self):
    method test_custom_stop_condition_from_message_env (line 190) | def test_custom_stop_condition_from_message_env(self):
    method test_none_stop_condition_falls_back_to_base (line 208) | def test_none_stop_condition_falls_back_to_base(self):
  class TestMaxTrajectoryTokens (line 227) | class TestMaxTrajectoryTokens:
    method test_context_overflow_terminates_episode (line 228) | def test_context_overflow_terminates_episode(self):
    method test_within_limit_continues (line 256) | def test_within_limit_continues(self):
    method test_no_limit_set (line 279) | def test_no_limit_set(self):
  class TestStepThreading (line 298) | class TestStepThreading:
    method test_step_renders_in_thread (line 299) | def test_step_renders_in_thread(self):
  class TestLogsPassthrough (line 320) | class TestLogsPassthrough:
    method test_logs_forwarded_on_success (line 323) | def test_logs_forwarded_on_success(self):
    method test_logs_forwarded_on_context_overflow (line 341) | def test_logs_forwarded_on_context_overflow(self):
    method test_no_logs_on_parse_error (line 365) | def test_no_logs_on_parse_error(self):
    method test_empty_logs_by_default (line 385) | def test_empty_logs_by_default(self):

FILE: tinker_cookbook/rl/metric_util.py
  function _compute_by_group_metrics (line 30) | def _compute_by_group_metrics(trajectory_groups_P: list[TrajectoryGroup]...
  function compute_trajectory_metrics (line 49) | def compute_trajectory_metrics(
  function _compute_trajectory_metrics (line 73) | def _compute_trajectory_metrics(trajectory_groups_P: list[TrajectoryGrou...
  function dataset_to_env_group_builders (line 112) | def dataset_to_env_group_builders(dataset: RLDataset) -> list[EnvGroupBu...
  class RLTestSetEvaluator (line 119) | class RLTestSetEvaluator(SamplingClientEvaluator):
    method __init__ (line 120) | def __init__(
    method eval_token_completer (line 134) | async def eval_token_completer(
    method __call__ (line 169) | async def __call__(
    method _eval_with_executor (line 187) | async def _eval_with_executor(
    method _collect_eval_metrics (line 210) | def _collect_eval_metrics(

FILE: tinker_cookbook/rl/metrics.py
  function compute_kl_sample_train (line 18) | def compute_kl_sample_train(
  function compute_post_kl (line 53) | async def compute_post_kl(
  function incorporate_kl_penalty (line 87) | async def incorporate_kl_penalty(
  function discounted_future_sum_vectorized (line 131) | def discounted_future_sum_vectorized(x: torch.Tensor, gamma: float) -> t...
  function compute_sampling_client_metrics (line 152) | def compute_sampling_client_metrics(

FILE: tinker_cookbook/rl/multiturn_weight_assignment_test.py
  function _make_transition (line 26) | def _make_transition(
  function _get_mask (line 43) | def _get_mask(datum: tinker.Datum) -> list[float]:
  function _stub_tool (line 48) | async def _stub_tool() -> ToolResult:
  function _zero_reward_fn (line 53) | async def _zero_reward_fn(history) -> tuple[float, dict[str, float]]:
  class TestMultiTurnPrefixTrajectory (line 62) | class TestMultiTurnPrefixTrajectory:
    method _make_trajectory (line 70) | def _make_trajectory(self) -> Trajectory:
    method test_returns_single_datum (line 84) | def test_returns_single_datum(self):
    method test_mask_matches_expected (line 88) | def test_mask_matches_expected(self):
    method test_mask_sum_equals_action_token_count (line 97) | def test_mask_sum_equals_action_token_count(self):
  class TestSingleTurnTrajectory (line 109) | class TestSingleTurnTrajectory:
    method test_single_turn_mask (line 110) | def test_single_turn_mask(self):
  class TestPrefixBreak (line 128) | class TestPrefixBreak:
    method test_non_prefix_observation_produces_two_datums (line 129) | def test_non_prefix_observation_produces_two_datums(self):
    method test_each_datum_has_correct_mask (line 140) | def test_each_datum_has_correct_mask(self):
  function _make_stub_renderer (line 160) | def _make_stub_renderer():
  function _make_stub_policy (line 229) | def _make_stub_policy():
  function _run_e2e_rollout (line 245) | def _run_e2e_rollout():
  class TestEndToEndToolUseRollout (line 262) | class TestEndToEndToolUseRollout:
    method test_trajectory_has_two_transitions (line 263) | def test_trajectory_has_two_transitions(self):
    method test_produces_single_datum (line 269) | def test_produces_single_datum(self):
    method test_mask_only_on_agent_tokens (line 273) | def test_mask_only_on_agent_tokens(self):

FILE: tinker_cookbook/rl/play_w_env.py
  function get_async_input (line 30) | async def get_async_input(prompt: str, multiline: bool = False) -> str:
  class ManualPolicy (line 51) | class ManualPolicy(TokenCompleter):
    method __init__ (line 52) | def __init__(self, tokenizer: Tokenizer, multiline: bool = True, show_...
    method __call__ (line 58) | async def __call__(self, ob: tinker.ModelInput, stop: StopCondition) -...
  function print_trajectory_summary (line 73) | def print_trajectory_summary(trajectory: Trajectory):
  function play_env (line 89) | async def play_env(
  function main (line 103) | async def main():

FILE: tinker_cookbook/rl/preference_envs.py
  class PreferenceEnv (line 39) | class PreferenceEnv(Env):
    method __init__ (line 40) | def __init__(
    method stop_condition (line 49) | def stop_condition(self) -> StopCondition:
    method initial_observation (line 52) | async def initial_observation(self) -> tuple[Observation, StopCondition]:
    method step (line 55) | async def step(self, action: Action) -> StepResult:
  class TournamentPattern (line 75) | class TournamentPattern(StrEnum):
  function get_pairs_chunked (line 80) | def get_pairs_chunked(n: int, pattern: TournamentPattern, chunk_size: in...
  function get_pairs (line 94) | def get_pairs(n: int, pattern: TournamentPattern) -> list[tuple[int, int]]:
  class PairwisePreferenceGroupBuilder (line 104) | class PairwisePreferenceGroupBuilder(EnvGroupBuilder):
    method make_envs (line 114) | async def make_envs(self) -> Sequence[Env]:
    method _preprocess_message (line 119) | def _preprocess_message(self, message: renderers.Message) -> renderers...
    method get_response_message (line 125) | def get_response_message(self, trajectory: Trajectory) -> tuple[list[r...
    method comparison_reward_for_second_messages (line 131) | def comparison_reward_for_second_messages(
    method compute_group_rewards (line 142) | async def compute_group_rewards(
    method logging_tags (line 217) | def logging_tags(self) -> list[str]:
  class PairwisePreferenceDataset (line 221) | class PairwisePreferenceDataset(RLDataset):
    method __init__ (line 222) | def __init__(
    method get_batch (line 241) | def get_batch(self, index: int) -> list[EnvGroupBuilder]:
    method _labeled_comparison_to_env_group (line 248) | def _labeled_comparison_to_env_group(self, lc: LabeledComparison) -> E...
    method __len__ (line 258) | def __len__(self) -> int:
  class PairwisePreferenceRLDatasetBuilder (line 263) | class PairwisePreferenceRLDatasetBuilder(RLDatasetBuilder):
    method __call__ (line 273) | async def __call__(self) -> tuple[PairwisePreferenceDataset, None]:

FILE: tinker_cookbook/rl/problem_env.py
  class ProblemEnv (line 25) | class ProblemEnv(Env):
    method __init__ (line 26) | def __init__(
    method stop_condition (line 37) | def stop_condition(self) -> StopCondition:
    method get_question (line 41) | def get_question(self) -> str:
    method check_answer (line 45) | def check_answer(self, sample_str: str) -> bool:
    method check_format (line 49) | def check_format(self, sample_str: str) -> bool:
    method get_reference_answer (line 53) | def get_reference_answer(self) -> str:
    method initial_observation (line 57) | async def initial_observation(self) -> tuple[Observation, StopCondition]:
    method step (line 63) | async def step(self, action: Action) -> StepResult:
  class ProblemGroupBuilder (line 101) | class ProblemGroupBuilder(EnvGroupBuilder):
    method make_envs (line 106) | async def make_envs(self) -> Sequence[Env]:
    method compute_group_rewards (line 109) | async def compute_group_rewards(
    method logging_tags (line 114) | def logging_tags(self) -> list[str]:

FILE: tinker_cookbook/rl/rollout_error_resilience_test.py
  function _make_trajectory (line 39) | def _make_trajectory() -> Trajectory:
  class _FakePolicy (line 54) | class _FakePolicy(TokenCompleter):
    method __init__ (line 57) | def __init__(self, fail_indices: set[int] | None = None, error: BaseEx...
    method __call__ (line 62) | async def __call__(self, model_input, stop):
  class _FakeEnv (line 70) | class _FakeEnv(Env):
    method initial_observation (line 71) | async def initial_observation(self):
    method step (line 74) | async def step(self, action):
  class _FakeEnvGroupBuilder (line 83) | class _FakeEnvGroupBuilder(EnvGroupBuilder):
    method __init__ (line 84) | def __init__(self, n_envs: int = 4):
    method make_envs (line 88) | async def make_envs(self):
  class TestRolloutStrategyFromConfig (line 98) | class TestRolloutStrategyFromConfig:
    method test_false_returns_fail_fast (line 99) | def test_false_returns_fail_fast(self):
    method test_true_returns_retry_on_failure (line 104) | def test_true_returns_retry_on_failure(self):
    method test_strategy_instance_passed_through (line 110) | def test_strategy_instance_passed_through(self):
    method test_fail_fast_instance_passed_through (line 114) | def test_fail_fast_instance_passed_through(self):
    method test_invalid_value_raises (line 118) | def test_invalid_value_raises(self):
  class TestStrategyPickle (line 128) | class TestStrategyPickle:
    method test_fail_fast_pickleable (line 129) | def test_fail_fast_pickleable(self):
    method test_retry_on_failure_pickleable (line 134) | def test_retry_on_failure_pickleable(self):
  class TestRolloutError (line 146) | class TestRolloutError:
    method test_pickleable (line 147) | def test_pickleable(self):
  class TestTrajectoryGroupErrors (line 154) | class TestTrajectoryGroupErrors:
    method test_default_no_errors (line 155) | def test_default_no_errors(self):
    method test_with_errors (line 159) | def test_with_errors(self):
    method test_pickleable_with_errors (line 164) | def test_pickleable_with_errors(self):
    method test_get_total_rewards_unaffected (line 170) | def test_get_total_rewards_unaffected(self):
  class TestRolloutErrorCounter (line 188) | class TestRolloutErrorCounter:
    method test_ingest_successful_group (line 189) | def test_ingest_successful_group(self):
    method test_ingest_none_increments_groups_skipped (line 195) | def test_ingest_none_increments_groups_skipped(self):
    method test_ingest_group_with_errors (line 202) | def test_ingest_group_with_errors(self):
    method test_cumulative_across_ingests (line 216) | def test_cumulative_across_ingests(self):
  class TestFailFastStrategy (line 236) | class TestFailFastStrategy:
    method test_default_strategy_raises_on_error (line 237) | def test_default_strategy_raises_on_error(self):
    method test_success_returns_all_trajectories (line 244) | def test_success_returns_all_trajectories(self):
    method test_cancelled_error_propagates (line 251) | def test_cancelled_error_propagates(self):
  class TestRetryOnFailureStrategy (line 263) | class TestRetryOnFailureStrategy:
    method test_no_errors_returns_all_trajectories (line 264) | def test_no_errors_returns_all_trajectories(self):
    method test_retry_recovers_from_transient_failure (line 271) | def test_retry_recovers_from_transient_failure(self):
    method test_retry_creates_fresh_envs (line 282) | def test_retry_creates_fresh_envs(self):
    method test_all_fail_raises_after_retries (line 290) | def test_all_fail_raises_after_retries(self):
    method test_budget_exhausted_cancels_and_raises (line 298) | def test_budget_exhausted_cancels_and_raises(self):
    method test_zero_retries_raises_on_any_failure (line 308) | def test_zero_retries_raises_on_any_failure(self):
    method test_cancelled_error_not_swallowed (line 315) | def test_cancelled_error_not_swallowed(self):
    method test_keyboard_interrupt_not_swallowed (line 321) | def test_keyboard_interrupt_not_swallowed(self):
    method test_make_envs_failure_during_retry_propagates (line 327) | def test_make_envs_failure_during_retry_propagates(self):
  class TestImplErrorHandling (line 351) | class TestImplErrorHandling:
    method test_fail_fast_propagates_error (line 352) | def test_fail_fast_propagates_error(self):
    method test_retry_strategy_returns_none_on_group_error (line 373) | def test_retry_strategy_returns_none_on_group_error(self):
    method test_all_trajectories_failed_returns_none (line 392) | def test_all_trajectories_failed_returns_none(self):

FILE: tinker_cookbook/rl/rollout_logging.py
  class RolloutSummaryExportConfig (line 17) | class RolloutSummaryExportConfig:
  class RolloutSummaryGroup (line 27) | class RolloutSummaryGroup:
  function _json_safe (line 35) | def _json_safe(value: Any) -> Any:
  function write_rollout_summaries_jsonl (line 51) | def write_rollout_summaries_jsonl(
  function rollout_summaries_jsonl_path (line 109) | def rollout_summaries_jsonl_path(log_path: str, file_prefix: str) -> Path:
  function write_rollout_summaries_jsonl_from_groups (line 114) | def write_rollout_summaries_jsonl_from_groups(

FILE: tinker_cookbook/rl/rollout_logging_test.py
  function test_write_rollout_summaries_jsonl_handles_numpy_scalars (line 13) | def test_write_rollout_summaries_jsonl_handles_numpy_scalars(tmp_path: P...

FILE: tinker_cookbook/rl/rollout_strategy.py
  class RolloutResult (line 32) | class RolloutResult:
  class RolloutStrategy (line 40) | class RolloutStrategy(ABC):
    method catches_group_errors (line 52) | def catches_group_errors(self) -> bool:
    method execute (line 58) | async def execute(
  class FailFast (line 73) | class FailFast(RolloutStrategy):
    method execute (line 80) | async def execute(
  class RetryOnFailure (line 95) | class RetryOnFailure(RolloutStrategy):
    method catches_group_errors (line 119) | def catches_group_errors(self) -> bool:
    method execute (line 122) | async def execute(
  function rollout_strategy_from_config (line 205) | def rollout_strategy_from_config(

FILE: tinker_cookbook/rl/rollouts.py
  class RolloutErrorCounter (line 29) | class RolloutErrorCounter:
    method ingest (line 40) | def ingest(self, result: TrajectoryGroup | None) -> None:
    method get_metrics (line 48) | def get_metrics(self, prefix: str = "rollout_errors") -> dict[str, flo...
  function _log_transition_logs (line 59) | def _log_transition_logs(logs: dict[str, Any]) -> None:
  function _log_transition_metrics (line 72) | def _log_transition_metrics(metrics: dict[str, Any] | None) -> None:
  function _log_single_trajectory_details (line 89) | def _log_single_trajectory_details(traj: Trajectory, final_reward: float...
  function do_single_rollout (line 116) | async def do_single_rollout(policy: TokenCompleter, env: Env) -> Traject...
  function do_group_rollout (line 144) | async def do_group_rollout(
  function set_rollout_executor (line 190) | def set_rollout_executor(executor: Executor | None) -> None:
  function get_rollout_executor (line 205) | def get_rollout_executor() -> Executor | None:
  class _RolloutTask (line 211) | class _RolloutTask:
  function _run_rollout_sync (line 223) | def _run_rollout_sync(task: _RolloutTask) -> TrajectoryGroup | None:
  function do_group_rollout_and_filter_constant_reward (line 243) | async def do_group_rollout_and_filter_constant_reward(
  function _do_group_rollout_and_filter_constant_reward_impl (line 291) | async def _do_group_rollout_and_filter_constant_reward_impl(

FILE: tinker_cookbook/rl/shutdown_test.py
  class TestAsyncCounter (line 16) | class TestAsyncCounter:
    method test_decrement_and_get (line 17) | def test_decrement_and_get(self):
    method test_concurrent_decrements (line 26) | def test_concurrent_decrements(self):
  class TestShutdownCascade (line 38) | class TestShutdownCascade:
    method test_dataloader_enqueues_shutdown_sentinels (line 39) | def test_dataloader_enqueues_shutdown_sentinels(self):
    method test_last_worker_signals_training_loop (line 57) | def test_last_worker_signals_training_loop(self):
    method test_full_cascade_no_hang (line 75) | def test_full_cascade_no_hang(self):
    method test_cascade_with_early_shutdown (line 155) | def test_cascade_with_early_shutdown(self):
    method test_requeue_skipped_during_shutdown (line 221) | def test_requeue_skipped_during_shutdown(self):
    method test_none_items_pass_through_during_shutdown (line 252) | def test_none_items_pass_through_during_shutdown(self):

FILE: tinker_cookbook/rl/train.py
  class KLReferenceConfig (line 74) | class KLReferenceConfig:
  function gather_with_progress (line 84) | async def gather_with_progress(
  function _get_evaluator_name (line 110) | def _get_evaluator_name(evaluator: SamplingClientEvaluator) -> str:
  function _sanitize_filename_component (line 118) | def _sanitize_filename_component(text: str) -> str:
  function _maybe_export_rollout_summary_jsonl (line 124) | def _maybe_export_rollout_summary_jsonl(
  function _get_logtree_scope (line 158) | def _get_logtree_scope(
  function _select_representative_inds (line 183) | def _select_representative_inds(scores: list[float], num_inds: int) -> l...
  function print_group (line 191) | def print_group(traj_group: TrajectoryGroup, tokenizer: Tokenizer):
  function _remove_mask (line 245) | def _remove_mask(datum: tinker.Datum) -> tinker.Datum:
  function _training_logprobs_from_fwd_bwd (line 252) | def _training_logprobs_from_fwd_bwd(
  function train_step (line 259) | async def train_step(
  class StreamMinibatchConfig (line 314) | class StreamMinibatchConfig:
  class AsyncConfig (line 332) | class AsyncConfig:
  class Config (line 344) | class Config:
  function run_single_evaluation (line 443) | async def run_single_evaluation(
  function run_evaluations_parallel (line 479) | async def run_evaluations_parallel(
  function do_sync_training_with_stream_minibatch (line 510) | async def do_sync_training_with_stream_minibatch(
  class WrappedTrajectoryGroup (line 642) | class WrappedTrajectoryGroup:
  class _Shutdown (line 659) | class _Shutdown:
  class _AsyncCounter (line 669) | class _AsyncCounter:
    method __init__ (line 672) | def __init__(self, start: int):
    method decrement_and_get (line 676) | async def decrement_and_get(self) -> int:
  function do_async_training (line 683) | async def do_async_training(
  function save_checkpoint_and_get_sampling_client (line 1003) | async def save_checkpoint_and_get_sampling_client(
  function prepare_minibatch (line 1028) | async def prepare_minibatch(
  function compute_full_batch_metrics_and_get_sampling_client (line 1067) | async def compute_full_batch_metrics_and_get_sampling_client(
  function do_train_step_streaming_and_get_sampling_client (line 1107) | async def do_train_step_streaming_and_get_sampling_client(
  function do_train_step_and_get_sampling_client (line 1242) | async def do_train_step_and_get_sampling_client(
  function do_sync_training (line 1292) | async def do_sync_training(
  function main (line 1418) | async def main(

FILE: tinker_cookbook/rl/types.py
  class StepResult (line 24) | class StepResult:
  class Transition (line 42) | class Transition:
  class Env (line 59) | class Env(ABC):
    method initial_observation (line 66) | async def initial_observation(self) -> tuple[Observation, StopCondition]:
    method step (line 70) | async def step(self, action: Action) -> StepResult:
  class Trajectory (line 75) | class Trajectory:
  class RolloutError (line 86) | class RolloutError:
  class EnvGroupBuilder (line 100) | class EnvGroupBuilder(ABC):
    method make_envs (line 127) | async def make_envs(self) -> Sequence[Env]:
    method compute_group_rewards (line 130) | async def compute_group_rewards(
    method cleanup (line 142) | async def cleanup(self) -> None:
    method logging_tags (line 155) | def logging_tags(self) -> list[str]:
  class TrajectoryGroup (line 168) | class TrajectoryGroup:
    method get_total_rewards (line 182) | def get_total_rewards(self) -> list[float]:
  class RLDataset (line 194) | class RLDataset(ABC):
    method get_batch (line 201) | def get_batch(self, index: int) -> Sequence[EnvGroupBuilder]:
    method __len__ (line 205) | def __len__(self) -> int:
  class RLDatasetBuilder (line 210) | class RLDatasetBuilder:
    method __call__ (line 216) | async def __call__(self) -> tuple[RLDataset, RLDataset | None]:

FILE: tinker_cookbook/sandbox/__init__.py
  class SandboxBackend (line 19) | class SandboxBackend(StrEnum):

FILE: tinker_cookbook/sandbox/modal_sandbox.py
  function _read_stream_capped (line 38) | async def _read_stream_capped(stream: object, max_bytes: int) -> str:
  function _is_sandbox_terminated (line 64) | def _is_sandbox_terminated(e: BaseException) -> bool:
  class ModalSandbox (line 73) | class ModalSandbox:
    method __init__ (line 87) | def __init__(
    method create (line 102) | async def create(
    method sandbox_id (line 122) | def sandbox_id(self) -> str:
    method send_heartbeat (line 125) | async def send_heartbeat(self) -> None:
    method run_command (line 128) | async def run_command(
    method read_file (line 152) | async def read_file(
    method write_file (line 162) | async def write_file(
    method cleanup (line 206) | async def cleanup(self) -> None:
  class ModalSandboxPool (line 213) | class ModalSandboxPool:
    method __init__ (line 225) | def __init__(
    method _create (line 246) | async def _create(self) -> ModalSandbox:
    method _maintain_pool (line 251) | async def _maintain_pool(self) -> None:
    method _maintain_pool_step (line 260) | async def _maintain_pool_step(self) -> None:
    method run_in_workdir (line 281) | async def run_in_workdir(
    method terminate (line 330) | async def terminate(self) -> None:

FILE: tinker_cookbook/sandbox/sandbox_interface.py
  class SandboxResult (line 11) | class SandboxResult:
  class SandboxTerminatedError (line 20) | class SandboxTerminatedError(SandboxError):
  class SandboxInterface (line 27) | class SandboxInterface(Protocol):
    method sandbox_id (line 35) | def sandbox_id(self) -> str:
    method send_heartbeat (line 39) | async def send_heartbeat(self) -> None:
    method run_command (line 46) | async def run_command(
    method read_file (line 67) | async def read_file(
    method write_file (line 79) | async def write_file(
    method cleanup (line 92) | async def cleanup(self) -> None:
  class SandboxResource (line 97) | class SandboxResource:
    method __init__ (line 100) | def __init__(self, sandbox: SandboxInterface):
    method send_heartbeat (line 103) | async def send_heartbeat(self) -> None:
    method cleanup (line 106) | async def cleanup(self) -> None:

FILE: tinker_cookbook/sandbox/sandboxfusion.py
  class SandboxFusionClient (line 23) | class SandboxFusionClient:
    method __init__ (line 37) | def __init__(
    method _get_session (line 47) | async def _get_session(self) -> aiohttp.ClientSession:
    method run (line 67) | async def run(
    method close (line 117) | async def close(self) -> None:

FILE: tinker_cookbook/scripts/merge_tinker_adapter_to_hf_model.py
  function main (line 18) | def main():

FILE: tinker_cookbook/scripts/test_tool_calling_e2e.py
  function print_result (line 87) | def print_result(
  function test_model (line 110) | async def test_model(
  function main (line 162) | async def main():

FILE: tinker_cookbook/supervised/common.py
  function compute_mean_nll (line 11) | def compute_mean_nll(
  function create_rightshifted_model_input_and_leftshifted_targets (line 31) | def create_rightshifted_model_input_and_leftshifted_targets(
  function datum_from_model_input_weights (line 68) | def datum_from_model_input_weights(

FILE: tinker_cookbook/supervised/data.py
  function conversation_to_datum (line 20) | def conversation_to_datum(
  function _one_of (line 33) | def _one_of(a: Any, b: Any) -> bool:
  class SupervisedDatasetFromHFDataset (line 37) | class SupervisedDatasetFromHFDataset(SupervisedDataset):
    method __init__ (line 38) | def __init__(
    method get_batch (line 54) | def get_batch(self, index: int) -> list[tinker.Datum]:
    method set_epoch (line 64) | def set_epoch(self, seed: int = 0):
    method __len__ (line 67) | def __len__(self) -> int:
  class StreamingSupervisedDatasetFromHFDataset (line 71) | class StreamingSupervisedDatasetFromHFDataset(SupervisedDataset):
    method __init__ (line 72) | def __init__(
    method get_batch (line 93) | def get_batch(self, index: int) -> list[tinker.Datum]:
    method set_epoch (line 117) | def set_epoch(self, seed: int = 0):
    method __len__ (line 122) | def __len__(self) -> int:
  class FromConversationFileBuilder (line 127) | class FromConversationFileBuilder(ChatDatasetBuilder):
    method __call__ (line 132) | def __call__(self) -> tuple[SupervisedDataset, SupervisedDataset | None]:

FILE: tinker_cookbook/supervised/nll_evaluator.py
  class NLLEvaluator (line 10) | class NLLEvaluator(TrainingClientEvaluator):
    method __init__ (line 11) | def __init__(self, data: list[tinker.Datum], name: str = "test"):
    method __call__ (line 15) | async def __call__(self, training_client: tinker.TrainingClient) -> di...
    method from_dataset (line 25) | def from_dataset(cls, dataset: SupervisedDataset, name: str = "test") ...

FILE: tinker_cookbook/supervised/resume_test.py
  class StopTrainingException (line 18) | class StopTrainingException(Exception):
  function create_mock_logger_with_jsonl (line 22) | def create_mock_logger_with_jsonl(
  function checkpoint_resume (line 53) | def checkpoint_resume():

FILE: tinker_cookbook/supervised/train.py
  class Config (line 39) | class Config:
  class SubmittedBatch (line 86) | class SubmittedBatch:
  function run_evals (line 99) | async def run_evals(
  function main (line 152) | async def main(config: Config):

FILE: tinker_cookbook/supervised/types.py
  class SupervisedDataset (line 16) | class SupervisedDataset:
    method get_batch (line 21) | def get_batch(self, index: int) -> list[tinker.Datum]:
    method __len__ (line 24) | def __len__(self) -> int:
    method set_epoch (line 27) | def set_epoch(self, seed: int = 0):
  class SupervisedDatasetBuilder (line 40) | class SupervisedDatasetBuilder:
    method __call__ (line 45) | def __call__(self) -> tuple[SupervisedDataset, SupervisedDataset | None]:
  class ChatDatasetBuilderCommonConfig (line 50) | class ChatDatasetBuilderCommonConfig:
  class ChatDatasetBuilder (line 65) | class ChatDatasetBuilder(SupervisedDatasetBuilder):
    method __call__ (line 73) | def __call__(self) -> tuple[SupervisedDataset, SupervisedDataset | None]:
    method tokenizer (line 80) | def tokenizer(self) -> Tokenizer:
    method renderer (line 84) | def renderer(self) -> renderers.Renderer:

FILE: tinker_cookbook/supervised/viz_sft_dataset.py
  class Config (line 19) | class Config:
  function run (line 27) | def run(cfg: Config):

FILE: tinker_cookbook/third_party/litellm/provider.py
  class _SamplingResult (line 63) | class _SamplingResult:
  function _prepare_messages_with_tools (line 74) | def _prepare_messages_with_tools(
  function _sample_chat_completion (line 101) | async def _sample_chat_completion(
  function _sampling_result_to_chat_completion_dict (line 154) | def _sampling_result_to_chat_completion_dict(result: _SamplingResult) ->...
  function _extract_sampling_params (line 207) | def _extract_sampling_params(optional_params: dict[str, Any]) -> dict[st...
  function _build_model_response (line 225) | def _build_model_response(
  function _map_tinker_error (line 262) | def _map_tinker_error(exc: Exception) -> Exception:
  class _ClientBundle (line 306) | class _ClientBundle:
  class TinkerLiteLLMProvider (line 315) | class TinkerLiteLLMProvider(CustomLLM):
    method __init__ (line 318) | def __init__(
    method _get_service_client (line 326) | def _get_service_client(self) -> tinker.ServiceClient:
    method _get_or_create_client (line 331) | def _get_or_create_client(self, base_model: str) -> _ClientBundle:
    method set_client (line 348) | def set_client(
    method acompletion (line 372) | async def acompletion(
    method completion (line 415) | def completion(
  function register_litellm_provider (line 477) | def register_litellm_provider(

FILE: tinker_cookbook/third_party/litellm/provider_test.py
  class FakeSampledSequence (line 26) | class FakeSampledSequence:
  class FakeSampleResponse (line 33) | class FakeSampleResponse:
  function _make_sampling_result (line 37) | def _make_sampling_result(
  class TestExtractSamplingParams (line 63) | class TestExtractSamplingParams:
    method test_all_params (line 64) | def test_all_params(self) -> None:
    method test_max_completion_tokens (line 83) | def test_max_completion_tokens(self) -> None:
    method test_empty (line 87) | def test_empty(self) -> None:
  class TestPrepareMessagesWithTools (line 96) | class TestPrepareMessagesWithTools:
    method test_extracts_system_message (line 97) | def test_extracts_system_message(self) -> None:
    method test_no_system_message (line 122) | def test_no_system_message(self) -> None:
  class TestSamplingResultToDict (line 147) | class TestSamplingResultToDict:
    method test_basic_response (line 148) | def test_basic_response(self) -> None:
    method test_parse_failure_gives_length_finish (line 161) | def test_parse_failure_gives_length_finish(self) -> None:
    method test_tool_calls_in_response (line 166) | def test_tool_calls_in_response(self) -> None:
    method test_tool_call_without_id_gets_generated (line 180) | def test_tool_call_without_id_gets_generated(self) -> None:
    method test_list_content_formatted_as_string (line 189) | def test_list_content_formatted_as_string(self) -> None:
  class TestSampleChatCompletion (line 204) | class TestSampleChatCompletion:
    method test_basic_flow (line 206) | async def test_basic_flow(self) -> None:
    method test_with_tools (line 241) | async def test_with_tools(self) -> None:
    method test_custom_stop_sequences (line 277) | async def test_custom_stop_sequences(self) -> None:
  class TestTinkerLiteLLMProvider (line 310) | class TestTinkerLiteLLMProvider:
    method test_register_adds_to_provider_map (line 311) | def test_register_adds_to_provider_map(self) -> None:
    method test_set_client_creates_bundle (line 344) | def test_set_client_creates_bundle(self) -> None:
    method test_set_client_updates_existing_bundle (line 365) | def test_set_client_updates_existing_bundle(self) -> None:
    method test_acompletion_requires_base_model (line 387) | async def test_acompletion_requires_base_model(self) -> None:
    method test_acompletion_basic (line 409) | async def test_acompletion_basic(self) -> None:

FILE: tinker_cookbook/third_party/openai_compat.py
  function openai_messages_to_tinker (line 20) | def openai_messages_to_tinker(messages: list[dict[str, Any]]) -> list[Me...
  function openai_tools_to_tinker (line 38) | def openai_tools_to_tinker(tools: list[dict[str, Any]]) -> list[ToolSpec]:

FILE: tinker_cookbook/third_party/openai_compat_test.py
  class TestOpenAIMessagesToTinker (line 16) | class TestOpenAIMessagesToTinker:
    method test_basic_messages (line 17) | def test_basic_messages(self) -> None:
    method test_message_with_tool_call_id (line 28) | def test_message_with_tool_call_id(self) -> None:
    method test_message_with_name (line 35) | def test_message_with_name(self) -> None:
    method test_message_with_tool_calls (line 40) | def test_message_with_tool_calls(self) -> None:
    method test_none_content_becomes_empty_string (line 61) | def test_none_content_becomes_empty_string(self) -> None:
  class TestOpenAIToolsToTinker (line 72) | class TestOpenAIToolsToTinker:
    method test_basic_tool (line 73) | def test_basic_tool(self) -> None:
    method test_skips_non_function_tools (line 93) | def test_skips_non_function_tools(self) -> None:
    method test_missing_description (line 105) | def test_missing_description(self) -> None:
    method test_empty_tools (line 115) | def test_empty_tools(self) -> None:

FILE: tinker_cookbook/tokenizer_utils.py
  function register_tokenizer (line 28) | def register_tokenizer(
  function get_registered_tokenizer_names (line 47) | def get_registered_tokenizer_names() -> list[str]:
  function is_tokenizer_registered (line 52) | def is_tokenizer_registered(name: str) -> bool:
  function unregister_tokenizer (line 57) | def unregister_tokenizer(name: str) -> bool:
  function get_tokenizer (line 72) | def get_tokenizer(model_name: str) -> Tokenizer:
  function _get_hf_tokenizer (line 85) | def _get_hf_tokenizer(model_name: str) -> Tokenizer:

FILE: tinker_cookbook/tokenizer_utils_test.py
  function _clear_cache (line 9) | def _clear_cache() -> None:
  function test_kimi_k2_thinking_trusts_remote_code_without_env (line 15) | def test_kimi_k2_thinking_trusts_remote_code_without_env(
  function test_kimi_k25_trusts_remote_code_without_env (line 30) | def test_kimi_k25_trusts_remote_code_without_env(
  function test_no_trust_remote_code_by_default (line 45) | def test_no_trust_remote_code_by_default(
  function test_env_var_enables_trust_remote_code (line 59) | def test_env_var_enables_trust_remote_code(
  function test_env_var_falsy_values_do_not_enable (line 74) | def test_env_var_falsy_values_do_not_enable(

FILE: tinker_cookbook/tool_use/agent_tool_message_env.py
  class AgentToolMessageEnv (line 24) | class AgentToolMessageEnv(MessageEnv):
    method __post_init__ (line 37) | def __post_init__(self) -> None:
    method initial_observation (line 40) | async def initial_observation(self) -> list[Message]:
    method _handle_tool_calls (line 45) | async def _handle_tool_calls(self, tool_calls: list[ToolCall]) -> list...
    method step (line 69) | async def step(self, message: Message) -> MessageStepResult:
  function build_agent_tool_env (line 126) | def build_agent_tool_env(

FILE: tinker_cookbook/tool_use/agent_tool_message_env_test.py
  function _noop_reward (line 16) | async def _noop_reward(history: list[Message]) -> tuple[float, dict[str,...
  class StubTool (line 20) | class StubTool:
    method __init__ (line 23) | def __init__(self, name: str, response: str, should_stop: bool = False):
    method name (line 29) | def name(self) -> str:
    method description (line 33) | def description(self) -> str:
    method parameters_schema (line 37) | def parameters_schema(self) -> dict[str, Any]:
    method run (line 40) | async def run(self, input: ToolInput) -> ToolResult:
    method to_spec (line 48) | def to_spec(self) -> ToolSpec:
  function _make_tool_call (line 56) | def _make_tool_call(name: str, arguments: str = "{}", call_id: str = "ca...
  class TestStepLogs (line 65) | class TestStepLogs:
    method test_logs_assistant_content (line 68) | def test_logs_assistant_content(self):
    method test_logs_empty_when_no_content (line 82) | def test_logs_empty_when_no_content(self):
    method test_logs_multimodal_content (line 96) | def test_logs_multimodal_content(self):
    method test_logs_tool_calls_and_results (line 117) | def test_logs_tool_calls_and_results(self):
    method test_logs_multiple_tool_calls (line 137) | def test_logs_multiple_tool_calls(self):
    method test_logs_no_tool_calls (line 160) | def test_logs_no_tool_calls(self):

FILE: tinker_cookbook/tool_use/tools.py
  function simple_tool_result (line 25) | def simple_tool_result(
  function error_tool_result (line 71) | def error_tool_result(
  function _extract_annotated_info (line 115) | def _extract_annotated_info(annotation: Any) -> tuple[Any, FieldInfo | N...
  class FunctionTool (line 140) | class FunctionTool:
    method __init__ (line 147) | def __init__(self, fn: Callable[..., Any]):
    method name (line 155) | def name(self) -> str:
    method description (line 159) | def description(self) -> str:
    method _build_params_model (line 162) | def _build_params_model(self) -> type[BaseModel]:
    method parameters_schema (line 190) | def parameters_schema(self) -> dict[str, Any]:
    method to_spec (line 194) | def to_spec(self) -> ToolSpec:
    method run (line 202) | async def run(self, input: ToolInput) -> ToolResult:
    method __get__ (line 249) | def __get__(self, obj: Any, objtype: type | None = None) -> FunctionTool:
  function tool (line 263) | def tool(fn: Callable[..., Any]) -> FunctionTool:
  function handle_tool_call (line 294) | async def handle_tool_call(

FILE: tinker_cookbook/tool_use/types.py
  class ToolInput (line 12) | class ToolInput:
  class ToolResult (line 20) | class ToolResult:
  class Tool (line 30) | class Tool(Protocol):
    method name (line 34) | def name(self) -> str:
    method description (line 39) | def description(self) -> str:
    method parameters_schema (line 44) | def parameters_schema(self) -> dict[str, Any]:
    method run (line 48) | async def run(self, input: ToolInput) -> ToolResult:
    method to_spec (line 52) | def to_spec(self) -> ToolSpec:

FILE: tinker_cookbook/utils/code_state.py
  function code_state (line 11) | def code_state(modules: Sequence[str | ModuleType] = ("tinker_cookbook",...

FILE: tinker_cookbook/utils/deprecation.py
  function _parse_version (line 47) | def _parse_version(v: str) -> tuple[int, ...]:
  function _current_version (line 68) | def _current_version() -> tuple[int, ...]:
  function _check_past_removal (line 77) | def _check_past_removal(removal_
Condensed preview — 366 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (2,789K chars).
[
  {
    "path": ".claude/skills/checkpoints/SKILL.md",
    "chars": 4290,
    "preview": "---\nname: checkpoints\ndescription: Guide for checkpointing — saving, loading, and resuming training with CheckpointRecor"
  },
  {
    "path": ".claude/skills/ci/SKILL.md",
    "chars": 4021,
    "preview": "---\nname: ci\ndescription: Guide for testing conventions and CI pipelines — unit tests, integration smoke tests, pytest m"
  },
  {
    "path": ".claude/skills/completers/SKILL.md",
    "chars": 2584,
    "preview": "---\nname: completers\ndescription: Guide for using completers — TokenCompleter and MessageCompleter for text generation d"
  },
  {
    "path": ".claude/skills/contributing/SKILL.md",
    "chars": 2924,
    "preview": "---\nname: contributing\ndescription: Guide for contributing to the tinker-cookbook repo — development setup, code style, "
  },
  {
    "path": ".claude/skills/datasets/SKILL.md",
    "chars": 4033,
    "preview": "---\nname: datasets\ndescription: Guide for dataset construction — SupervisedDatasetBuilder, RLDatasetBuilder, ChatDataset"
  },
  {
    "path": ".claude/skills/distillation/SKILL.md",
    "chars": 6677,
    "preview": "---\nname: distillation\ndescription: Set up and run knowledge distillation (on-policy, off-policy, or multi-teacher) from"
  },
  {
    "path": ".claude/skills/dpo/SKILL.md",
    "chars": 5297,
    "preview": "---\nname: dpo\ndescription: Set up and run Direct Preference Optimization (DPO) training on preference datasets using the"
  },
  {
    "path": ".claude/skills/environments/SKILL.md",
    "chars": 5125,
    "preview": "---\nname: environments\ndescription: Guide for defining RL environments — the Env protocol, EnvGroupBuilder, RLDataset, a"
  },
  {
    "path": ".claude/skills/evals/SKILL.md",
    "chars": 3453,
    "preview": "---\nname: evals\ndescription: Guide for evaluation — inline evaluators, Inspect AI integration, and custom evaluators for"
  },
  {
    "path": ".claude/skills/grpo/SKILL.md",
    "chars": 6201,
    "preview": "---\nname: grpo\ndescription: Set up and run reinforcement learning with verifiable rewards (RLVR/GRPO) for math, code, or"
  },
  {
    "path": ".claude/skills/hyperparams/SKILL.md",
    "chars": 4656,
    "preview": "---\nname: hyperparams\ndescription: Guide for hyperparameter selection — learning rate formulas, LoRA rank, batch size, g"
  },
  {
    "path": ".claude/skills/logging/SKILL.md",
    "chars": 6864,
    "preview": "---\nname: logging\ndescription: Guide for training outputs, metrics logging, logtree reports, tracing/profiling, and debu"
  },
  {
    "path": ".claude/skills/manage-skills/SKILL.md",
    "chars": 7266,
    "preview": "---\nname: manage-skills\ndescription: Create, update, or organize Claude Code skills in this repo. Use when adding a new "
  },
  {
    "path": ".claude/skills/models/SKILL.md",
    "chars": 4999,
    "preview": "---\nname: models\ndescription: Guide for choosing models in Tinker — available model families, model types (base, instruc"
  },
  {
    "path": ".claude/skills/multiturn-rl/SKILL.md",
    "chars": 6295,
    "preview": "---\nname: multiturn-rl\ndescription: Set up and run multi-turn RL training for interactive environments (terminal tasks, "
  },
  {
    "path": ".claude/skills/new-recipe/SKILL.md",
    "chars": 5813,
    "preview": "---\nname: new-recipe\ndescription: Scaffold a new training recipe for the Tinker cookbook following repo conventions. Use"
  },
  {
    "path": ".claude/skills/renderers/SKILL.md",
    "chars": 3651,
    "preview": "---\nname: renderers\ndescription: Guide for using renderers — the bridge between chat-style messages and token sequences."
  },
  {
    "path": ".claude/skills/rlhf/SKILL.md",
    "chars": 5525,
    "preview": "---\nname: rlhf\ndescription: Set up and run the full RLHF pipeline (SFT, reward model training, RL from reward model) usi"
  },
  {
    "path": ".claude/skills/setup/SKILL.md",
    "chars": 2449,
    "preview": "---\nname: setup\ndescription: Guide for installing Tinker, setting up the environment, getting an API key, and verifying "
  },
  {
    "path": ".claude/skills/sft/SKILL.md",
    "chars": 5523,
    "preview": "---\nname: sft\ndescription: Set up and run supervised fine-tuning (SFT) on instruction or chat datasets using the Tinker "
  },
  {
    "path": ".claude/skills/tinker-cli/SKILL.md",
    "chars": 3782,
    "preview": "---\nname: tinker-cli\ndescription: Guide for the Tinker CLI — managing training runs, checkpoints, downloading weights, a"
  },
  {
    "path": ".claude/skills/tinker-sdk/SKILL.md",
    "chars": 7320,
    "preview": "---\nname: tinker-sdk\ndescription: Guide for using the Tinker Python SDK APIs — ServiceClient, TrainingClient, SamplingCl"
  },
  {
    "path": ".claude/skills/tinker-types/SKILL.md",
    "chars": 6679,
    "preview": "---\nname: tinker-types\ndescription: Reference for Tinker SDK types — Datum, ModelInput, TensorData, SamplingParams, resp"
  },
  {
    "path": ".claude/skills/weights/SKILL.md",
    "chars": 3753,
    "preview": "---\nname: weights\ndescription: Guide for the weight lifecycle — downloading trained weights from Tinker, merging LoRA ad"
  },
  {
    "path": ".github/workflows/claude-review.yml",
    "chars": 1873,
    "preview": "name: Claude Code\n\npermissions:\n  contents: write        # allow Claude to edit files & push commits\n  pull-requests: wr"
  },
  {
    "path": ".github/workflows/downstream-compat.yaml",
    "chars": 540,
    "preview": "name: downstream-compat\n\non:\n  workflow_dispatch:\n  push:\n    branches: [main]\n  pull_request:\n\njobs:\n  downstream-compa"
  },
  {
    "path": ".github/workflows/nightly.yaml",
    "chars": 2599,
    "preview": "name: nightly\n\non:\n  workflow_run:\n    workflows: [\"smoke-test-recipes\"]\n    types: [completed]\n  workflow_dispatch:\n\npe"
  },
  {
    "path": ".github/workflows/pre-commit.yaml",
    "chars": 248,
    "preview": "name: pre-commit\n\non:\n  push:\n    branches: [ main ]\n  pull_request:\n\njobs:\n  pre-commit:\n    runs-on: ubuntu-latest\n\n  "
  },
  {
    "path": ".github/workflows/publish-pypi.yaml",
    "chars": 1895,
    "preview": "name: publish-pypi\n\non:\n  push:\n    tags: [\"v[0-9]+.[0-9]+.[0-9]+\"]  # only semver tags like v1.2.3\n  workflow_dispatch:"
  },
  {
    "path": ".github/workflows/pyright.yaml",
    "chars": 678,
    "preview": "name: pyright\n\non:\n  push:\n    branches: [main]\n  pull_request:\n\njobs:\n  type-check:\n    runs-on: ubuntu-latest\n\n    str"
  },
  {
    "path": ".github/workflows/pytest.yaml",
    "chars": 749,
    "preview": "name: pytest\n\non:\n  workflow_dispatch:\n  push:\n    branches: [main]\n  pull_request:\n\njobs:\n  test:\n    runs-on: ubuntu-l"
  },
  {
    "path": ".github/workflows/smoke-test-evals.yaml",
    "chars": 822,
    "preview": "name: smoke-test-evals\n\non:\n  workflow_dispatch:  # manual trigger\n  schedule:\n    - cron: \"0 7 * * *\"  # daily at 7am U"
  },
  {
    "path": ".github/workflows/smoke-test-recipes.yaml",
    "chars": 1676,
    "preview": "name: smoke-test-recipes\n\non:\n  workflow_dispatch:  # manual trigger\n  schedule:\n    - cron: \"0 6 * * *\"  # daily at 6am"
  },
  {
    "path": ".gitignore",
    "chars": 79,
    "preview": "**/__pycache__\ntinker_cookbook/_version.py\n.DS_Store\n.env\n.env.*\n.venv\nuv.lock\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "chars": 588,
    "preview": "default_install_hook_types: [pre-commit, pre-push]\n\nrepos:\n- repo: https://github.com/pre-commit/pre-commit-hooks\n  rev:"
  },
  {
    "path": ".sync_state",
    "chars": 118,
    "preview": "{\n  \"last_synced_sha\": \"b4fee215e812ae5a6b0096ba37b3d9edc4f99cd5\",\n  \"last_sync_time\": \"2025-10-09T00:09:30.116486\"\n}\n"
  },
  {
    "path": "AGENTS.md",
    "chars": 5957,
    "preview": "# Tinker Cookbook Agent Guide\n\nQuick reference for agents working on `tinker-cookbook`. Full documentation is in `docs/`"
  },
  {
    "path": "CHANGELOG.md",
    "chars": 10658,
    "preview": "# Changelog\n\nA curated feed of notable changes to `tinker-cookbook`. Small bugfixes and minor argument additions are omi"
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 7458,
    "preview": "# Contributing to Tinker Cookbook\n\nWe welcome contributions! This project is built in the spirit of open science and col"
  },
  {
    "path": "LICENSE",
    "chars": 11352,
    "preview": "\n                                 Apache License\n                           Version 2.0, January 2004\n                  "
  },
  {
    "path": "README.md",
    "chars": 6869,
    "preview": "<h1 align=\"center\">Tinker Cookbook</h1>\n<div align=\"center\">\n  <img src=\"assets/tinker-cover.png\" width=\"60%\" />\n</div>\n"
  },
  {
    "path": "docs/api-reference/apifuture.md",
    "chars": 3390,
    "preview": "API Future classes for handling async operations with retry logic.\n\n## `APIFuture` Objects\n\n```python\nclass APIFuture(AB"
  },
  {
    "path": "docs/api-reference/exceptions.md",
    "chars": 2969,
    "preview": "## `TinkerError` Objects\n\n```python\nclass TinkerError(Exception)\n```\n\nBase exception for all Tinker-related errors.\n\n## "
  },
  {
    "path": "docs/api-reference/restclient.md",
    "chars": 16025,
    "preview": "RestClient for Tinker API REST operations.\n\n## `RestClient` Objects\n\n```python\nclass RestClient(TelemetryProvider)\n```\n\n"
  },
  {
    "path": "docs/api-reference/samplingclient.md",
    "chars": 5195,
    "preview": "SamplingClient for Tinker API.\n\n## `SamplingClient` Objects\n\n```python\nclass SamplingClient(TelemetryProvider, QueueStat"
  },
  {
    "path": "docs/api-reference/serviceclient.md",
    "chars": 7624,
    "preview": "ServiceClient for Tinker API.\n\n## `ServiceClient` Objects\n\n```python\nclass ServiceClient(TelemetryProvider)\n```\n\nThe Ser"
  },
  {
    "path": "docs/api-reference/trainingclient.md",
    "chars": 12248,
    "preview": "TrainingClient for Tinker API.\n\n## `TrainingClient` Objects\n\n```python\nclass TrainingClient(TelemetryProvider)\n```\n\nClie"
  },
  {
    "path": "docs/api-reference/types.md",
    "chars": 16784,
    "preview": "## `LoadWeightsResponse` Objects\n\n```python\nclass LoadWeightsResponse(BaseModel)\n```\n\n#### `path`\n\nA tinker URI for mode"
  },
  {
    "path": "docs/async.mdx",
    "chars": 3170,
    "preview": "# Async and Futures\n\n## Sync and Async APIs\n\nEvery method in the Tinker Python library has both a synchronous (sync) and"
  },
  {
    "path": "docs/compatible-apis/openai.mdx",
    "chars": 3525,
    "preview": "# OpenAI API Compatible Inference (in beta)\n\nOpenAI-compatible inference lets you interact with any model checkpoint in "
  },
  {
    "path": "docs/completers.mdx",
    "chars": 2783,
    "preview": "import { CookbookLink } from '../components/CookbookLink'\n\n# Completers\n\nThe concept of policies is crucial to the RL tr"
  },
  {
    "path": "docs/dev-tips.mdx",
    "chars": 414,
    "preview": "# Developer Tips\n\n## AI-assisted development\n\nThis documentation is mirrored in the [docs folder of Tinker Cookbook](htt"
  },
  {
    "path": "docs/docs-outline.mdx",
    "chars": 2635,
    "preview": "# Navigating these docs\n\nThese docs provide guides to both Tinker and the Tinker Cookbook.\n\nThe first half, \"Using the T"
  },
  {
    "path": "docs/download-weights.mdx",
    "chars": 823,
    "preview": "# Downloading weights\n\n### CLI\n\n```bash\ntinker checkpoint download $TINKER_CHECKPOINT_PATH\n```\n\nSee `tinker checkpoint d"
  },
  {
    "path": "docs/evals.mdx",
    "chars": 8346,
    "preview": "import { Callout } from 'nextra/components'\nimport { CookbookLink } from '../components/CookbookLink'\n\n# Evaluations\n\nOu"
  },
  {
    "path": "docs/index.mdx",
    "chars": 2618,
    "preview": "# Tinker: a training API for researchers and developers\n\nTinker lets you focus on what matters in LLM fine-tuning – your"
  },
  {
    "path": "docs/install.mdx",
    "chars": 1200,
    "preview": "# Installing Tinker\n\nInstall the Tinker SDK with:\n\n```bash\npip install tinker\n```\n\nInstallation makes two components ava"
  },
  {
    "path": "docs/lora-primer.mdx",
    "chars": 4337,
    "preview": "# LoRA Primer\n\nTinker supports [LoRA fine-tuning](https://arxiv.org/abs/2106.09685), which adjusts a small number of par"
  },
  {
    "path": "docs/losses.mdx",
    "chars": 15167,
    "preview": "import { CookbookLink } from '../components/CookbookLink'\n\n# Loss functions in Tinker\n\nFor most use cases, you can use t"
  },
  {
    "path": "docs/model-lineup.mdx",
    "chars": 4746,
    "preview": "import { FilterableModelTable } from '../components/FilterableModelTable'\n\n# Available Models in Tinker\n\nThe table below"
  },
  {
    "path": "docs/overview-building.mdx",
    "chars": 1088,
    "preview": "# Overview: Tinker Cookbook\n\nThe next sections provide a variety of guides for how to use the Tinker API for research an"
  },
  {
    "path": "docs/preferences/dpo-guide.mdx",
    "chars": 5038,
    "preview": "import { Callout } from 'nextra/components'\nimport { CookbookLink } from '../../components/CookbookLink'\n\n# Direct Prefe"
  },
  {
    "path": "docs/preferences/rlhf-example.mdx",
    "chars": 1575,
    "preview": "import { CookbookLink } from '../../components/CookbookLink'\n\n# Reinforcement Learning from Human Feedback\n\nWe've provid"
  },
  {
    "path": "docs/preferences.mdx",
    "chars": 1159,
    "preview": "import { CookbookLink } from '../components/CookbookLink'\n\n# Preferences\n\n# Learning from Preferences\n\nIn this section, "
  },
  {
    "path": "docs/publish-weights.mdx",
    "chars": 2233,
    "preview": "# Publishing weights\n\nIf you've trained a model that you'd like to share with the community, you can\npublish any number "
  },
  {
    "path": "docs/rendering.mdx",
    "chars": 12948,
    "preview": "import { CookbookLink } from \"../components/CookbookLink\";\n\n# Rendering to tokens\n\nRendering converts list-of-message da"
  },
  {
    "path": "docs/rl/rl-basic.mdx",
    "chars": 1725,
    "preview": "import { CookbookLink } from '../../components/CookbookLink'\n\n# Your First RL Run\n\nWe've provided a minimal script that "
  },
  {
    "path": "docs/rl/rl-envs.mdx",
    "chars": 3050,
    "preview": "import { CookbookLink } from '../../components/CookbookLink'\n\n# RL Environments\n\nHere, we'll explain how to create your "
  },
  {
    "path": "docs/rl/rl-hyperparams.mdx",
    "chars": 5204,
    "preview": "# RL Hyperparameters\n\nThis guide covers the key hyperparameters for reinforcement learning training, from core settings "
  },
  {
    "path": "docs/rl/rl-logging.mdx",
    "chars": 4408,
    "preview": "import { CookbookLink } from '../../components/CookbookLink'\n\n# RL Training Outputs\n\nEach RL training run writes files t"
  },
  {
    "path": "docs/rl/rl-loops.mdx",
    "chars": 1318,
    "preview": "import { CookbookLink } from '../../components/CookbookLink'\n\n# Reinforcement Learning Training Loop\n\nWe've provided a s"
  },
  {
    "path": "docs/rl/sequence-extension.mdx",
    "chars": 8685,
    "preview": "import { CookbookLink } from '../../components/CookbookLink'\n\n# Sequence Extension Property in Multi-Turn RL\n\nWhen runni"
  },
  {
    "path": "docs/rl.mdx",
    "chars": 3067,
    "preview": "import { CookbookLink } from '../components/CookbookLink'\n\n# Reinforcement learning\n\nReinforcement learning (RL) means l"
  },
  {
    "path": "docs/save-load.mdx",
    "chars": 2457,
    "preview": "# Saving and loading weights and optimizer state\n\nDuring training, you'll need to save checkpoints for two main purposes"
  },
  {
    "path": "docs/supervised-learning/prompt-distillation.mdx",
    "chars": 4159,
    "preview": "import { CookbookLink } from '../../components/CookbookLink'\n\n# Prompt Distillation\n\nPrompt distillation is a training t"
  },
  {
    "path": "docs/supervised-learning/sl-basic.mdx",
    "chars": 2933,
    "preview": "import { CookbookLink } from '../../components/CookbookLink'\n\n# Basic Supervised Learning\n\nThis guide walks you through "
  },
  {
    "path": "docs/supervised-learning/sl-hyperparams.mdx",
    "chars": 2825,
    "preview": "# Supervised Learning Hyperparameters\n\nSuccessful LLM fine-tuning requires careful hyperparameter tuning. While the most"
  },
  {
    "path": "docs/supervised-learning/sl-loop.mdx",
    "chars": 698,
    "preview": "import { CookbookLink } from '../../components/CookbookLink'\n\n# Supervised Learning Training Loop\n\nWe've provided a simp"
  },
  {
    "path": "docs/supervised-learning/sweep-case-study.mdx",
    "chars": 4678,
    "preview": "import { CookbookLink } from '../../components/CookbookLink'\n\n# Sweep case study\n\nIn [Supervised Learning Hyperparameter"
  },
  {
    "path": "docs/supervised-learning.mdx",
    "chars": 1616,
    "preview": "import { CookbookLink } from '../components/CookbookLink'\n\n# Cookbook: Supervised learning\n\nThis section takes you throu"
  },
  {
    "path": "docs/support.mdx",
    "chars": 266,
    "preview": "# Support\n\n## Get Tinker Support\n\nOur official support email is [tinker@thinkingmachines.ai](mailto:tinker@thinkingmachi"
  },
  {
    "path": "docs/training-sampling.mdx",
    "chars": 11838,
    "preview": "import { Callout } from 'nextra/components'\n\n# Getting started with training and sampling\n\nIn this guide, we'll step you"
  },
  {
    "path": "docs/under-the-hood.mdx",
    "chars": 4816,
    "preview": "# Under the Hood\n\nThis page explains some implementation details of Tinker, which are important for understanding how to"
  },
  {
    "path": "pyproject.toml",
    "chars": 4685,
    "preview": "[project]\nname = \"tinker_cookbook\"\ndynamic = [\"version\"]\ndescription = \"Implementations of post-training algorithms usin"
  },
  {
    "path": "tests/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/compare_sampling_training_logprobs.py",
    "chars": 5828,
    "preview": "import asyncio\nimport logging\nimport time\nfrom functools import cache\n\nimport chz\nimport httpx\nimport pandas as pd\nimpor"
  },
  {
    "path": "tests/conftest.py",
    "chars": 1350,
    "preview": "\"\"\"Pytest configuration for integration tests.\n\nRecipes NOT yet covered by integration tests:\n  - code_rl: requires exte"
  },
  {
    "path": "tests/downstream_compat/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/downstream_compat/conftest.py",
    "chars": 619,
    "preview": "\"\"\"Auto-apply the downstream_compat marker to every test in this directory.\n\nThese tests verify that tinker-cookbook's p"
  },
  {
    "path": "tests/downstream_compat/sig_helpers.py",
    "chars": 986,
    "preview": "\"\"\"Helpers for checking function/method signatures in downstream compat tests.\"\"\"\n\nimport inspect\n\n\ndef get_param_names("
  },
  {
    "path": "tests/downstream_compat/test_checkpoint_utils.py",
    "chars": 2960,
    "preview": "\"\"\"Downstream compatibility tests for tinker_cookbook.checkpoint_utils.\n\nValidates that checkpoint management types and "
  },
  {
    "path": "tests/downstream_compat/test_cli_and_hyperparam.py",
    "chars": 1331,
    "preview": "\"\"\"Downstream compatibility tests for tinker_cookbook.cli_utils and hyperparam_utils.\n\nValidates that CLI utilities and "
  },
  {
    "path": "tests/downstream_compat/test_completers.py",
    "chars": 2512,
    "preview": "\"\"\"Downstream compatibility tests for tinker_cookbook.completers.\n\nValidates that completer interfaces and types remain "
  },
  {
    "path": "tests/downstream_compat/test_model_info.py",
    "chars": 2559,
    "preview": "\"\"\"Downstream compatibility tests for tinker_cookbook.model_info.\n\nValidates that model metadata functions and ModelAttr"
  },
  {
    "path": "tests/downstream_compat/test_recipes.py",
    "chars": 6338,
    "preview": "\"\"\"Downstream compatibility tests for tinker_cookbook.recipes.\n\nValidates that recipe modules used by downstream remain "
  },
  {
    "path": "tests/downstream_compat/test_renderers.py",
    "chars": 10615,
    "preview": "\"\"\"Downstream compatibility tests for tinker_cookbook.renderers.\n\nValidates that the renderer public API surface — types"
  },
  {
    "path": "tests/downstream_compat/test_rl_train.py",
    "chars": 2873,
    "preview": "\"\"\"Downstream compatibility tests for tinker_cookbook.rl.train and rl.data_processing.\n\nValidates that RL training entry"
  },
  {
    "path": "tests/downstream_compat/test_rl_types.py",
    "chars": 6206,
    "preview": "\"\"\"Downstream compatibility tests for tinker_cookbook.rl.types.\n\nValidates that the RL type system — Env, StepResult, Tr"
  },
  {
    "path": "tests/downstream_compat/test_supervised.py",
    "chars": 1731,
    "preview": "\"\"\"Downstream compatibility tests for tinker_cookbook.supervised.\n\nValidates that supervised training types and data uti"
  },
  {
    "path": "tests/downstream_compat/test_tokenizer_utils.py",
    "chars": 1714,
    "preview": "\"\"\"Downstream compatibility tests for tinker_cookbook.tokenizer_utils.\n\nValidates that the tokenizer registry API remain"
  },
  {
    "path": "tests/downstream_compat/test_utils.py",
    "chars": 3220,
    "preview": "\"\"\"Downstream compatibility tests for tinker_cookbook.utils.\n\nValidates that logging, tracing, and misc utilities remain"
  },
  {
    "path": "tests/helpers.py",
    "chars": 3130,
    "preview": "\"\"\"Shared helpers for recipe smoke tests.\"\"\"\n\nimport os\nimport select\nimport subprocess\nimport time\n\nimport pytest\n\n# Ti"
  },
  {
    "path": "tests/recipes/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/recipes/test_recipe_chat_sl.py",
    "chars": 769,
    "preview": "import pytest\n\nfrom tests.helpers import run_recipe\n\nMODULE = \"tinker_cookbook.recipes.chat_sl.train\"\nLOG_PATH = \"/tmp/t"
  },
  {
    "path": "tests/recipes/test_recipe_dpo.py",
    "chars": 220,
    "preview": "import pytest\n\nfrom tests.helpers import run_recipe\n\n\n@pytest.mark.integration\ndef test_dpo():\n    run_recipe(\n        \""
  },
  {
    "path": "tests/recipes/test_recipe_guess_number.py",
    "chars": 272,
    "preview": "import pytest\n\nfrom tests.helpers import run_recipe\n\n\n@pytest.mark.integration\ndef test_guess_number():\n    run_recipe(\n"
  },
  {
    "path": "tests/recipes/test_recipe_math_rl.py",
    "chars": 1144,
    "preview": "import pytest\n\nfrom tests.helpers import run_recipe\n\nMODULE = \"tinker_cookbook.recipes.math_rl.train\"\n\n\n@pytest.mark.int"
  },
  {
    "path": "tests/recipes/test_recipe_off_policy_reasoning.py",
    "chars": 364,
    "preview": "import pytest\n\nfrom tests.helpers import run_recipe\n\n\n@pytest.mark.integration\ndef test_off_policy_reasoning():\n    run_"
  },
  {
    "path": "tests/recipes/test_recipe_on_policy_distillation.py",
    "chars": 312,
    "preview": "import pytest\n\nfrom tests.helpers import run_recipe\n\n\n@pytest.mark.integration\ndef test_on_policy_distillation():\n    ru"
  },
  {
    "path": "tests/recipes/test_recipe_on_policy_multi_teacher.py",
    "chars": 364,
    "preview": "import pytest\n\nfrom tests.helpers import run_recipe\n\n\n@pytest.mark.integration\ndef test_on_policy_multi_teacher():\n    r"
  },
  {
    "path": "tests/recipes/test_recipe_rlhf_pipeline.py",
    "chars": 227,
    "preview": "import pytest\n\nfrom tests.helpers import run_recipe\n\n\n@pytest.mark.integration\ndef test_rlhf_pipeline():\n    run_recipe("
  },
  {
    "path": "tests/recipes/test_recipe_shorter.py",
    "chars": 228,
    "preview": "import pytest\n\nfrom tests.helpers import run_recipe\n\n\n@pytest.mark.integration\ndef test_shorter():\n    run_recipe(\n     "
  },
  {
    "path": "tests/recipes/test_recipe_text_arena.py",
    "chars": 281,
    "preview": "import pytest\n\nfrom tests.helpers import run_recipe\n\n\n@pytest.mark.integration\ndef test_text_arena():\n    run_recipe(\n  "
  },
  {
    "path": "tests/recipes/test_recipe_twenty_questions.py",
    "chars": 308,
    "preview": "import pytest\n\nfrom tests.helpers import run_recipe\n\n\n@pytest.mark.integration\ndef test_twenty_questions():\n    run_reci"
  },
  {
    "path": "tests/recipes/test_recipe_vlm_classifier.py",
    "chars": 461,
    "preview": "import pytest\n\nfrom tests.helpers import run_recipe\n\n\n@pytest.mark.integration\ndef test_vlm_classifier():\n    run_recipe"
  },
  {
    "path": "tests/test_inspect_eval.py",
    "chars": 5639,
    "preview": "\"\"\"Smoke tests for inspect evaluation integration.\n\nTests the include_reasoning parameter across thinking and non-thinki"
  },
  {
    "path": "tests/test_modal_sandbox.py",
    "chars": 4731,
    "preview": "\"\"\"Smoke tests for ModalSandbox.\n\nRequire Modal authentication and network access; skipped when Modal is not\nconfigured "
  },
  {
    "path": "tests/third_party/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/third_party/test_litellm.py",
    "chars": 5719,
    "preview": "\"\"\"End-to-end smoke test for the LiteLLM Tinker provider.\n\nRequires TINKER_API_KEY to be set (skipped otherwise, see con"
  },
  {
    "path": "tests/validate_temperature_logprobs.py",
    "chars": 11869,
    "preview": "\"\"\"\nValidate temperature scaling in sampling by comparing pairwise logprob differences.\n\nTwo complementary checks ensure"
  },
  {
    "path": "tests/weights/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/weights/test_download.py",
    "chars": 1691,
    "preview": "\"\"\"Integration test for weights.download().\n\nRequires TINKER_API_KEY to be set. Skipped otherwise.\n\"\"\"\n\nimport os\nimport"
  },
  {
    "path": "tests/weights/test_export.py",
    "chars": 27839,
    "preview": "\"\"\"End-to-end tests for build_hf_model across all supported model families.\n\nEach test instantiates a tiny real HuggingF"
  },
  {
    "path": "tests/weights/test_lifecycle.py",
    "chars": 4083,
    "preview": "\"\"\"End-to-end lifecycle test: train → save → download → build.\n\nTrains a tiny SFT model for 1 step, saves the checkpoint"
  },
  {
    "path": "tests/weights/test_publish.py",
    "chars": 2244,
    "preview": "\"\"\"Integration test for weights.publish_to_hf_hub().\n\nRequires HF authentication (HF_TOKEN env var or `hf auth login`).\n"
  },
  {
    "path": "tests/weights/test_quantized.py",
    "chars": 24317,
    "preview": "\"\"\"End-to-end tests for quantized export (DeepSeek FP8).\n\nUses a tiny 1-layer DeepSeek V3 model created from config with"
  },
  {
    "path": "tests/weights/test_quantized_equivalence.py",
    "chars": 18785,
    "preview": "\"\"\"Equivalence tests: verify our quantized export matches PR #470 behavior.\n\nPR #470 (tinker_cookbook/weights/_deepseek."
  },
  {
    "path": "tests/weights/test_strategy_consistency.py",
    "chars": 6057,
    "preview": "\"\"\"Verify that merge_strategy='full' and merge_strategy='shard' produce identical output.\n\nUses a tiny real Qwen3 dense "
  },
  {
    "path": "tinker_cookbook/__init__.py",
    "chars": 996,
    "preview": "\"\"\"Tinker Cookbook: post-training algorithms using the Tinker API.\"\"\"\n\ntry:\n    from tinker_cookbook._version import __v"
  },
  {
    "path": "tinker_cookbook/chat_app/README.md",
    "chars": 1497,
    "preview": "# Tinker Chat CLI\n\nThis README provides instructions for chatting with models trained using **Tinker**.\n\n---\n\n## Getting"
  },
  {
    "path": "tinker_cookbook/chat_app/tinker_chat_cli.py",
    "chars": 6040,
    "preview": "#!/usr/bin/env python3\n\"\"\"\nSimple CLI chat interface using tinker sampling client.\n\"\"\"\n\nimport asyncio\nimport logging\nim"
  },
  {
    "path": "tinker_cookbook/checkpoint_utils.py",
    "chars": 14000,
    "preview": "import asyncio\nimport dataclasses\nimport json\nimport logging\nfrom dataclasses import dataclass, field\nfrom pathlib impor"
  },
  {
    "path": "tinker_cookbook/checkpoint_utils_test.py",
    "chars": 6042,
    "preview": "\"\"\"Tests for checkpoint_utils path handling.\"\"\"\n\nimport json\nimport tempfile\nfrom pathlib import Path\n\nfrom tinker_cookb"
  },
  {
    "path": "tinker_cookbook/cli_utils.py",
    "chars": 2376,
    "preview": "import logging\nimport shutil\nfrom pathlib import Path\nfrom typing import Literal\n\nfrom tinker_cookbook.exceptions import"
  },
  {
    "path": "tinker_cookbook/cli_utils_test.py",
    "chars": 1282,
    "preview": "\"\"\"Tests for cli_utils path handling.\"\"\"\n\nimport tempfile\nfrom pathlib import Path\n\nimport pytest\n\nfrom tinker_cookbook."
  },
  {
    "path": "tinker_cookbook/completers.py",
    "chars": 3983,
    "preview": "\"\"\"\nImplementations that correspond to a model or policy that can be sampled from, but with different amounts of additio"
  },
  {
    "path": "tinker_cookbook/display.py",
    "chars": 1867,
    "preview": "import io\n\nimport tinker\nfrom termcolor import colored\n\nfrom tinker_cookbook.rl.types import Trajectory, Transition\nfrom"
  },
  {
    "path": "tinker_cookbook/distillation/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tinker_cookbook/distillation/datasets.py",
    "chars": 9290,
    "preview": "\"\"\"\nDataset utilities for on-policy distillation.\n\nThis module contains dataset configuration classes and environment de"
  },
  {
    "path": "tinker_cookbook/distillation/train_on_policy.py",
    "chars": 19531,
    "preview": "\"\"\"\nImplements on-policy distillation. For more details, see:\nhttps://thinkingmachines.ai/blog/on-policy-distillation\n\"\""
  },
  {
    "path": "tinker_cookbook/eval/README.md",
    "chars": 261,
    "preview": "Tinker can integrate with oss eval framework like Inspect AI easily (`run_inspect_evals.py`), or you can create a simple"
  },
  {
    "path": "tinker_cookbook/eval/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tinker_cookbook/eval/custom_evaluators.py",
    "chars": 3092,
    "preview": "import asyncio\nfrom collections.abc import Callable\nfrom typing import Any\n\nimport tinker\nfrom tinker import types\n\nfrom"
  },
  {
    "path": "tinker_cookbook/eval/custom_inspect_task.py",
    "chars": 2243,
    "preview": "\"\"\"\nExample of using LLM-as-a-judge with inspect_ai.\n\nTo run this task, use:\npython -m tinker_cookbook.eval.run_inspect_"
  },
  {
    "path": "tinker_cookbook/eval/evaluators.py",
    "chars": 783,
    "preview": "import logging\nfrom collections.abc import Callable\n\nimport tinker\n\n# Set up logger\nlogger = logging.getLogger(__name__)"
  },
  {
    "path": "tinker_cookbook/eval/inspect_evaluators.py",
    "chars": 5583,
    "preview": "import logging\nfrom pathlib import Path\n\nimport chz\nimport tinker\nfrom inspect_ai import Tasks, eval_async\nfrom inspect_"
  },
  {
    "path": "tinker_cookbook/eval/inspect_utils.py",
    "chars": 8918,
    "preview": "\"\"\"\nShared utilities for inspect evaluation.\n\nThis module contains the common classes and functions used by both\nrun_ins"
  },
  {
    "path": "tinker_cookbook/eval/inspect_utils_test.py",
    "chars": 4065,
    "preview": "\"\"\"Tests for inspect_utils conversion functions.\"\"\"\n\nimport pytest\n\npytest.importorskip(\"inspect_ai\")\n\nfrom inspect_ai.m"
  },
  {
    "path": "tinker_cookbook/eval/run_inspect_evals.py",
    "chars": 2597,
    "preview": "import asyncio\nimport logging\n\nimport chz\nimport tinker\n\nfrom tinker_cookbook import checkpoint_utils, model_info\nfrom t"
  },
  {
    "path": "tinker_cookbook/example_data/conversations.jsonl",
    "chars": 23726,
    "preview": "{\"messages\": [{\"role\": \"user\", \"content\": \"What is the capital of France?\"}, {\"role\": \"assistant\", \"content\": \"The capit"
  },
  {
    "path": "tinker_cookbook/example_data/multilingual.txt",
    "chars": 166591,
    "preview": "وقال، ماما، لقد عدت للمنزل.\nИ той каза: Мамо, у дома съм.\nund er hat gesagt, Mama ich bin daheim.\nΚαι είπε, Μαμά, έφτασα"
  },
  {
    "path": "tinker_cookbook/exceptions.py",
    "chars": 6763,
    "preview": "\"\"\"Centralized exception hierarchy for tinker-cookbook.\n\nAll custom exceptions inherit from :class:`TinkerCookbookError`"
  },
  {
    "path": "tinker_cookbook/exceptions_test.py",
    "chars": 4810,
    "preview": "\"\"\"Tests for the exception hierarchy in tinker_cookbook.exceptions.\n\nVerifies inheritance contracts so that future chang"
  },
  {
    "path": "tinker_cookbook/hyperparam_utils.py",
    "chars": 7405,
    "preview": "\"\"\"\nUtilities for guessing good hyperparameters for fine-tuning.\n\"\"\"\n\nimport json\nimport math\nimport struct\n\nimport hugg"
  },
  {
    "path": "tinker_cookbook/image_processing_utils.py",
    "chars": 2011,
    "preview": "\"\"\"\nUtilities for working with image processors. Create new types to avoid needing to import AutoImageProcessor and Base"
  },
  {
    "path": "tinker_cookbook/image_processing_utils_test.py",
    "chars": 2008,
    "preview": "from unittest.mock import MagicMock, patch\n\nimport pytest\n\nfrom tinker_cookbook.image_processing_utils import get_image_"
  },
  {
    "path": "tinker_cookbook/model_info.py",
    "chars": 7575,
    "preview": "\"\"\"\nThis module associates model names with metadata, which helps  training code choose good defaults.\n\"\"\"\n\nfrom __futur"
  },
  {
    "path": "tinker_cookbook/model_info_test.py",
    "chars": 2002,
    "preview": "import logging\n\nimport pytest\n\nfrom tinker_cookbook.model_info import warn_if_renderer_not_recommended\n\n\nclass TestWarnI"
  },
  {
    "path": "tinker_cookbook/preference/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tinker_cookbook/preference/comparison_policy_evaluator.py",
    "chars": 2727,
    "preview": "import asyncio\nfrom collections.abc import Callable, Sequence\nfrom dataclasses import replace\n\nimport numpy as np\nimport"
  },
  {
    "path": "tinker_cookbook/preference/dpo_datasets.py",
    "chars": 3037,
    "preview": "import chz\nimport tinker\n\nfrom tinker_cookbook.preference.preference_datasets import (\n    ComparisonDatasetBuilder,\n)\nf"
  },
  {
    "path": "tinker_cookbook/preference/preference_datasets.py",
    "chars": 6503,
    "preview": "import logging\nimport random\n\nimport chz\nimport datasets\nimport tinker\n\nfrom tinker_cookbook.preference.types import (\n "
  },
  {
    "path": "tinker_cookbook/preference/train_dpo.py",
    "chars": 18176,
    "preview": "\"\"\"\nDirect Preference Optimization (DPO) training\n\"\"\"\n\nimport asyncio\nimport logging\nfrom pathlib import Path\nfrom typin"
  },
  {
    "path": "tinker_cookbook/preference/types.py",
    "chars": 5564,
    "preview": "\"\"\"\nTypes for preference learning and Direct Preference Optimization (DPO).\n\nThis module defines the core data structure"
  },
  {
    "path": "tinker_cookbook/py.typed",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tinker_cookbook/recipes/README.md",
    "chars": 3509,
    "preview": "# Cookbook Recipes\n\nTinker allows you to flexibly customize your training environment.\nWe will first introduce a few sim"
  },
  {
    "path": "tinker_cookbook/recipes/chat_sl/README.md",
    "chars": 1222,
    "preview": "# Supervised Learning\n\n## SFT on NoRobots\n\n```bash\npython -m tinker_cookbook.recipes.chat_sl.train \\\n    model_name=Qwen"
  },
  {
    "path": "tinker_cookbook/recipes/chat_sl/chat_datasets.py",
    "chars": 2813,
    "preview": "\"\"\"\nDatasets for supervised learning (SFT) that use chat-formatted data, which we\nconvert to tokens using a Renderer.\n\"\""
  },
  {
    "path": "tinker_cookbook/recipes/chat_sl/train.py",
    "chars": 5762,
    "preview": "\"\"\"\nBasic CLI for training with supervised learning. Currently only used for integration tests.\n\n\"\"\"\n\nimport asyncio\nfro"
  },
  {
    "path": "tinker_cookbook/recipes/code_rl/README.md",
    "chars": 2973,
    "preview": "# Replicating DeepCoder with Tinker\n\nCompetitive programming problems are a common testbed for RL with LLMs. The recent "
  },
  {
    "path": "tinker_cookbook/recipes/code_rl/code_env.py",
    "chars": 10027,
    "preview": "from __future__ import annotations\n\nimport json\nimport logging\nfrom collections.abc import Sequence\nfrom typing import A"
  },
  {
    "path": "tinker_cookbook/recipes/code_rl/code_grading.py",
    "chars": 5798,
    "preview": "\"\"\"\nCode grading utilities for RL training.\n\nSupports two execution backends:\n- sandboxfusion: Local Docker-based sandbo"
  },
  {
    "path": "tinker_cookbook/recipes/code_rl/deepcoder_tool.py",
    "chars": 4369,
    "preview": "from __future__ import annotations\n\nimport json\nfrom dataclasses import dataclass\nfrom typing import Annotated, Any\n\nfro"
  },
  {
    "path": "tinker_cookbook/recipes/code_rl/lcb_utils.py",
    "chars": 31027,
    "preview": "\"\"\"\nLiveCodeBench testing utilities for sandbox execution.\n\nAdapted from https://github.com/LiveCodeBench/LiveCodeBench\n"
  },
  {
    "path": "tinker_cookbook/recipes/code_rl/sandbox_config/local.yaml",
    "chars": 4641,
    "preview": "dataset:\n  database:\n    backend:\n      type: none\n    cache:\n      path: memory\n      sources:\n        - type: local\n  "
  },
  {
    "path": "tinker_cookbook/recipes/code_rl/train.py",
    "chars": 4411,
    "preview": "import asyncio\nimport logging\nfrom datetime import datetime\n\nimport chz\n\nfrom tinker_cookbook import checkpoint_utils, c"
  },
  {
    "path": "tinker_cookbook/recipes/distillation/README.md",
    "chars": 7791,
    "preview": "# Distillation\n\nDistillation refers to a class of methods where a teacher model is supervising the training of a student"
  },
  {
    "path": "tinker_cookbook/recipes/distillation/harbor_multiturn.py",
    "chars": 1163,
    "preview": "\"\"\"Harbor environment for multi-turn on-policy distillation.\n\nProvides a DatasetBuilder that creates harbor sandbox envi"
  },
  {
    "path": "tinker_cookbook/recipes/distillation/harbor_multiturn_test.py",
    "chars": 1726,
    "preview": "\"\"\"Tests for harbor_multiturn zero-reward distillation.\"\"\"\n\nimport asyncio\nfrom unittest.mock import MagicMock\n\nimport p"
  },
  {
    "path": "tinker_cookbook/recipes/distillation/off_policy_reasoning.py",
    "chars": 6551,
    "preview": "\"\"\"\nSupervised fine-tuning for reasoning tasks using OpenThoughts3.\n\nThis script implements standard supervised learning"
  },
  {
    "path": "tinker_cookbook/recipes/distillation/on_policy_distillation.py",
    "chars": 5842,
    "preview": "\"\"\"\nOn-policy distillation for reasoning and chat tasks.\n\nThis script implements on-policy distillation where a student "
  },
  {
    "path": "tinker_cookbook/recipes/distillation/on_policy_distillation_harbor_multi_turn.py",
    "chars": 6238,
    "preview": "\"\"\"\nMulti-turn on-policy distillation with Harbor sandbox environments.\nYou need to download the tasks from the harbor c"
  },
  {
    "path": "tinker_cookbook/recipes/distillation/on_policy_multi_teacher.py",
    "chars": 6514,
    "preview": "\"\"\"\nMulti-teacher on-policy distillation example.\n\nThis script demonstrates on-policy distillation with multiple dataset"
  },
  {
    "path": "tinker_cookbook/recipes/harbor_rl/README.md",
    "chars": 4974,
    "preview": "# Harbor RL\n\n## Installation\n\n```bash\nuv pip install 'tinker-cookbook[modal] @ git+https://github.com/thinking-machines-"
  },
  {
    "path": "tinker_cookbook/recipes/harbor_rl/eval.py",
    "chars": 7539,
    "preview": "\"\"\"\nStandalone evaluation for Harbor tasks.\n\nDownload harbor datasets:\n  uvx harbor datasets download swebench-verified@"
  },
  {
    "path": "tinker_cookbook/recipes/harbor_rl/harbor_env.py",
    "chars": 8122,
    "preview": "\"\"\"Harbor environment, dataset, and dataset builder for RL training.\"\"\"\n\nfrom __future__ import annotations\n\nimport logg"
  },
  {
    "path": "tinker_cookbook/recipes/harbor_rl/harbor_tools.py",
    "chars": 4504,
    "preview": "\"\"\"Harbor bash tool and reward function for RL training.\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport logg"
  },
  {
    "path": "tinker_cookbook/recipes/harbor_rl/harbor_tools_test.py",
    "chars": 9362,
    "preview": "\"\"\"Unit tests for HarborReward, HarborBashTool, and HarborEnvGroupBuilder.\"\"\"\n\nimport asyncio\nimport json\nimport pickle\n"
  },
  {
    "path": "tinker_cookbook/recipes/harbor_rl/scripts/.gitignore",
    "chars": 9,
    "preview": "results/\n"
  },
  {
    "path": "tinker_cookbook/recipes/harbor_rl/scripts/eval_terminal_bench.py",
    "chars": 612,
    "preview": "\"\"\"\nLoad Terminal-Bench tasks from the Harbor cache and run evaluation.\n\nuv run python tinker_cookbook/recipes/harbor_rl"
  },
  {
    "path": "tinker_cookbook/recipes/harbor_rl/scripts/train_terminal_bench.py",
    "chars": 546,
    "preview": "\"\"\"\nLoad Terminal-Bench tasks from the Harbor cache and launch RL training.\n\nuv run python tinker_cookbook/recipes/harbo"
  },
  {
    "path": "tinker_cookbook/recipes/harbor_rl/train.py",
    "chars": 3723,
    "preview": "\"\"\"CLI entry point for Harbor RL training.\"\"\"\n\nimport logging\nfrom datetime import datetime\n\nimport chz\n\nfrom tinker_coo"
  },
  {
    "path": "tinker_cookbook/recipes/math_rl/README.md",
    "chars": 4282,
    "preview": "# Using Reinforcement Learning to Solve Math Problems\n\nMath problems have been the most active testbed for RL with LLMs."
  },
  {
    "path": "tinker_cookbook/recipes/math_rl/arithmetic_env.py",
    "chars": 3245,
    "preview": "from collections.abc import Sequence\nfrom functools import partial\n\nimport chz\nimport numpy as np\n\nfrom tinker_cookbook "
  },
  {
    "path": "tinker_cookbook/recipes/math_rl/math_env.py",
    "chars": 15468,
    "preview": "import math\nimport re\nfrom collections.abc import Sequence\nfrom functools import partial\nfrom typing import Literal, cas"
  },
  {
    "path": "tinker_cookbook/recipes/math_rl/math_env_test.py",
    "chars": 1064,
    "preview": "import asyncio\n\nfrom tinker_cookbook.recipes.math_rl.math_env import MathDatasetBuilder\n\n\ndef test_math_dataset_builder("
  },
  {
    "path": "tinker_cookbook/recipes/math_rl/math_grading.py",
    "chars": 16751,
    "preview": "\"\"\"\nMath grading utilities for RL training.\n\nIncludes math_normalize functionality that was dependency of grader.\n\"\"\"\n\ni"
  },
  {
    "path": "tinker_cookbook/recipes/math_rl/train.py",
    "chars": 5970,
    "preview": "import asyncio\nimport logging\nfrom datetime import datetime\nfrom typing import Any\n\nimport chz\nfrom tinker.types import "
  },
  {
    "path": "tinker_cookbook/recipes/multiplayer_rl/README.md",
    "chars": 1126,
    "preview": "# Multiturn Training\n\nOften we not only want large language models (LLMs) to generate a single response, but also to per"
  },
  {
    "path": "tinker_cookbook/recipes/multiplayer_rl/guess_number/README.md",
    "chars": 4683,
    "preview": "# A Simple `Environment` for Guessing the Number\n\n```bash\npython -m tinker_cookbook.recipes.multiplayer_rl.guess_number."
  }
]

// ... and 166 more files (download for full content)

About this extraction

This page contains the full source code of the thinking-machines-lab/tinker-cookbook GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 366 files (2.5 MB), approximately 681.3k tokens, and a symbol index with 2672 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!