gitextract_vc456gw1/

├── .claude/
│   └── skills/
│       ├── checkpoints/
│       │   └── SKILL.md
│       ├── ci/
│       │   └── SKILL.md
│       ├── completers/
│       │   └── SKILL.md
│       ├── contributing/
│       │   └── SKILL.md
│       ├── datasets/
│       │   └── SKILL.md
│       ├── distillation/
│       │   └── SKILL.md
│       ├── dpo/
│       │   └── SKILL.md
│       ├── environments/
│       │   └── SKILL.md
│       ├── evals/
│       │   └── SKILL.md
│       ├── grpo/
│       │   └── SKILL.md
│       ├── hyperparams/
│       │   └── SKILL.md
│       ├── logging/
│       │   └── SKILL.md
│       ├── manage-skills/
│       │   └── SKILL.md
│       ├── models/
│       │   └── SKILL.md
│       ├── multiturn-rl/
│       │   └── SKILL.md
│       ├── new-recipe/
│       │   └── SKILL.md
│       ├── renderers/
│       │   └── SKILL.md
│       ├── rlhf/
│       │   └── SKILL.md
│       ├── setup/
│       │   └── SKILL.md
│       ├── sft/
│       │   └── SKILL.md
│       ├── tinker-cli/
│       │   └── SKILL.md
│       ├── tinker-sdk/
│       │   └── SKILL.md
│       ├── tinker-types/
│       │   └── SKILL.md
│       └── weights/
│           └── SKILL.md
├── .github/
│   └── workflows/
│       ├── claude-review.yml
│       ├── downstream-compat.yaml
│       ├── nightly.yaml
│       ├── pre-commit.yaml
│       ├── publish-pypi.yaml
│       ├── pyright.yaml
│       ├── pytest.yaml
│       ├── smoke-test-evals.yaml
│       └── smoke-test-recipes.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .sync_state
├── AGENTS.md
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── docs/
│   ├── api-reference/
│   │   ├── apifuture.md
│   │   ├── exceptions.md
│   │   ├── restclient.md
│   │   ├── samplingclient.md
│   │   ├── serviceclient.md
│   │   ├── trainingclient.md
│   │   └── types.md
│   ├── async.mdx
│   ├── compatible-apis/
│   │   └── openai.mdx
│   ├── completers.mdx
│   ├── dev-tips.mdx
│   ├── docs-outline.mdx
│   ├── download-weights.mdx
│   ├── evals.mdx
│   ├── index.mdx
│   ├── install.mdx
│   ├── lora-primer.mdx
│   ├── losses.mdx
│   ├── model-lineup.mdx
│   ├── overview-building.mdx
│   ├── preferences/
│   │   ├── dpo-guide.mdx
│   │   └── rlhf-example.mdx
│   ├── preferences.mdx
│   ├── publish-weights.mdx
│   ├── rendering.mdx
│   ├── rl/
│   │   ├── rl-basic.mdx
│   │   ├── rl-envs.mdx
│   │   ├── rl-hyperparams.mdx
│   │   ├── rl-logging.mdx
│   │   ├── rl-loops.mdx
│   │   └── sequence-extension.mdx
│   ├── rl.mdx
│   ├── save-load.mdx
│   ├── supervised-learning/
│   │   ├── prompt-distillation.mdx
│   │   ├── sl-basic.mdx
│   │   ├── sl-hyperparams.mdx
│   │   ├── sl-loop.mdx
│   │   └── sweep-case-study.mdx
│   ├── supervised-learning.mdx
│   ├── support.mdx
│   ├── training-sampling.mdx
│   └── under-the-hood.mdx
├── pyproject.toml
├── tests/
│   ├── __init__.py
│   ├── compare_sampling_training_logprobs.py
│   ├── conftest.py
│   ├── downstream_compat/
│   │   ├── __init__.py
│   │   ├── conftest.py
│   │   ├── sig_helpers.py
│   │   ├── test_checkpoint_utils.py
│   │   ├── test_cli_and_hyperparam.py
│   │   ├── test_completers.py
│   │   ├── test_model_info.py
│   │   ├── test_recipes.py
│   │   ├── test_renderers.py
│   │   ├── test_rl_train.py
│   │   ├── test_rl_types.py
│   │   ├── test_supervised.py
│   │   ├── test_tokenizer_utils.py
│   │   └── test_utils.py
│   ├── helpers.py
│   ├── recipes/
│   │   ├── __init__.py
│   │   ├── test_recipe_chat_sl.py
│   │   ├── test_recipe_dpo.py
│   │   ├── test_recipe_guess_number.py
│   │   ├── test_recipe_math_rl.py
│   │   ├── test_recipe_off_policy_reasoning.py
│   │   ├── test_recipe_on_policy_distillation.py
│   │   ├── test_recipe_on_policy_multi_teacher.py
│   │   ├── test_recipe_rlhf_pipeline.py
│   │   ├── test_recipe_shorter.py
│   │   ├── test_recipe_text_arena.py
│   │   ├── test_recipe_twenty_questions.py
│   │   └── test_recipe_vlm_classifier.py
│   ├── test_inspect_eval.py
│   ├── test_modal_sandbox.py
│   ├── third_party/
│   │   ├── __init__.py
│   │   └── test_litellm.py
│   ├── validate_temperature_logprobs.py
│   └── weights/
│       ├── __init__.py
│       ├── test_download.py
│       ├── test_export.py
│       ├── test_lifecycle.py
│       ├── test_publish.py
│       ├── test_quantized.py
│       ├── test_quantized_equivalence.py
│       └── test_strategy_consistency.py
└── tinker_cookbook/
    ├── __init__.py
    ├── chat_app/
    │   ├── README.md
    │   └── tinker_chat_cli.py
    ├── checkpoint_utils.py
    ├── checkpoint_utils_test.py
    ├── cli_utils.py
    ├── cli_utils_test.py
    ├── completers.py
    ├── display.py
    ├── distillation/
    │   ├── __init__.py
    │   ├── datasets.py
    │   └── train_on_policy.py
    ├── eval/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── custom_evaluators.py
    │   ├── custom_inspect_task.py
    │   ├── evaluators.py
    │   ├── inspect_evaluators.py
    │   ├── inspect_utils.py
    │   ├── inspect_utils_test.py
    │   └── run_inspect_evals.py
    ├── example_data/
    │   ├── conversations.jsonl
    │   └── multilingual.txt
    ├── exceptions.py
    ├── exceptions_test.py
    ├── hyperparam_utils.py
    ├── image_processing_utils.py
    ├── image_processing_utils_test.py
    ├── model_info.py
    ├── model_info_test.py
    ├── preference/
    │   ├── __init__.py
    │   ├── comparison_policy_evaluator.py
    │   ├── dpo_datasets.py
    │   ├── preference_datasets.py
    │   ├── train_dpo.py
    │   └── types.py
    ├── py.typed
    ├── recipes/
    │   ├── README.md
    │   ├── chat_sl/
    │   │   ├── README.md
    │   │   ├── chat_datasets.py
    │   │   └── train.py
    │   ├── code_rl/
    │   │   ├── README.md
    │   │   ├── code_env.py
    │   │   ├── code_grading.py
    │   │   ├── deepcoder_tool.py
    │   │   ├── lcb_utils.py
    │   │   ├── sandbox_config/
    │   │   │   └── local.yaml
    │   │   └── train.py
    │   ├── distillation/
    │   │   ├── README.md
    │   │   ├── harbor_multiturn.py
    │   │   ├── harbor_multiturn_test.py
    │   │   ├── off_policy_reasoning.py
    │   │   ├── on_policy_distillation.py
    │   │   ├── on_policy_distillation_harbor_multi_turn.py
    │   │   └── on_policy_multi_teacher.py
    │   ├── harbor_rl/
    │   │   ├── README.md
    │   │   ├── eval.py
    │   │   ├── harbor_env.py
    │   │   ├── harbor_tools.py
    │   │   ├── harbor_tools_test.py
    │   │   ├── scripts/
    │   │   │   ├── .gitignore
    │   │   │   ├── eval_terminal_bench.py
    │   │   │   └── train_terminal_bench.py
    │   │   └── train.py
    │   ├── math_rl/
    │   │   ├── README.md
    │   │   ├── arithmetic_env.py
    │   │   ├── math_env.py
    │   │   ├── math_env_test.py
    │   │   ├── math_grading.py
    │   │   └── train.py
    │   ├── multiplayer_rl/
    │   │   ├── README.md
    │   │   ├── guess_number/
    │   │   │   ├── README.md
    │   │   │   ├── env.py
    │   │   │   └── train.py
    │   │   ├── text_arena/
    │   │   │   ├── README.md
    │   │   │   ├── env.py
    │   │   │   └── train.py
    │   │   └── twenty_questions/
    │   │       ├── README.md
    │   │       ├── common_english_nouns.txt
    │   │       ├── env.py
    │   │       └── train.py
    │   ├── preference/
    │   │   ├── README.md
    │   │   ├── datasets.py
    │   │   ├── dpo/
    │   │   │   ├── README.md
    │   │   │   └── train.py
    │   │   ├── rlhf/
    │   │   │   ├── README.md
    │   │   │   └── rlhf_pipeline.py
    │   │   └── shorter/
    │   │       ├── README.md
    │   │       ├── env.py
    │   │       └── train.py
    │   ├── prompt_distillation/
    │   │   ├── README.md
    │   │   ├── create_data.py
    │   │   └── train.py
    │   ├── rl_basic.py
    │   ├── rl_loop.py
    │   ├── rubric/
    │   │   ├── README.md
    │   │   ├── data.py
    │   │   ├── debug_env.py
    │   │   ├── env.py
    │   │   ├── generate_data.py
    │   │   ├── prometheus_experimental.py
    │   │   └── train.py
    │   ├── search_tool/
    │   │   ├── README.md
    │   │   ├── chroma_pickle_test.py
    │   │   ├── embedding.py
    │   │   ├── offline_eval.py
    │   │   ├── search_env.py
    │   │   ├── tools.py
    │   │   └── train.py
    │   ├── sl_basic.py
    │   ├── sl_loop.py
    │   ├── verifiers_rl/
    │   │   ├── README.md
    │   │   ├── evaluate.py
    │   │   ├── tinker_openai.py
    │   │   ├── train.py
    │   │   ├── verifiers_env.py
    │   │   └── verifiers_pickle_test.py
    │   └── vlm_classifier/
    │       ├── README.md
    │       ├── data.py
    │       ├── eval.py
    │       ├── eval_sweep.py
    │       ├── sweep.py
    │       └── train.py
    ├── renderers/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── base.py
    │   ├── deepseek_v3.py
    │   ├── deepseek_v3_test.py
    │   ├── gpt_oss.py
    │   ├── gpt_oss_test.py
    │   ├── kimi_k2.py
    │   ├── kimi_k25.py
    │   ├── kimi_k25_test.py
    │   ├── kimi_k2_5_tool_declaration_ts.py
    │   ├── kimi_k2_test.py
    │   ├── kimi_k2_tool_declaration_test.py
    │   ├── llama3.py
    │   ├── nemotron3.py
    │   ├── nemotron3_test.py
    │   ├── parsing_test.py
    │   ├── qwen3.py
    │   ├── qwen3_5.py
    │   ├── qwen3_test.py
    │   ├── qwen3_tool_declaration_test.py
    │   ├── renderer_pickle_test.py
    │   ├── renderers_test.py
    │   ├── role_colon.py
    │   ├── testing_utils.py
    │   └── tool_calling_test.py
    ├── rl/
    │   ├── __init__.py
    │   ├── builder_pickle_test.py
    │   ├── data_processing.py
    │   ├── message_env.py
    │   ├── message_env_test.py
    │   ├── metric_util.py
    │   ├── metrics.py
    │   ├── multiturn_weight_assignment_test.py
    │   ├── play_w_env.py
    │   ├── preference_envs.py
    │   ├── problem_env.py
    │   ├── rollout_error_resilience_test.py
    │   ├── rollout_logging.py
    │   ├── rollout_logging_test.py
    │   ├── rollout_strategy.py
    │   ├── rollouts.py
    │   ├── shutdown_test.py
    │   ├── train.py
    │   └── types.py
    ├── sandbox/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── modal_sandbox.py
    │   ├── sandbox_interface.py
    │   └── sandboxfusion.py
    ├── scripts/
    │   ├── merge_tinker_adapter_to_hf_model.py
    │   └── test_tool_calling_e2e.py
    ├── supervised/
    │   ├── __init__.py
    │   ├── common.py
    │   ├── data.py
    │   ├── nll_evaluator.py
    │   ├── resume_test.py
    │   ├── train.py
    │   ├── types.py
    │   └── viz_sft_dataset.py
    ├── third_party/
    │   ├── __init__.py
    │   ├── litellm/
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── provider.py
    │   │   └── provider_test.py
    │   ├── openai_compat.py
    │   └── openai_compat_test.py
    ├── tokenizer_utils.py
    ├── tokenizer_utils_test.py
    ├── tool_use/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── agent_tool_message_env.py
    │   ├── agent_tool_message_env_test.py
    │   ├── tools.py
    │   └── types.py
    ├── utils/
    │   ├── __init__.py
    │   ├── code_state.py
    │   ├── deprecation.py
    │   ├── deprecation_test.py
    │   ├── file_utils.py
    │   ├── format_colorized.py
    │   ├── logtree.py
    │   ├── logtree_formatters.py
    │   ├── logtree_test.py
    │   ├── lr_scheduling.py
    │   ├── misc_utils.py
    │   ├── ml_log.py
    │   ├── ml_log_test.py
    │   ├── trace.py
    │   └── trace_test.py
    ├── weights/
    │   ├── __init__.py
    │   ├── _artifacts.py
    │   ├── _download.py
    │   ├── _export/
    │   │   ├── __init__.py
    │   │   ├── _full.py
    │   │   ├── _quantized.py
    │   │   └── _shard.py
    │   ├── _merge.py
    │   ├── _publish.py
    │   ├── artifacts_test.py
    │   ├── download_test.py
    │   ├── export_test.py
    │   ├── merge_test.py
    │   ├── publish_test.py
    │   ├── quantized_test.py
    │   └── stress_test.py
    └── xmux/
        ├── README.md
        ├── __init__.py
        ├── control.py
        ├── core.py
        ├── examples/
        │   ├── async_rl_sweep.py
        │   ├── fake_train.py
        │   └── ml_sweep.py
        ├── run_job.py
        └── utils.py