gitextract_7jngjjdm/

├── .agents/
│   └── skills/
│       ├── fix-art-issues/
│       │   └── SKILL.md
│       ├── train-rl/
│       │   └── SKILL.md
│       └── train-sft/
│           └── SKILL.md
├── .dockerignore
├── .github/
│   └── workflows/
│       ├── create-draft-release.yml
│       ├── package-install.yml
│       ├── prek.yml
│       └── release.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .python-version
├── .skyignore
├── AGENT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── THIRD-PARTY-NOTICES
├── dev/
│   ├── demo_logging.py
│   ├── math-vista/
│   │   ├── math-vista.ipynb
│   │   └── math-vista.py
│   ├── new_models/
│   │   ├── benchmark_inference.py
│   │   ├── gemma3.py
│   │   ├── prompts.json
│   │   ├── qwen3_try.ipynb
│   │   └── qwen3_try.py
│   ├── profile.ipynb
│   ├── run_yes_no_maybe_kl_advantage.py
│   ├── sft/
│   │   ├── dataset.jsonl
│   │   ├── distillation.py
│   │   ├── sft-from-file.py
│   │   └── sft-warmup.py
│   ├── yes-no-maybe-kl-advantage.py
│   ├── yes-no-maybe-megatron.py
│   ├── yes-no-maybe-metrics.py
│   ├── yes-no-maybe-vision/
│   │   ├── generate_images.py
│   │   └── train.ipynb
│   ├── yes-no-maybe.ipynb
│   └── yes-no-maybe.py
├── docs/
│   ├── .gitignore
│   ├── README.md
│   ├── analytics.js
│   ├── docs.json
│   ├── experimental/
│   │   └── gspo.mdx
│   ├── features/
│   │   ├── additional-histories.mdx
│   │   ├── checkpoint-deletion.mdx
│   │   ├── checkpoint-forking.mdx
│   │   ├── mcp-rl.mdx
│   │   └── tracking-metrics.mdx
│   ├── fundamentals/
│   │   ├── art-backend.mdx
│   │   ├── art-client.mdx
│   │   ├── ruler.mdx
│   │   ├── sft-training.mdx
│   │   └── training-loop.mdx
│   ├── getting-started/
│   │   ├── about.mdx
│   │   ├── faq.mdx
│   │   ├── installation-setup.mdx
│   │   ├── notebooks.mdx
│   │   └── quick-start.mdx
│   ├── integrations/
│   │   ├── langgraph-integration.mdx
│   │   └── openenv-integration.mdx
│   ├── package.json
│   ├── proposals/
│   │   └── backend-first-training-api.md
│   ├── resources/
│   │   ├── glossary.mdx
│   │   └── models.mdx
│   ├── style.css
│   └── tutorials/
│       ├── open-deep-research.mdx
│       └── summarizer.mdx
├── examples/
│   ├── 2048/
│   │   ├── display_benchmarks.ipynb
│   │   ├── generate_benchmarks.py
│   │   ├── rollout.py
│   │   ├── train.py
│   │   └── utils.py
│   ├── benchmarking_comparison_models.py
│   ├── codenames/
│   │   ├── codenames_words.json
│   │   └── dictionary.json
│   ├── data/
│   │   ├── greentext/
│   │   │   └── prompts.txt
│   │   └── temporal-clue/
│   │       └── puzzles.json
│   ├── hn_title_generator/
│   │   ├── reference_grpo_trainer.py
│   │   ├── train.py
│   │   └── utils.py
│   ├── just-the-facts/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── just_the_facts/
│   │   │   ├── __init__.py
│   │   │   ├── checks.py
│   │   │   ├── display_benchmarks.ipynb
│   │   │   ├── experiments.py
│   │   │   ├── find_articles.py
│   │   │   ├── generate_benchmarks.py
│   │   │   ├── rollout.py
│   │   │   ├── scenarios.py
│   │   │   ├── train.py
│   │   │   └── utils.py
│   │   ├── main.py
│   │   ├── pyproject.toml
│   │   └── test_scraper.py
│   ├── mcp-rl/
│   │   ├── README.md
│   │   ├── all_experiments.py
│   │   ├── mcp_rl/
│   │   │   ├── __init__.py
│   │   │   ├── benchmarks/
│   │   │   │   ├── display_benchmarks/
│   │   │   │   │   ├── mcp_alphavantage.ipynb
│   │   │   │   │   └── mcp_balldontlie.ipynb
│   │   │   │   └── generate_benchmarks.py
│   │   │   ├── checks.py
│   │   │   ├── mcp_server.py
│   │   │   ├── rollout.py
│   │   │   ├── scenario_generator.py
│   │   │   ├── train.py
│   │   │   └── utils.py
│   │   ├── pyproject.toml
│   │   ├── servers/
│   │   │   └── python/
│   │   │       ├── mcp_alphavantage/
│   │   │       │   ├── README.md
│   │   │       │   ├── __init__.py
│   │   │       │   ├── scenarios/
│   │   │       │   │   ├── train.jsonl
│   │   │       │   │   └── val.jsonl
│   │   │       │   ├── scenarios.jsonl
│   │   │       │   ├── server.py
│   │   │       │   └── server_params.py
│   │   │       ├── mcp_balldontlie/
│   │   │       │   ├── README.md
│   │   │       │   ├── __init__.py
│   │   │       │   ├── scenarios/
│   │   │       │   │   ├── train.jsonl
│   │   │       │   │   └── val.jsonl
│   │   │       │   ├── server.py
│   │   │       │   └── server_params.py
│   │   │       └── mcp_googlemaps/
│   │   │           ├── README.md
│   │   │           ├── __init__.py
│   │   │           ├── pyproject.toml
│   │   │           ├── server.py
│   │   │           └── server_params.py
│   │   └── test_scenario_generation.py
│   ├── openenv_echo.py
│   ├── prisoners-dilemma.ipynb
│   ├── rock-paper-tool-use.ipynb
│   ├── roflbot/
│   │   └── .gitignore
│   ├── temporal_clue/
│   │   ├── temporal-clue-7b-async.ipynb
│   │   ├── temporal-clue-7b.ipynb
│   │   └── temporal-clue.py
│   ├── tic_tac_toe/
│   │   ├── display-benchmarks.ipynb
│   │   ├── game_utils.py
│   │   ├── rollout.py
│   │   └── tic-tac-toe.py
│   └── tic_tac_toe_self_play/
│       ├── deploy_step.py
│       ├── game_utils.py
│       ├── gather_trajectory_groups_by_index.py
│       ├── rollout.py
│       ├── train.py
│       └── train_o4_mini.py
├── licenses/
│   ├── GPL-3.0.txt
│   └── LGPL-3.0.txt
├── pyproject.toml
├── requirements/
│   └── backend.vcs.txt
├── scripts/
│   ├── bump_version.py
│   ├── ci/
│   │   ├── build_and_push_uv_cache.sh
│   │   └── compute_uv_fingerprint.py
│   ├── deploy-model.py
│   ├── kill-gpu-processes.sh
│   ├── migrate-s3-checkpoints.py
│   ├── publish.sh
│   └── setup.sh
├── skypilot-config.yaml
├── src/
│   ├── art/
│   │   ├── __init__.py
│   │   ├── api_costs.py
│   │   ├── auto_trajectory.py
│   │   ├── backend.py
│   │   ├── batches.py
│   │   ├── cli.py
│   │   ├── costs.py
│   │   ├── dev/
│   │   │   ├── __init__.py
│   │   │   ├── engine.py
│   │   │   ├── get_model_config.py
│   │   │   ├── model.py
│   │   │   ├── openai_server.py
│   │   │   ├── train.py
│   │   │   └── validate.py
│   │   ├── errors.py
│   │   ├── gather.py
│   │   ├── guided_completion.py
│   │   ├── langgraph/
│   │   │   ├── __init__.py
│   │   │   ├── llm_wrapper.py
│   │   │   ├── logging.py
│   │   │   └── message_utils.py
│   │   ├── local/
│   │   │   ├── __init__.py
│   │   │   ├── backend.py
│   │   │   ├── checkpoints.py
│   │   │   └── service.py
│   │   ├── loss.py
│   │   ├── mcp/
│   │   │   ├── __init__.py
│   │   │   ├── default_tools.py
│   │   │   ├── generate_scenarios.py
│   │   │   └── types.py
│   │   ├── megatron/
│   │   │   ├── __init__.py
│   │   │   ├── backend.py
│   │   │   ├── flex_attention.py
│   │   │   ├── lora.py
│   │   │   ├── offload.py
│   │   │   ├── provider.py
│   │   │   ├── service.py
│   │   │   ├── setup.sh
│   │   │   └── train.py
│   │   ├── metrics.py
│   │   ├── metrics_taxonomy.py
│   │   ├── model.py
│   │   ├── openai.py
│   │   ├── pipeline_trainer/
│   │   │   ├── __init__.py
│   │   │   ├── binary_prefix_tool_pipeline.py
│   │   │   ├── state.py
│   │   │   ├── status.py
│   │   │   ├── trainer.py
│   │   │   ├── types.py
│   │   │   └── yes_no_maybe_pipeline.py
│   │   ├── preprocessing/
│   │   │   ├── __init__.py
│   │   │   ├── inputs.py
│   │   │   ├── pack.py
│   │   │   └── tokenize.py
│   │   ├── py.typed
│   │   ├── rewards/
│   │   │   ├── __init__.py
│   │   │   └── ruler.py
│   │   ├── serverless/
│   │   │   ├── __init__.py
│   │   │   ├── backend.py
│   │   │   └── client.py
│   │   ├── test/
│   │   │   ├── test_kl_advantage.py
│   │   │   └── test_step_skipping.py
│   │   ├── tinker/
│   │   │   ├── __init__.py
│   │   │   ├── backend.py
│   │   │   ├── cookbook_v/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hyperparam_utils.py
│   │   │   │   ├── image_processing_utils.py
│   │   │   │   ├── renderers/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── deepseek_v3.py
│   │   │   │   │   ├── gpt_oss.py
│   │   │   │   │   ├── kimi_k2.py
│   │   │   │   │   ├── kimi_k25.py
│   │   │   │   │   ├── kimi_k2_5_tool_declaration_ts.py
│   │   │   │   │   ├── llama3.py
│   │   │   │   │   ├── qwen3.py
│   │   │   │   │   ├── qwen3_5.py
│   │   │   │   │   └── role_colon.py
│   │   │   │   ├── tokenizer_utils.py
│   │   │   │   └── utils/
│   │   │   │       ├── __init__.py
│   │   │   │       └── misc_utils.py
│   │   │   ├── prefix_cache.py
│   │   │   ├── renderers.py
│   │   │   ├── server.py
│   │   │   └── service.py
│   │   ├── tinker_native/
│   │   │   ├── __init__.py
│   │   │   ├── backend.py
│   │   │   └── data.py
│   │   ├── trajectories.py
│   │   ├── transformers/
│   │   │   ├── __init__.py
│   │   │   └── patches.py
│   │   ├── types.py
│   │   ├── unsloth/
│   │   │   ├── __init__.py
│   │   │   ├── service.py
│   │   │   └── train.py
│   │   ├── utils/
│   │   │   ├── __init__.py
│   │   │   ├── benchmark_rollout.py
│   │   │   ├── benchmarking/
│   │   │   │   ├── aggregate_trajectories.py
│   │   │   │   ├── charts/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── percentage_comparison_bar_chart.py
│   │   │   │   │   └── training_progress_chart.py
│   │   │   │   ├── filter_model_split.py
│   │   │   │   ├── load_trajectories.py
│   │   │   │   ├── log_constant_metrics_wandb.py
│   │   │   │   ├── pull_model_trajectories.py
│   │   │   │   └── types.py
│   │   │   ├── convert_moe_lora.py
│   │   │   ├── deploy_model.py
│   │   │   ├── deployment/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── common.py
│   │   │   │   ├── legacy.py
│   │   │   │   ├── together.py
│   │   │   │   └── wandb.py
│   │   │   ├── format_message.py
│   │   │   ├── get_model_step.py
│   │   │   ├── get_repo_root_path.py
│   │   │   ├── group_aggregate.py
│   │   │   ├── iterate_dataset.py
│   │   │   ├── limit_concurrency.py
│   │   │   ├── litellm.py
│   │   │   ├── log_http_errors.py
│   │   │   ├── logging.py
│   │   │   ├── model_config.py
│   │   │   ├── old_benchmarking/
│   │   │   │   ├── calculate_step_metrics.py
│   │   │   │   ├── display_image_grid.py
│   │   │   │   ├── generate_comparison_table.py
│   │   │   │   ├── generate_line_graphs.py
│   │   │   │   ├── load_benchmarked_models.py
│   │   │   │   └── types.py
│   │   │   ├── output_dirs.py
│   │   │   ├── record_provenance.py
│   │   │   ├── retry.py
│   │   │   ├── s3.py
│   │   │   ├── s3_checkpoint_utils.py
│   │   │   ├── sft.py
│   │   │   ├── strip_logprobs.py
│   │   │   ├── suppress_litellm_serialization_warnings.py
│   │   │   ├── trajectory_logging.py
│   │   │   └── trajectory_migration.py
│   │   ├── vllm/
│   │   │   ├── __init__.py
│   │   │   ├── dedicated_server.py
│   │   │   ├── engine.py
│   │   │   ├── patches.py
│   │   │   └── server.py
│   │   └── yield_trajectory.py
│   └── mp_actors/
│       ├── __init__.py
│       ├── move.py
│       └── traceback.py
└── tests/
    ├── integration/
    │   ├── __init__.py
    │   ├── test_live_api_cost.py
    │   ├── test_multi_checkpoint_training.py
    │   ├── test_pipeline_localbackend_dedicated.py
    │   ├── test_provenance.py
    │   ├── test_push_and_fork.py
    │   ├── test_tinker_native_backend.py
    │   └── test_vllm_contract.py
    ├── integration.py
    ├── test_backend_train_api.py
    └── unit/
        ├── test_auto_trajectory.py
        ├── test_benchmarking_loader.py
        ├── test_dedicated_config.py
        ├── test_dedicated_server.py
        ├── test_frontend_logging.py
        ├── test_metric_routing.py
        ├── test_metrics_builder.py
        ├── test_metrics_taxonomy.py
        ├── test_model_openai_client_costs.py
        ├── test_mp_actors.py
        ├── test_multi_checkpoint_inference.py
        ├── test_pipeline_trainer_batching.py
        ├── test_pipeline_trainer_local_backend.py
        ├── test_pipeline_trainer_metrics.py
        ├── test_prefix_cache.py
        ├── test_ruler_metrics.py
        ├── test_sft.py
        ├── test_strip_logprobs.py
        ├── test_tinker_renderers.py
        ├── test_tokenize_trajectory_groups.ipynb
        ├── test_track_api_cost.py
        ├── test_trajectory_copy.py
        ├── test_trajectory_parquet.py
        ├── test_unsloth_metrics.py
        ├── test_vllm_patches_contract.py
        └── test_yield_trajectory.py