[
  {
    "path": ".gitattributes",
    "content": "*.png filter=lfs diff=lfs merge=lfs -text\n"
  },
  {
    "path": ".github/workflows/claude-review.yml",
    "content": "name: Claude PR Review\n\non:\n  pull_request:\n    types: [opened, synchronize, ready_for_review]\n\npermissions:\n  contents: read\n  pull-requests: write\n  issues: read\n  id-token: write\n\nconcurrency:\n  group: claude-review-${{ github.event.pull_request.number }}\n  cancel-in-progress: true\n\njobs:\n  review:\n    if: github.event.pull_request.draft == false\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n        with:\n          fetch-depth: 0\n\n      - name: Compose review prompt\n        id: compose\n        run: |\n          {\n            printf 'prompt<<PROMPT_EOF\\n'\n            if [ -f REVIEW.md ]; then\n              echo '# Highest-priority review instructions (from REVIEW.md at the repo root)'\n              echo 'Follow these rules as the authoritative guide for this review. If anything'\n              echo 'below contradicts a more generic review habit, follow these.'\n              echo\n              cat REVIEW.md\n              echo\n              echo '---'\n              echo\n            fi\n            cat <<'BASE'\n          Review this pull request against the main branch.\n\n          Tag every finding with a priority label: P0 (blocks merge), P1 (worth\n          fixing, not blocking), or P2 (informational / pre-existing). Open the\n          review body with a one-line tally (\"2 P0, 3 P1\", or\n          \"No blocking issues — 3 P1\", or \"LGTM\" if nothing). Cite file:line for\n          every behavior claim. Prefer inline comments over long summaries.\n\n          Fallback focus if REVIEW.md is missing: correctness, security (auth,\n          injection, SSRF), LiteLLM/Bedrock routing breakage, agent loop / streaming\n          regressions, test coverage for new behavior. 
Skip anything ruff already\n          catches.\n          BASE\n            printf 'PROMPT_EOF\\n'\n          } >> \"$GITHUB_OUTPUT\"\n\n      - uses: anthropics/claude-code-action@v1\n        with:\n          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}\n          track_progress: true\n          prompt: ${{ steps.compose.outputs.prompt }}\n"
  },
  {
    "path": ".github/workflows/claude.yml",
    "content": "name: Claude on Mention\n\non:\n  issue_comment:\n    types: [created]\n  pull_request_review_comment:\n    types: [created]\n  pull_request_review:\n    types: [submitted]\n  issues:\n    types: [opened, assigned]\n\npermissions:\n  contents: write\n  pull-requests: write\n  issues: write\n  id-token: write\n\njobs:\n  claude:\n    if: |\n      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||\n      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||\n      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||\n      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n        with:\n          fetch-depth: 0\n\n      - uses: anthropics/claude-code-action@v1\n        with:\n          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}\n          track_progress: true\n"
  },
  {
    "path": ".gitignore",
    "content": "# Python-generated files\n__pycache__/\n*.py[oc]\nbuild/\ndist/\nwheels/\n*.egg-info\n.pytest_cache/\n.mypy_cache/\n.tox/\n.coverage\nhtmlcov/\n.ipynb_checkpoints/\n\n# Virtual environments\n.venv/\nvenv/\nENV/\nenv/\n\n# Environment and Secrets\n.env\n.env.local\n.env.*\n!.env.example\n*.local\ncredentials*.json\n\n# OS-specific\n.DS_Store\nThumbs.db\n*.swp\n\n# IDE-specific\n.vscode/\n.idea/\n.cursor/\n.history/\n*.sublime-project\n*.sublime-workspace\n\n# Frontend (Node.js)\nfrontend/node_modules/\nfrontend/dist/\nfrontend/.cache/\nfrontend/*.local\nfrontend/.eslintcache\nfrontend/npm-debug.log*\nfrontend/yarn-debug.log*\nfrontend/yarn-error.log*\n\n# Docker\n.docker/\n\n# Eval (stale)\neval/\n\n# Project-specific\nsession_logs/\n/logs\nhf-agent-leaderboard/\nskills/\n.claude/\n*.jsonl\n*.csv\n\n# ML / Data\ndata/\ndatasets/\nmodels/\ncheckpoint-*/\nruns/\nwandb/\nfrontend/tsconfig.tsbuildinfo\n"
  },
  {
    "path": ".python-version",
    "content": "3.12\n"
  },
  {
    "path": "Dockerfile",
    "content": "# Stage 1: Build frontend\nFROM node:20-alpine AS frontend-builder\nWORKDIR /app/frontend\nCOPY frontend/package.json frontend/package-lock.json ./\nRUN npm install\nCOPY frontend/ ./\nRUN npm run build\n\n# Stage 2: Production\nFROM python:3.12-slim\n\n# Install uv directly from official image\nCOPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/\n\n# Create user with UID 1000 (required for HF Spaces)\nRUN useradd -m -u 1000 user\n\nWORKDIR /app\n\n# Install system dependencies\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n    git \\\n    curl \\\n    && rm -rf /var/lib/apt/lists/*\n\n# Copy dependency files\nCOPY pyproject.toml uv.lock ./\n\n# Install dependencies into /app/.venv\n# Use --frozen to ensure exact versions from uv.lock\nRUN uv sync --no-dev --frozen\n\n# Copy application code\nCOPY agent/ ./agent/\nCOPY backend/ ./backend/\nCOPY configs/ ./configs/\n\n# Copy built frontend\nCOPY --from=frontend-builder /app/frontend/dist ./static/\n\n# Create directories and set ownership\nRUN mkdir -p /app/session_logs && \\\n    chown -R user:user /app\n\n# Switch to non-root user\nUSER user\n\n# Set environment\nENV HOME=/home/user \\\n    PYTHONUNBUFFERED=1 \\\n    PYTHONPATH=/app \\\n    PATH=\"/app/.venv/bin:$PATH\"\n\n# Expose port\nEXPOSE 7860\n\n# Run the application from backend directory\nWORKDIR /app/backend\nCMD [\"bash\", \"start.sh\"]\n"
  },
  {
    "path": "README.md",
    "content": "<p align=\"center\">\n  <img src=\"frontend/public/smolagents.webp\" alt=\"smolagents logo\" width=\"160\" />\n</p>\n\n# ML Intern\n\nAn ML intern that autonomously researches, writes, and ships good quality ML releated code using the Hugging Face ecosystem — with deep access to docs, papers, datasets, and cloud compute.\n\n## Quick Start\n\n### Installation\n\n```bash\ngit clone git@github.com:huggingface/ml-intern.git\ncd ml-intern\nuv sync\nuv tool install -e .\n```\n\n#### That's it. Now `ml-intern` works from any directory:\n\n```bash\nml-intern\n```\n\nCreate a `.env` file in the project root (or export these in your shell):\n\n```bash\nANTHROPIC_API_KEY=<your-anthropic-api-key> # if using anthropic models\nHF_TOKEN=<your-hugging-face-token>\nGITHUB_TOKEN=<github-personal-access-token> \n```\nIf no `HF_TOKEN` is set, the CLI will prompt you to paste one on first launch. To get a GITHUB_TOKEN follow the tutorial [here](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-fine-grained-personal-access-token).\n\n### Usage\n\n**Interactive mode** (start a chat session):\n\n```bash\nml-intern\n```\n\n**Headless mode** (single prompt, auto-approve):\n\n```bash\nml-intern \"fine-tune llama on my dataset\"\n```\n\n**Options:**\n\n```bash\nml-intern --model anthropic/claude-opus-4-6 \"your prompt\"\nml-intern --max-iterations 100 \"your prompt\"\nml-intern --no-stream \"your prompt\"\n```\n\n## Architecture\n\n### Component Overview\n\n```\n┌─────────────────────────────────────────────────────────────┐\n│                         User/CLI                            │\n└────────────┬─────────────────────────────────────┬──────────┘\n             │ Operations                          │ Events\n             ↓ (user_input, exec_approval,         ↑\n      submission_queue  interrupt, compact, ...)  
event_queue\n             │                                          │\n             ↓                                          │\n┌────────────────────────────────────────────────────┐  │\n│            submission_loop (agent_loop.py)         │  │\n│  ┌──────────────────────────────────────────────┐  │  │\n│  │  1. Receive Operation from queue             │  │  │\n│  │  2. Route to handler (run_agent/compact/...) │  │  │\n│  └──────────────────────────────────────────────┘  │  │\n│                      ↓                             │  │\n│  ┌──────────────────────────────────────────────┐  │  │\n│  │         Handlers.run_agent()                 │  ├──┤\n│  │                                              │  │  │\n│  │  ┌────────────────────────────────────────┐  │  │  │\n│  │  │  Agentic Loop (max 300 iterations)     │  │  │  │\n│  │  │                                        │  │  │  │\n│  │  │  ┌──────────────────────────────────┐  │  │  │  │\n│  │  │  │ Session                          │  │  │  │  │\n│  │  │  │  ┌────────────────────────────┐  │  │  │  │  │\n│  │  │  │  │ ContextManager             │  │  │  │  │  │\n│  │  │  │  │ • Message history          │  │  │  │  │  │\n│  │  │  │  │   (litellm.Message[])      │  │  │  │  │  │\n│  │  │  │  │ • Auto-compaction (170k)   │  │  │  │  │  │\n│  │  │  │  │ • Session upload to HF     │  │  │  │  │  │\n│  │  │  │  └────────────────────────────┘  │  │  │  │  │\n│  │  │  │                                  │  │  │  │  │\n│  │  │  │  ┌────────────────────────────┐  │  │  │  │  │\n│  │  │  │  │ ToolRouter                 │  │  │  │  │  │\n│  │  │  │  │  ├─ HF docs & research     │  │  │  │  │  │\n│  │  │  │  │  ├─ HF repos, datasets,    │  │  │  │  │  │\n│  │  │  │  │  │  jobs, papers           │  │  │  │  │  │\n│  │  │  │  │  ├─ GitHub code search     │  │  │  │  │  │\n│  │  │  │  │  ├─ Sandbox & local tools  │  │  │  │  │  │\n│  │  │  │  │  ├─ Planning               │  │  │  │  │  │\n│  │  │  │  │  └─ MCP server tools      
 │  │  │  │  │  │\n│  │  │  │  └────────────────────────────┘  │  │  │  │  │\n│  │  │  └──────────────────────────────────┘  │  │  │  │\n│  │  │                                        │  │  │  │\n│  │  │  ┌──────────────────────────────────┐  │  │  │  │\n│  │  │  │ Doom Loop Detector               │  │  │  │  │\n│  │  │  │ • Detects repeated tool patterns │  │  │  │  │\n│  │  │  │ • Injects corrective prompts     │  │  │  │  │\n│  │  │  └──────────────────────────────────┘  │  │  │  │\n│  │  │                                        │  │  │  │\n│  │  │  Loop:                                 │  │  │  │\n│  │  │    1. LLM call (litellm.acompletion)   │  │  │  │\n│  │  │       ↓                                │  │  │  │\n│  │  │    2. Parse tool_calls[]               │  │  │  │\n│  │  │       ↓                                │  │  │  │\n│  │  │    3. Approval check                   │  │  │  │\n│  │  │       (jobs, sandbox, destructive ops) │  │  │  │\n│  │  │       ↓                                │  │  │  │\n│  │  │    4. Execute via ToolRouter           │  │  │  │\n│  │  │       ↓                                │  │  │  │\n│  │  │    5. Add results to ContextManager    │  │  │  │\n│  │  │       ↓                                │  │  │  │\n│  │  │    6. Repeat if tool_calls exist       │  │  │  │\n│  │  └────────────────────────────────────────┘  │  │  │\n│  └──────────────────────────────────────────────┘  │  │\n└────────────────────────────────────────────────────┴──┘\n```\n\n### Agentic Loop Flow\n\n```\nUser Message\n     ↓\n[Add to ContextManager]\n     ↓\n     ╔═══════════════════════════════════════════╗\n     ║      Iteration Loop (max 300)             ║\n     ║                                           ║\n     ║  Get messages + tool specs                ║\n     ║         ↓                                 ║\n     ║  litellm.acompletion()                    ║\n     ║         ↓                                 ║\n     ║  Has tool_calls? 
──No──> Done             ║\n     ║         │                                 ║\n     ║        Yes                                ║\n     ║         ↓                                 ║\n     ║  Add assistant msg (with tool_calls)      ║\n     ║         ↓                                 ║\n     ║  Doom loop check                          ║\n     ║         ↓                                 ║\n     ║  For each tool_call:                      ║\n     ║    • Needs approval? ──Yes──> Wait for    ║\n     ║    │                         user confirm ║\n     ║    No                                     ║\n     ║    ↓                                      ║\n     ║    • ToolRouter.execute_tool()            ║\n     ║    • Add result to ContextManager         ║\n     ║         ↓                                 ║\n     ║  Continue loop ─────────────────┐         ║\n     ║         ↑                       │         ║\n     ║         └───────────────────────┘         ║\n     ╚═══════════════════════════════════════════╝\n```\n\n## Events\n\nThe agent emits the following events via `event_queue`:\n\n- `processing` - Starting to process user input\n- `ready` - Agent is ready for input\n- `assistant_chunk` - Streaming token chunk\n- `assistant_message` - Complete LLM response text\n- `assistant_stream_end` - Token stream finished\n- `tool_call` - Tool being called with arguments\n- `tool_output` - Tool execution result\n- `tool_log` - Informational tool log message\n- `tool_state_change` - Tool execution state transition\n- `approval_required` - Requesting user approval for sensitive operations\n- `turn_complete` - Agent finished processing\n- `error` - Error occurred during processing\n- `interrupted` - Agent was interrupted\n- `compacted` - Context was compacted\n- `undo_complete` - Undo operation completed\n- `shutdown` - Agent shutting down\n\n## Development\n\n### Adding Built-in Tools\n\nEdit `agent/core/tools.py`:\n\n```python\ndef create_builtin_tools() -> list[ToolSpec]:\n    
return [\n        ToolSpec(\n            name=\"your_tool\",\n            description=\"What your tool does\",\n            parameters={\n                \"type\": \"object\",\n                \"properties\": {\n                    \"param\": {\"type\": \"string\", \"description\": \"Parameter description\"}\n                },\n                \"required\": [\"param\"]\n            },\n            handler=your_async_handler\n        ),\n        # ... existing tools\n    ]\n```\n\n### Adding MCP Servers\n\nEdit `configs/main_agent_config.json`:\n\n```json\n{\n  \"model_name\": \"anthropic/claude-sonnet-4-5-20250929\",\n  \"mcpServers\": {\n    \"your-server-name\": {\n      \"transport\": \"http\",\n      \"url\": \"https://example.com/mcp\",\n      \"headers\": {\n        \"Authorization\": \"Bearer ${YOUR_TOKEN}\"\n      }\n    }\n  }\n}\n```\n\nNote: Environment variables like `${YOUR_TOKEN}` are auto-substituted from `.env`.\n"
  },
  {
    "path": "REVIEW.md",
    "content": "# Review instructions\n\nThese rules override the default review guidance. Treat them as the highest-priority\ninstruction block for any review of this repo. If something here contradicts a more\ngeneric review habit, follow these.\n\n## Severity levels\n\nEvery finding carries one of three priority labels:\n\n- **P0** — blocks merge.\n- **P1** — worth fixing, not blocking.\n- **P2** — informational.\n\nWrite labels as plain text (`P0`, `P1`, `P2`) in finding headers. Do not use\nemoji or colored markers. Use judgment on what belongs at which level — this\nrepo does not enumerate P0 cases; read the code and decide.\n\n## Default bias: rigor\n\nReviews gate merges. This is an open-source repo that takes PRs from anyone; the\nmaintainer team is small and relies on the review to catch what they don't have\ntime to verify themselves. **Default bias is rigor, not speed.** When in doubt\non a P0-class concern, investigate further before deciding whether to flag — a\nfalse negative ships a bug to production, a false positive costs the contributor\none round trip.\n\nRigor is not nitpicking. The P1 cap, \"do not report\" skip list, and verification\nbar all still apply. Rigor means going deep on a small number of real concerns,\nnot surfacing a large number of shallow ones. Prefer one well-investigated P0\nover three speculative P1s.\n\n**Hold the line on P0.** If the author pushes back on a P0 finding without a fix\nthat actually addresses the root cause, re-state the concern with added\ncitations. Only accept the pushback if the author points to code or behavior you\nmissed. Do not soften a P0 because the contributor is polite or new to the repo.\n\nFor P1 and P2: if the author defers or pushes back without fixing, accept it\nsilently — do not re-flag on subsequent commits. 
P1/P2 are informational; the\nauthor may defer to a follow-up issue at their discretion.\n\nIf Claude and the author repeatedly disagree on the same class of finding, the\nsignal is that REVIEW.md is missing a rule; note it once in the PR summary as\n`suggest-rule: <short description>` and stop.\n\n## Investigate before posting\n\nThe depth of your analysis determines the strength of your finding. For any\nP0-class concern, before writing it up:\n\n- Read the relevant callers and callees, not just the diff. Use Read and Grep\n  to open files the diff doesn't touch but the changed code interacts with.\n- Trace the full chain end-to-end for routing, auth, and agent-loop findings.\n  Cite each hop by `file:line`, not just the suspicious line.\n- Check whether the codebase already has an established pattern for this kind\n  of change (`grep` for similar call sites, similar tool definitions, similar\n  route guards). If the PR introduces a new approach where an established\n  pattern exists, flag that — divergence from the existing pattern is usually a\n  regression vector even when the new code \"works.\"\n- Confirm the specific behavior you're claiming. \"This breaks X\" must be\n  grounded in either the code handling X or a test exercising X, not in\n  inference from naming or structure.\n\nA finding you \"spotted\" by scanning the diff is more likely to be a false\npositive than a finding you verified by reading the code around it.\n\n## P1 cap\n\nReport at most **3** P1 findings per review. If you found more, say \"plus N\nsimilar items\" in the summary. If everything you found is P1 or below, open the\nsummary with \"No blocking issues.\"\n\n## Re-review convergence\n\nIf this PR has already received a Claude review (there is a prior review comment\nby the `claude` bot), suppress new P1 findings and post only P0 ones. Do not\nre-post P1s that were already flagged on earlier commits. 
If the author pushed a\nfix for a previously flagged issue, acknowledge it in one line rather than\nre-flagging.\n\n## Do not report\n\nAnything in these paths — skip entirely:\n\n- `frontend/node_modules/**`, `**/*.lock`, `uv.lock`, `package-lock.json`\n- `hf_agent.egg-info/**`, `.ruff_cache/**`, `.pytest_cache/**`, `.venv/**`\n- `session_logs/**`, `reports/**`\n- Anything under a `gen/` or `generated/` path\n\nAnything speculative — do not post:\n\n- \"This might be slow\" without a concrete complexity claim tied to a specific\n  input size\n- Hypothetical race conditions without a concrete interleaving\n\n## Dependency PRs\n\nFor PRs whose diff is only a lockfile bump, a `pyproject.toml` change, or a\nnew dependency, the code rules above don't apply — risks shift to provenance\nand framing. Every claim in the title or body (CVE IDs, version numbers,\nbehavior fixes) must match what the diff actually does, and any new\ntransitive dep needs justification. A PR that lies in its framing is P0\nregardless of whether the code change is safe in isolation.\n\n## Verification bar\n\nEvery behavior claim in a finding must cite `file:line`. \"This breaks X\" is not\nactionable without a line reference. If you cannot cite a line, do not post\nthe finding.\n\n## Summary shape\n\nOpen the review body with a single-line tally and an explicit merge verdict, on\ntwo lines:\n\n```\n2 P0, 3 P1\nVerdict: changes requested\n```\n\nValid verdicts:\n\n- **Verdict: ready to merge** — no P0 findings, contributor can merge as-is\n  once any CI passes\n- **Verdict: changes requested** — at least one P0 that must be addressed\n  before merging\n- **Verdict: needs discussion** — a design-level concern the maintainer should\n  weigh in on before the contributor iterates (use sparingly)\n\nIf it's a clean review, write `LGTM` followed by `Verdict: ready to merge`.\n\nThen a **What I checked** bullet list — one line per major area you examined,\nregardless of whether you found anything. 
This gives the maintainer visible\ncoverage at a glance and lets them decide whether to spot-check areas you\ndidn't touch.\n"
  },
  {
    "path": "agent/README.md",
    "content": "# Agent\n\nAsync agent loop with LiteLLM.\n\n## Architecture\n\n**Queue-based async system:**\n- Submissions in (user input) → Agent Loop → Events output for possible UI updates\n- Session maintains state (context + tools) for possible future Context Engineering\n- Handlers operations like (USER_INPUT, INTERRUPT, COMPACT, UNDO, SHUTDOWN) for possible UI control\n\n## Components\n\n| Component | Purpose | Long Term Goal |\n|-----------|---------|----------------|\n| **`agent_loop.py`** | Core agentic loop: processes user input, calls LLM via LiteLLM, executes tool calls iteratively until completion, emits events | Support parallel tool execution, streaming responses, and advanced reasoning patterns |\n| **`session.py`** | Maintains session state and interaction with potential UI (context, config, event queue), handles interrupts, assigns unique session IDs for tracing | Enable plugging in different UIs (CLI, web, API, programmatic etc.) |\n| **`tools.py`** | `ToolRouter` manages potential built-in tools (e.g. bash, read_file, write_file which are dummy implementations rn) + MCP tools, converts specs to OpenAI format | Be the place for tools that can be used by the agent. All crazy tool design happens here. |\n| **`context_manager/`** | Manages conversation history, very rudimentary context engineering support | Implement intelligent context engineering to keep the agent on track |\n| **`config.py`** | Loads JSON config for the agent | Support different configs etc. |\n| **`main.py`** | Interactive CLI with async queue architecture (submission→agent, agent→events) (simple way to interact with the agent now)| Serve as reference implementation for other UIs (web, API, programmatic) |\n"
  },
  {
    "path": "agent/__init__.py",
    "content": "\"\"\"\nHF Agent - Main agent module\n\"\"\"\n\nimport litellm\n\n# Global LiteLLM behavior — set once at package import so both CLI and\n# backend entries share the same config.\n#   drop_params: quietly drop unsupported params rather than raising\n#   suppress_debug_info: hide the noisy \"Give Feedback\" banner on errors\n#   modify_params: let LiteLLM patch Anthropic's tool-call requirements\n#     (synthesize a dummy tool spec when we call completion on a history\n#     that contains tool_calls but aren't passing `tools=` — happens\n#     during summarization / session seeding).\nlitellm.drop_params = True\nlitellm.suppress_debug_info = True\nlitellm.modify_params = True\n\nfrom agent.core.agent_loop import submission_loop  # noqa: E402\n\n__all__ = [\"submission_loop\"]\n"
  },
  {
    "path": "agent/config.py",
    "content": "import json\nimport os\nimport re\nfrom pathlib import Path\nfrom typing import Any, Union\n\nfrom dotenv import load_dotenv\n\n# Project root: two levels up from this file (agent/config.py -> project root)\n_PROJECT_ROOT = Path(__file__).resolve().parent.parent\nfrom fastmcp.mcp_config import (\n    RemoteMCPServer,\n    StdioMCPServer,\n)\nfrom pydantic import BaseModel\n\n# These two are the canonical server config types for MCP servers.\nMCPServerConfig = Union[StdioMCPServer, RemoteMCPServer]\n\n\nclass Config(BaseModel):\n    \"\"\"Configuration manager\"\"\"\n\n    model_name: str\n    mcpServers: dict[str, MCPServerConfig] = {}\n    save_sessions: bool = True\n    session_dataset_repo: str = \"akseljoonas/hf-agent-sessions\"\n    auto_save_interval: int = 3  # Save every N user turns (0 = disabled)\n    yolo_mode: bool = False  # Auto-approve all tool calls without confirmation\n    max_iterations: int = 300  # Max LLM calls per agent turn (-1 = unlimited)\n\n    # Permission control parameters\n    confirm_cpu_jobs: bool = True\n    auto_file_upload: bool = False\n\n    # Reasoning effort *preference* — the ceiling the user wants. The probe\n    # on `/model` walks a cascade down from here (``max`` → ``xhigh`` → ``high``\n    # → …) and caches per-model what the provider actually accepted in\n    # ``Session.model_effective_effort``. Default ``max`` because we'd rather\n    # burn tokens thinking than ship a wrong ML recipe; the cascade lands on\n    # whichever level the model supports (``high`` for GPT-5 / HF router,\n    # ``xhigh`` or ``max`` for Anthropic 4.6 / 4.7). 
``None`` = thinking off.\n    # Valid values: None | \"minimal\" | \"low\" | \"medium\" | \"high\" | \"xhigh\" | \"max\"\n    reasoning_effort: str | None = \"max\"\n\n\ndef substitute_env_vars(obj: Any) -> Any:\n    \"\"\"\n    Recursively substitute environment variables in any data structure.\n\n    Supports ${VAR_NAME} syntax for required variables and ${VAR_NAME:-default} for optional.\n    \"\"\"\n    if isinstance(obj, str):\n        pattern = r\"\\$\\{([^}:]+)(?::(-)?([^}]*))?\\}\"\n\n        def replacer(match):\n            var_name = match.group(1)\n            has_default = match.group(2) is not None\n            default_value = match.group(3) if has_default else None\n\n            env_value = os.environ.get(var_name)\n\n            if env_value is not None:\n                return env_value\n            elif has_default:\n                return default_value or \"\"\n            else:\n                raise ValueError(\n                    f\"Environment variable '{var_name}' is not set. 
\"\n                    f\"Add it to your .env file.\"\n                )\n\n        return re.sub(pattern, replacer, obj)\n\n    elif isinstance(obj, dict):\n        return {key: substitute_env_vars(value) for key, value in obj.items()}\n\n    elif isinstance(obj, list):\n        return [substitute_env_vars(item) for item in obj]\n\n    return obj\n\n\ndef load_config(config_path: str = \"config.json\") -> Config:\n    \"\"\"\n    Load configuration with environment variable substitution.\n\n    Use ${VAR_NAME} in your JSON for any secret.\n    Automatically loads from .env file.\n    \"\"\"\n    # Load .env from project root first (so it works from any directory),\n    # then CWD .env can override if present\n    load_dotenv(_PROJECT_ROOT / \".env\")\n    load_dotenv(override=False)\n\n    with open(config_path, \"r\") as f:\n        raw_config = json.load(f)\n\n    config_with_env = substitute_env_vars(raw_config)\n    return Config.model_validate(config_with_env)\n"
  },
  {
    "path": "agent/context_manager/__init__.py",
    "content": "\"\"\"\nContext manager for handling conversation history\n\"\"\"\n\nfrom agent.context_manager.manager import ContextManager\n\n__all__ = [\"ContextManager\"]\n"
  },
  {
    "path": "agent/context_manager/manager.py",
    "content": "\"\"\"\nContext management for conversation history\n\"\"\"\n\nimport logging\nimport os\nimport zoneinfo\nfrom datetime import datetime\nfrom pathlib import Path\nfrom typing import Any\n\nimport yaml\nfrom jinja2 import Template\nfrom litellm import Message, acompletion\n\nfrom agent.core.prompt_caching import with_prompt_caching\n\nlogger = logging.getLogger(__name__)\n\n_HF_WHOAMI_URL = \"https://huggingface.co/api/whoami-v2\"\n_HF_WHOAMI_TIMEOUT = 5  # seconds\n\n\ndef _get_hf_username(hf_token: str | None = None) -> str:\n    \"\"\"Return the HF username for the given token.\n\n    Uses subprocess + curl to avoid Python HTTP client IPv6 issues that\n    cause 40+ second hangs (httpx/urllib try IPv6 first which times out\n    at OS level before falling back to IPv4 — the \"Happy Eyeballs\" problem).\n    \"\"\"\n    import json\n    import subprocess\n    import time as _t\n\n    if not hf_token:\n        logger.warning(\"No hf_token provided, using 'unknown' as username\")\n        return \"unknown\"\n\n    t0 = _t.monotonic()\n    try:\n        result = subprocess.run(\n            [\n                \"curl\",\n                \"-s\",\n                \"-4\",  # force IPv4\n                \"-m\",\n                str(_HF_WHOAMI_TIMEOUT),  # max time\n                \"-H\",\n                f\"Authorization: Bearer {hf_token}\",\n                _HF_WHOAMI_URL,\n            ],\n            capture_output=True,\n            text=True,\n            timeout=_HF_WHOAMI_TIMEOUT + 2,\n        )\n        t1 = _t.monotonic()\n        if result.returncode == 0 and result.stdout:\n            data = json.loads(result.stdout)\n            username = data.get(\"name\", \"unknown\")\n            logger.info(f\"HF username resolved to '{username}' in {t1 - t0:.2f}s\")\n            return username\n        else:\n            logger.warning(\n                f\"curl whoami failed (rc={result.returncode}) in {t1 - t0:.2f}s\"\n            )\n            
return \"unknown\"\n    except Exception as e:\n        t1 = _t.monotonic()\n        logger.warning(f\"HF whoami failed in {t1 - t0:.2f}s: {e}\")\n        return \"unknown\"\n\n\n_COMPACT_PROMPT = (\n    \"Please provide a concise summary of the conversation above, focusing on \"\n    \"key decisions, the 'why' behind the decisions, problems solved, and \"\n    \"important context needed for developing further. Your summary will be \"\n    \"given to someone who has never worked on this project before and they \"\n    \"will be have to be filled in.\"\n)\n\n# Used when seeding a brand-new session from prior browser-cached messages.\n# Here we're writing a note to *ourselves* — so preserve the tool-call trail,\n# files produced, and planned next steps in first person. Optimized for\n# continuity, not brevity.\n_RESTORE_PROMPT = (\n    \"You're about to be restored into a fresh session with no memory of the \"\n    \"conversation above. Write a first-person note to your future self so \"\n    \"you can continue right where you left off. Include:\\n\"\n    \"  • What the user originally asked for and what progress you've made.\\n\"\n    \"  • Every tool you called, with arguments and a one-line result summary.\\n\"\n    \"  • Any code, files, scripts, or artifacts you produced (with paths).\\n\"\n    \"  • Key decisions and the reasoning behind them.\\n\"\n    \"  • What you were planning to do next.\\n\\n\"\n    \"Don't be cute. Be specific. 
This is the only context you'll have.\"\n)\n\n\nasync def summarize_messages(\n    messages: list[Message],\n    model_name: str,\n    hf_token: str | None = None,\n    max_tokens: int = 2000,\n    tool_specs: list[dict] | None = None,\n    prompt: str = _COMPACT_PROMPT,\n) -> tuple[str, int]:\n    \"\"\"Run a summarization prompt against a list of messages.\n\n    ``prompt`` defaults to the compaction prompt (terse, decision-focused).\n    Callers seeding a new session after a restart should pass ``_RESTORE_PROMPT``\n    instead — it preserves the tool-call trail so the agent can answer\n    follow-up questions about what it did.\n\n    Returns ``(summary_text, completion_tokens)``.\n    \"\"\"\n    from agent.core.llm_params import _resolve_llm_params\n\n    prompt_messages = list(messages) + [Message(role=\"user\", content=prompt)]\n    llm_params = _resolve_llm_params(model_name, hf_token, reasoning_effort=\"high\")\n    prompt_messages, tool_specs = with_prompt_caching(\n        prompt_messages, tool_specs, llm_params.get(\"model\")\n    )\n    response = await acompletion(\n        messages=prompt_messages,\n        max_completion_tokens=max_tokens,\n        tools=tool_specs,\n        **llm_params,\n    )\n    summary = response.choices[0].message.content or \"\"\n    completion_tokens = response.usage.completion_tokens if response.usage else 0\n    return summary, completion_tokens\n\n\nclass ContextManager:\n    \"\"\"Manages conversation context and message history for the agent\"\"\"\n\n    def __init__(\n        self,\n        model_max_tokens: int = 180_000,\n        compact_size: float = 0.1,\n        untouched_messages: int = 5,\n        tool_specs: list[dict[str, Any]] | None = None,\n        prompt_file_suffix: str = \"system_prompt_v3.yaml\",\n        hf_token: str | None = None,\n        local_mode: bool = False,\n    ):\n        self.system_prompt = self._load_system_prompt(\n            tool_specs or [],\n            
prompt_file_suffix=prompt_file_suffix,
CLI-specific context for local mode\n        if local_mode:\n            import os\n            cwd = os.getcwd()\n            local_context = (\n                f\"\\n\\n# CLI / Local mode\\n\\n\"\n                f\"You are running as a local CLI tool on the user's machine. \"\n                f\"There is NO sandbox — bash, read, write, and edit operate directly \"\n                f\"on the local filesystem.\\n\\n\"\n                f\"Working directory: {cwd}\\n\"\n                f\"Use absolute paths or paths relative to the working directory. \"\n                f\"Do NOT use /app/ paths — that is a sandbox convention that does not apply here.\\n\"\n                f\"The sandbox_create tool is NOT available. Run code directly with bash.\"\n            )\n            static_prompt += local_context\n\n        return (\n            f\"{static_prompt}\\n\\n\"\n            f\"[Session context: Date={current_date}, Time={current_time}, \"\n            f\"Timezone={current_timezone}, User={hf_user_info}, \"\n            f\"Tools={len(tool_specs)}]\"\n        )\n\n    def add_message(self, message: Message, token_count: int = None) -> None:\n        \"\"\"Add a message to the history\"\"\"\n        if token_count:\n            self.running_context_usage = token_count\n        self.items.append(message)\n\n    def get_messages(self) -> list[Message]:\n        \"\"\"Get all messages for sending to LLM.\n\n        Patches any dangling tool_calls (assistant messages with tool_calls\n        that have no matching tool-result message) so the LLM API doesn't\n        reject the request.\n        \"\"\"\n        self._patch_dangling_tool_calls()\n        return self.items\n\n    @staticmethod\n    def _normalize_tool_calls(msg: Message) -> None:\n        \"\"\"Ensure msg.tool_calls contains proper ToolCall objects, not dicts.\n\n        litellm's Message has validate_assignment=False (Pydantic v2 default),\n        so direct attribute assignment (e.g. 
inside litellm's streaming handler)\n        can leave raw dicts.  Re-assigning via the constructor fixes this.\n        \"\"\"\n        from litellm import ChatCompletionMessageToolCall as ToolCall\n\n        tool_calls = getattr(msg, \"tool_calls\", None)\n        if not tool_calls:\n            return\n        needs_fix = any(isinstance(tc, dict) for tc in tool_calls)\n        if not needs_fix:\n            return\n        msg.tool_calls = [\n            tc if not isinstance(tc, dict) else ToolCall(**tc) for tc in tool_calls\n        ]\n\n    def _patch_dangling_tool_calls(self) -> None:\n        \"\"\"Add stub tool results for any tool_calls that lack a matching result.\n\n        Scans backwards to find the last assistant message with tool_calls,\n        which may not be items[-1] if some tool results were already added.\n        \"\"\"\n        if not self.items:\n            return\n\n        # Find the last assistant message with tool_calls\n        assistant_msg = None\n        for i in range(len(self.items) - 1, -1, -1):\n            msg = self.items[i]\n            if getattr(msg, \"role\", None) == \"assistant\" and getattr(\n                msg, \"tool_calls\", None\n            ):\n                assistant_msg = msg\n                break\n            # Stop scanning once we hit a user message — anything before\n            # that belongs to a previous (complete) turn.\n            if getattr(msg, \"role\", None) == \"user\":\n                break\n\n        if not assistant_msg:\n            return\n\n        self._normalize_tool_calls(assistant_msg)\n        answered_ids = {\n            getattr(m, \"tool_call_id\", None)\n            for m in self.items\n            if getattr(m, \"role\", None) == \"tool\"\n        }\n        for tc in assistant_msg.tool_calls:\n            if tc.id not in answered_ids:\n                self.items.append(\n                    Message(\n                        role=\"tool\",\n                        
content=\"Tool was not executed (interrupted or error).\",\n                        tool_call_id=tc.id,\n                        name=tc.function.name,\n                    )\n                )\n\n    def undo_last_turn(self) -> bool:\n        \"\"\"Remove the last complete turn (user msg + all assistant/tool msgs that follow).\n\n        Pops from the end until the last user message is removed, keeping the\n        tool_use/tool_result pairing valid. Never removes the system message.\n\n        Returns True if a user message was found and removed.\n        \"\"\"\n        if len(self.items) <= 1:\n            return False\n\n        while len(self.items) > 1:\n            msg = self.items.pop()\n            if getattr(msg, \"role\", None) == \"user\":\n                return True\n\n        return False\n\n    def truncate_to_user_message(self, user_message_index: int) -> bool:\n        \"\"\"Truncate history to just before the Nth user message (0-indexed).\n\n        Removes that user message and everything after it.\n        System message (index 0) is never removed.\n\n        Returns True if the target user message was found and removed.\n        \"\"\"\n        count = 0\n        for i, msg in enumerate(self.items):\n            if i == 0:\n                continue  # skip system message\n            if getattr(msg, \"role\", None) == \"user\":\n                if count == user_message_index:\n                    self.items = self.items[:i]\n                    return True\n                count += 1\n        return False\n\n    # Compaction fires at 90% of model_max_tokens so there's headroom for\n    # the next turn's prompt + response before we actually hit the ceiling.\n    _COMPACT_THRESHOLD_RATIO = 0.9\n\n    @property\n    def compaction_threshold(self) -> int:\n        \"\"\"Token count at which `compact()` kicks in.\"\"\"\n        return int(self.model_max_tokens * self._COMPACT_THRESHOLD_RATIO)\n\n    @property\n    def needs_compaction(self) -> 
bool:\n        return self.running_context_usage > self.compaction_threshold and bool(self.items)\n\n    async def compact(\n        self,\n        model_name: str,\n        tool_specs: list[dict] | None = None,\n        hf_token: str | None = None,\n    ) -> None:\n        \"\"\"Remove old messages to keep history under target size\"\"\"\n        if not self.needs_compaction:\n            return\n\n        system_msg = (\n            self.items[0] if self.items and self.items[0].role == \"system\" else None\n        )\n\n        # Preserve the first user message (task prompt) — never summarize it\n        first_user_msg = None\n        first_user_idx = 1\n        for i in range(1, len(self.items)):\n            if getattr(self.items[i], \"role\", None) == \"user\":\n                first_user_msg = self.items[i]\n                first_user_idx = i\n                break\n\n        # Don't summarize a certain number of just-preceding messages\n        # Walk back to find a user message to make sure we keep an assistant -> user ->\n        # assistant general conversation structure\n        idx = len(self.items) - self.untouched_messages\n        while idx > 1 and self.items[idx].role != \"user\":\n            idx -= 1\n\n        recent_messages = self.items[idx:]\n        messages_to_summarize = self.items[first_user_idx + 1:idx]\n\n        # improbable, messages would have to very long\n        if not messages_to_summarize:\n            return\n\n        summary, completion_tokens = await summarize_messages(\n            messages_to_summarize,\n            model_name=model_name,\n            hf_token=hf_token,\n            max_tokens=self.compact_size,\n            tool_specs=tool_specs,\n            prompt=_COMPACT_PROMPT,\n        )\n        summarized_message = Message(role=\"assistant\", content=summary)\n\n        # Reconstruct: system + first user msg + summary + recent messages\n        head = [system_msg] if system_msg else []\n        if first_user_msg:\n 
           head.append(first_user_msg)\n        self.items = head + [summarized_message] + recent_messages\n\n        # Count the actual post-compact context — system prompt + first user\n        # turn + summary + the preserved tail all contribute, not just the\n        # summary. litellm.token_counter uses the model's real tokenizer.\n        from litellm import token_counter\n\n        try:\n            self.running_context_usage = token_counter(\n                model=model_name,\n                messages=[m.model_dump() for m in self.items],\n            )\n        except Exception as e:\n            logger.warning(\"token_counter failed post-compact (%s); falling back to rough estimate\", e)\n            self.running_context_usage = len(self.system_prompt) // 4 + completion_tokens\n"
  },
  {
    "path": "agent/core/__init__.py",
    "content": "\"\"\"\nCore agent implementation\nContains the main agent logic, decision-making, and orchestration\n\"\"\"\n\nfrom agent.core.tools import ToolRouter, ToolSpec, create_builtin_tools\n\n__all__ = [\n    \"ToolRouter\",\n    \"ToolSpec\",\n    \"create_builtin_tools\",\n]\n"
  },
  {
    "path": "agent/core/agent_loop.py",
    "content": "\"\"\"loop\nMain agent implementation with integrated tool system and MCP support\n\"\"\"\n\nimport asyncio\nimport json\nimport logging\nimport os\nfrom dataclasses import dataclass\n\nfrom litellm import ChatCompletionMessageToolCall, Message, acompletion\nfrom litellm.exceptions import ContextWindowExceededError\n\nfrom agent.config import Config\nfrom agent.core.doom_loop import check_for_doom_loop\nfrom agent.core.llm_params import _resolve_llm_params\nfrom agent.core.prompt_caching import with_prompt_caching\nfrom agent.core.session import Event, OpType, Session\nfrom agent.core.tools import ToolRouter\nfrom agent.tools.jobs_tool import CPU_FLAVORS\n\nlogger = logging.getLogger(__name__)\n\nToolCall = ChatCompletionMessageToolCall\n\n\ndef _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:\n    \"\"\"\n    Validate tool arguments structure.\n\n    Returns:\n        (is_valid, error_message)\n    \"\"\"\n    args = tool_args.get(\"args\", {})\n    # Sometimes LLM passes args as string instead of dict\n    if isinstance(args, str):\n        return (\n            False,\n            f\"Tool call error: 'args' must be a JSON object, not a string. You passed: {repr(args)}\",\n        )\n    if not isinstance(args, dict) and args is not None:\n        return (\n            False,\n            f\"Tool call error: 'args' must be a JSON object. 
You passed type: {type(args).__name__}\",\n        )\n    return True, None\n\n\ndef _needs_approval(\n    tool_name: str, tool_args: dict, config: Config | None = None\n) -> bool:\n    \"\"\"Check if a tool call requires user approval before execution.\"\"\"\n    # Yolo mode: skip all approvals\n    if config and config.yolo_mode:\n        return False\n\n    # If args are malformed, skip approval (validation error will be shown later)\n    args_valid, _ = _validate_tool_args(tool_args)\n    if not args_valid:\n        return False\n\n    if tool_name == \"sandbox_create\":\n        return True\n\n    if tool_name == \"hf_jobs\":\n        operation = tool_args.get(\"operation\", \"\")\n        if operation not in [\"run\", \"uv\", \"scheduled run\", \"scheduled uv\"]:\n            return False\n\n        # Check if this is a CPU-only job\n        # hardware_flavor is at top level of tool_args, not nested in args\n        hardware_flavor = (\n            tool_args.get(\"hardware_flavor\")\n            or tool_args.get(\"flavor\")\n            or tool_args.get(\"hardware\")\n            or \"cpu-basic\"\n        )\n        is_cpu_job = hardware_flavor in CPU_FLAVORS\n\n        if is_cpu_job:\n            if config and not config.confirm_cpu_jobs:\n                return False\n            return True\n\n        return True\n\n    # Check for file upload operations (hf_private_repos or other tools)\n    if tool_name == \"hf_private_repos\":\n        operation = tool_args.get(\"operation\", \"\")\n        if operation == \"upload_file\":\n            if config and config.auto_file_upload:\n                return False\n            return True\n        # Other operations (create_repo, etc.) 
always require approval\n        if operation in [\"create_repo\"]:\n            return True\n\n    # hf_repo_files: upload (can overwrite) and delete require approval\n    if tool_name == \"hf_repo_files\":\n        operation = tool_args.get(\"operation\", \"\")\n        if operation in [\"upload\", \"delete\"]:\n            return True\n\n    # hf_repo_git: destructive operations require approval\n    if tool_name == \"hf_repo_git\":\n        operation = tool_args.get(\"operation\", \"\")\n        if operation in [\n            \"delete_branch\",\n            \"delete_tag\",\n            \"merge_pr\",\n            \"create_repo\",\n            \"update_repo\",\n        ]:\n            return True\n\n    return False\n\n\n# -- LLM retry constants --------------------------------------------------\n_MAX_LLM_RETRIES = 3\n_LLM_RETRY_DELAYS = [5, 15, 30]  # seconds between retries\n\n\ndef _is_transient_error(error: Exception) -> bool:\n    \"\"\"Return True for errors that are likely transient and worth retrying.\"\"\"\n    err_str = str(error).lower()\n    transient_patterns = [\n        \"timeout\", \"timed out\",\n        \"429\", \"rate limit\", \"rate_limit\",\n        \"503\", \"service unavailable\",\n        \"502\", \"bad gateway\",\n        \"500\", \"internal server error\",\n        \"overloaded\", \"capacity\",\n        \"connection reset\", \"connection refused\", \"connection error\",\n        \"eof\", \"broken pipe\",\n    ]\n    return any(pattern in err_str for pattern in transient_patterns)\n\n\ndef _is_effort_config_error(error: Exception) -> bool:\n    \"\"\"Catch the two 400s the effort probe also handles — thinking\n    unsupported for this model, or the specific effort level invalid.\n\n    This is our safety net for the case where ``/effort`` was changed\n    mid-conversation (which clears the probe cache) and the new level\n    doesn't work for the current model. 
We heal the cache and retry once.\n    \"\"\"\n    from agent.core.effort_probe import _is_invalid_effort, _is_thinking_unsupported\n    return _is_thinking_unsupported(error) or _is_invalid_effort(error)\n\n\nasync def _heal_effort_and_rebuild_params(\n    session: Session, error: Exception, llm_params: dict,\n) -> dict:\n    \"\"\"Update the session's effort cache based on ``error`` and return new\n    llm_params. Called only when ``_is_effort_config_error(error)`` is True.\n\n    Two branches:\n      • thinking-unsupported → cache ``None`` for this model, next call\n        strips thinking entirely\n      • invalid-effort → re-run the full cascade probe; the result lands\n        in the cache\n    \"\"\"\n    from agent.core.effort_probe import ProbeInconclusive, _is_thinking_unsupported, probe_effort\n\n    model = session.config.model_name\n    if _is_thinking_unsupported(error):\n        session.model_effective_effort[model] = None\n        logger.info(\"healed: %s doesn't support thinking — stripped\", model)\n    else:\n        try:\n            outcome = await probe_effort(\n                model, session.config.reasoning_effort, session.hf_token,\n            )\n            session.model_effective_effort[model] = outcome.effective_effort\n            logger.info(\n                \"healed: %s effort cascade → %s\", model, outcome.effective_effort,\n            )\n        except ProbeInconclusive:\n            # Transient during healing — strip thinking for safety, next\n            # call will either succeed or surface the real error.\n            session.model_effective_effort[model] = None\n            logger.info(\"healed: %s probe inconclusive — stripped\", model)\n\n    return _resolve_llm_params(\n        model,\n        session.hf_token,\n        reasoning_effort=session.effective_effort_for(model),\n    )\n\n\ndef _friendly_error_message(error: Exception) -> str | None:\n    \"\"\"Return a user-friendly message for known error types, or None to 
fall back to traceback.\"\"\"\n    err_str = str(error).lower()\n\n    if \"authentication\" in err_str or \"unauthorized\" in err_str or \"invalid x-api-key\" in err_str:\n        return (\n            \"Authentication failed — your API key is missing or invalid.\\n\\n\"\n            \"To fix this, set the API key for your model provider:\\n\"\n            \"  • Anthropic:   export ANTHROPIC_API_KEY=sk-...\\n\"\n            \"  • OpenAI:      export OPENAI_API_KEY=sk-...\\n\"\n            \"  • HF Router:   export HF_TOKEN=hf_...\\n\\n\"\n            \"You can also add it to a .env file in the project root.\\n\"\n            \"To switch models, use the /model command.\"\n        )\n\n    if \"insufficient\" in err_str and \"credit\" in err_str:\n        return (\n            \"Insufficient API credits. Please check your account balance \"\n            \"at your model provider's dashboard.\"\n        )\n\n    if \"not supported by provider\" in err_str or \"no provider supports\" in err_str:\n        return (\n            \"The model isn't served by the provider you pinned.\\n\\n\"\n            \"Drop the ':<provider>' suffix to let the HF router auto-pick a \"\n            \"provider, or use '/model' (no arg) to see which providers host \"\n            \"which models.\"\n        )\n\n    if \"model_not_found\" in err_str or (\n        \"model\" in err_str\n        and (\"not found\" in err_str or \"does not exist\" in err_str)\n    ):\n        return (\n            \"Model not found. Use '/model' to list suggestions, or paste an \"\n            \"HF model id like 'MiniMaxAI/MiniMax-M2.7'. 
Availability is shown \"\n            \"when you switch.\"\n        )\n\n    return None\n\n\nasync def _compact_and_notify(session: Session) -> None:\n    \"\"\"Run compaction and send event if context was reduced.\"\"\"\n    cm = session.context_manager\n    old_usage = cm.running_context_usage\n    logger.debug(\n        \"Compaction check: usage=%d, max=%d, threshold=%d, needs_compact=%s\",\n        old_usage, cm.model_max_tokens, cm.compaction_threshold, cm.needs_compaction,\n    )\n    await cm.compact(\n        model_name=session.config.model_name,\n        tool_specs=session.tool_router.get_tool_specs_for_llm(),\n        hf_token=session.hf_token,\n    )\n    new_usage = cm.running_context_usage\n    if new_usage != old_usage:\n        logger.warning(\n            \"Context compacted: %d -> %d tokens (max=%d, %d messages)\",\n            old_usage, new_usage, cm.model_max_tokens, len(cm.items),\n        )\n        await session.send_event(\n            Event(\n                event_type=\"compacted\",\n                data={\"old_tokens\": old_usage, \"new_tokens\": new_usage},\n            )\n        )\n\n\nasync def _cleanup_on_cancel(session: Session) -> None:\n    \"\"\"Kill sandbox processes and cancel HF jobs when the user interrupts.\"\"\"\n    # Kill active sandbox processes\n    sandbox = getattr(session, \"sandbox\", None)\n    if sandbox:\n        try:\n            await asyncio.to_thread(sandbox.kill_all)\n            logger.info(\"Killed sandbox processes on cancel\")\n        except Exception as e:\n            logger.warning(\"Failed to kill sandbox processes: %s\", e)\n\n    # Cancel running HF jobs\n    job_ids = list(session._running_job_ids)\n    if job_ids:\n        from huggingface_hub import HfApi\n\n        api = HfApi(token=session.hf_token)\n        for job_id in job_ids:\n            try:\n                await asyncio.to_thread(api.cancel_job, job_id=job_id)\n                logger.info(\"Cancelled HF job %s on interrupt\", 
job_id)\n            except Exception as e:\n                logger.warning(\"Failed to cancel HF job %s: %s\", job_id, e)\n        session._running_job_ids.clear()\n\n\n@dataclass\nclass LLMResult:\n    \"\"\"Result from an LLM call (streaming or non-streaming).\"\"\"\n    content: str | None\n    tool_calls_acc: dict[int, dict]\n    token_count: int\n    finish_reason: str | None\n\n\nasync def _call_llm_streaming(session: Session, messages, tools, llm_params) -> LLMResult:\n    \"\"\"Call the LLM with streaming, emitting assistant_chunk events.\"\"\"\n    response = None\n    _healed_effort = False  # one-shot safety net per call\n    messages, tools = with_prompt_caching(messages, tools, llm_params.get(\"model\"))\n    for _llm_attempt in range(_MAX_LLM_RETRIES):\n        try:\n            response = await acompletion(\n                messages=messages,\n                tools=tools,\n                tool_choice=\"auto\",\n                stream=True,\n                stream_options={\"include_usage\": True},\n                timeout=600,\n                **llm_params,\n            )\n            break\n        except ContextWindowExceededError:\n            raise\n        except Exception as e:\n            if not _healed_effort and _is_effort_config_error(e):\n                _healed_effort = True\n                llm_params = await _heal_effort_and_rebuild_params(session, e, llm_params)\n                await session.send_event(Event(\n                    event_type=\"tool_log\",\n                    data={\"tool\": \"system\", \"log\": \"Reasoning effort not supported for this model — adjusting and retrying.\"},\n                ))\n                continue\n            if _llm_attempt < _MAX_LLM_RETRIES - 1 and _is_transient_error(e):\n                _delay = _LLM_RETRY_DELAYS[_llm_attempt]\n                logger.warning(\n                    \"Transient LLM error (attempt %d/%d): %s — retrying in %ds\",\n                    _llm_attempt + 1, 
_MAX_LLM_RETRIES, e, _delay,\n                )\n                await session.send_event(Event(\n                    event_type=\"tool_log\",\n                    data={\"tool\": \"system\", \"log\": f\"LLM connection error, retrying in {_delay}s...\"},\n                ))\n                await asyncio.sleep(_delay)\n                continue\n            raise\n\n    full_content = \"\"\n    tool_calls_acc: dict[int, dict] = {}\n    token_count = 0\n    finish_reason = None\n\n    async for chunk in response:\n        if session.is_cancelled:\n            tool_calls_acc.clear()\n            break\n\n        choice = chunk.choices[0] if chunk.choices else None\n        if not choice:\n            if hasattr(chunk, \"usage\") and chunk.usage:\n                token_count = chunk.usage.total_tokens\n            continue\n\n        delta = choice.delta\n        if choice.finish_reason:\n            finish_reason = choice.finish_reason\n\n        if delta.content:\n            full_content += delta.content\n            await session.send_event(\n                Event(event_type=\"assistant_chunk\", data={\"content\": delta.content})\n            )\n\n        if delta.tool_calls:\n            for tc_delta in delta.tool_calls:\n                idx = tc_delta.index\n                if idx not in tool_calls_acc:\n                    tool_calls_acc[idx] = {\n                        \"id\": \"\", \"type\": \"function\",\n                        \"function\": {\"name\": \"\", \"arguments\": \"\"},\n                    }\n                if tc_delta.id:\n                    tool_calls_acc[idx][\"id\"] = tc_delta.id\n                if tc_delta.function:\n                    if tc_delta.function.name:\n                        tool_calls_acc[idx][\"function\"][\"name\"] += tc_delta.function.name\n                    if tc_delta.function.arguments:\n                        tool_calls_acc[idx][\"function\"][\"arguments\"] += tc_delta.function.arguments\n\n        if 
hasattr(chunk, \"usage\") and chunk.usage:\n            token_count = chunk.usage.total_tokens\n\n    return LLMResult(\n        content=full_content or None,\n        tool_calls_acc=tool_calls_acc,\n        token_count=token_count,\n        finish_reason=finish_reason,\n    )\n\n\nasync def _call_llm_non_streaming(session: Session, messages, tools, llm_params) -> LLMResult:\n    \"\"\"Call the LLM without streaming, emit assistant_message at the end.\"\"\"\n    response = None\n    _healed_effort = False\n    messages, tools = with_prompt_caching(messages, tools, llm_params.get(\"model\"))\n    for _llm_attempt in range(_MAX_LLM_RETRIES):\n        try:\n            response = await acompletion(\n                messages=messages,\n                tools=tools,\n                tool_choice=\"auto\",\n                stream=False,\n                timeout=600,\n                **llm_params,\n            )\n            break\n        except ContextWindowExceededError:\n            raise\n        except Exception as e:\n            if not _healed_effort and _is_effort_config_error(e):\n                _healed_effort = True\n                llm_params = await _heal_effort_and_rebuild_params(session, e, llm_params)\n                await session.send_event(Event(\n                    event_type=\"tool_log\",\n                    data={\"tool\": \"system\", \"log\": \"Reasoning effort not supported for this model — adjusting and retrying.\"},\n                ))\n                continue\n            if _llm_attempt < _MAX_LLM_RETRIES - 1 and _is_transient_error(e):\n                _delay = _LLM_RETRY_DELAYS[_llm_attempt]\n                logger.warning(\n                    \"Transient LLM error (attempt %d/%d): %s — retrying in %ds\",\n                    _llm_attempt + 1, _MAX_LLM_RETRIES, e, _delay,\n                )\n                await session.send_event(Event(\n                    event_type=\"tool_log\",\n                    data={\"tool\": \"system\", 
\"log\": f\"LLM connection error, retrying in {_delay}s...\"},\n                ))\n                await asyncio.sleep(_delay)\n                continue\n            raise\n\n    choice = response.choices[0]\n    message = choice.message\n    content = message.content or None\n    finish_reason = choice.finish_reason\n    token_count = response.usage.total_tokens if response.usage else 0\n\n    # Build tool_calls_acc in the same format as streaming\n    tool_calls_acc: dict[int, dict] = {}\n    if message.tool_calls:\n        for idx, tc in enumerate(message.tool_calls):\n            tool_calls_acc[idx] = {\n                \"id\": tc.id,\n                \"type\": \"function\",\n                \"function\": {\n                    \"name\": tc.function.name,\n                    \"arguments\": tc.function.arguments,\n                },\n            }\n\n    # Emit the full message as a single event\n    if content:\n        await session.send_event(\n            Event(event_type=\"assistant_message\", data={\"content\": content})\n        )\n\n    return LLMResult(\n        content=content,\n        tool_calls_acc=tool_calls_acc,\n        token_count=token_count,\n        finish_reason=finish_reason,\n    )\n\n\nclass Handlers:\n    \"\"\"Handler functions for each operation type\"\"\"\n\n    @staticmethod\n    async def _abandon_pending_approval(session: Session) -> None:\n        \"\"\"Cancel pending approval tools when the user continues the conversation.\n\n        Injects rejection tool-result messages into the LLM context (so the\n        history stays valid) and notifies the frontend that those tools were\n        abandoned.\n        \"\"\"\n        tool_calls = session.pending_approval.get(\"tool_calls\", [])\n        for tc in tool_calls:\n            tool_name = tc.function.name\n            abandon_msg = (\n                \"Task abandoned — user continued the conversation without approving.\"\n            )\n\n            # Keep LLM context valid: 
every tool_call needs a tool result\n            tool_msg = Message(\n                role=\"tool\",\n                content=abandon_msg,\n                tool_call_id=tc.id,\n                name=tool_name,\n            )\n            session.context_manager.add_message(tool_msg)\n\n            await session.send_event(\n                Event(\n                    event_type=\"tool_state_change\",\n                    data={\n                        \"tool_call_id\": tc.id,\n                        \"tool\": tool_name,\n                        \"state\": \"abandoned\",\n                    },\n                )\n            )\n\n        session.pending_approval = None\n        logger.info(\"Abandoned %d pending approval tool(s)\", len(tool_calls))\n\n    @staticmethod\n    async def run_agent(\n        session: Session, text: str,\n    ) -> str | None:\n        \"\"\"\n        Handle user input (like user_input_or_turn in codex.rs:1291)\n        Returns the final assistant response content, if any.\n        \"\"\"\n        # Clear any stale cancellation flag from a previous run\n        session.reset_cancel()\n\n        # If there's a pending approval and the user sent a new message,\n        # abandon the pending tools so the LLM context stays valid.\n        if text and session.pending_approval:\n            await Handlers._abandon_pending_approval(session)\n\n        # Add user message to history only if there's actual content\n        if text:\n            user_msg = Message(role=\"user\", content=text)\n            session.context_manager.add_message(user_msg)\n\n        # Send event that we're processing\n        await session.send_event(\n            Event(event_type=\"processing\", data={\"message\": \"Processing user input\"})\n        )\n\n        # Agentic loop - continue until model doesn't call tools or max iterations is reached\n        iteration = 0\n        final_response = None\n        errored = False\n        max_iterations = 
session.config.max_iterations\n\n        while max_iterations == -1 or iteration < max_iterations:\n            # ── Cancellation check: before LLM call ──\n            if session.is_cancelled:\n                break\n\n            # Compact before calling the LLM if context is near the limit\n            await _compact_and_notify(session)\n\n            # Doom-loop detection: break out of repeated tool call patterns\n            doom_prompt = check_for_doom_loop(session.context_manager.items)\n            if doom_prompt:\n                session.context_manager.add_message(\n                    Message(role=\"user\", content=doom_prompt)\n                )\n                await session.send_event(\n                    Event(\n                        event_type=\"tool_log\",\n                        data={\n                            \"tool\": \"system\",\n                            \"log\": \"Doom loop detected — injecting corrective prompt\",\n                        },\n                    )\n                )\n\n            messages = session.context_manager.get_messages()\n            tools = session.tool_router.get_tool_specs_for_llm()\n            try:\n                # ── Call the LLM (streaming or non-streaming) ──\n                # Pull the per-model probed effort from the session cache when\n                # available; fall back to the raw preference for models we\n                # haven't probed yet (e.g. 
research sub-model).\n                llm_params = _resolve_llm_params(\n                    session.config.model_name,\n                    session.hf_token,\n                    reasoning_effort=session.effective_effort_for(session.config.model_name),\n                )\n                if session.stream:\n                    llm_result = await _call_llm_streaming(session, messages, tools, llm_params)\n                else:\n                    llm_result = await _call_llm_non_streaming(session, messages, tools, llm_params)\n\n                content = llm_result.content\n                tool_calls_acc = llm_result.tool_calls_acc\n                token_count = llm_result.token_count\n                finish_reason = llm_result.finish_reason\n\n                # If output was truncated, all tool call args are garbage.\n                # Inject a system hint so the LLM retries with smaller content.\n                if finish_reason == \"length\" and tool_calls_acc:\n                    dropped_names = [\n                        tc[\"function\"][\"name\"]\n                        for tc in tool_calls_acc.values()\n                        if tc[\"function\"][\"name\"]\n                    ]\n                    logger.warning(\n                        \"Output truncated (finish_reason=length) — dropping tool calls: %s\",\n                        dropped_names,\n                    )\n                    tool_calls_acc.clear()\n\n                    # Tell the agent what happened so it can retry differently\n                    truncation_hint = (\n                        \"Your previous response was truncated because the output hit the \"\n                        \"token limit. The following tool calls were lost: \"\n                        f\"{dropped_names}. \"\n                        \"IMPORTANT: Do NOT retry with the same large content. 
Instead:\\n\"\n                        \"  • For 'write': use bash with cat<<'HEREDOC' to write the file, \"\n                        \"or split into several smaller edit calls.\\n\"\n                        \"  • For other tools: reduce the size of your arguments or use bash.\"\n                    )\n                    if content:\n                        assistant_msg = Message(role=\"assistant\", content=content)\n                        session.context_manager.add_message(assistant_msg, token_count)\n                    session.context_manager.add_message(\n                        Message(role=\"user\", content=f\"[SYSTEM: {truncation_hint}]\")\n                    )\n                    if session.stream:\n                        await session.send_event(\n                            Event(event_type=\"assistant_stream_end\", data={})\n                        )\n                    await session.send_event(\n                        Event(\n                            event_type=\"tool_log\",\n                            data={\"tool\": \"system\", \"log\": f\"Output truncated — retrying with smaller content ({dropped_names})\"},\n                        )\n                    )\n                    iteration += 1\n                    continue  # retry this iteration\n\n                # Build tool_calls list from accumulated deltas\n                tool_calls: list[ToolCall] = []\n                for idx in sorted(tool_calls_acc.keys()):\n                    tc_data = tool_calls_acc[idx]\n                    tool_calls.append(\n                        ToolCall(\n                            id=tc_data[\"id\"],\n                            type=\"function\",\n                            function={\n                                \"name\": tc_data[\"function\"][\"name\"],\n                                \"arguments\": tc_data[\"function\"][\"arguments\"],\n                            },\n                        )\n                    )\n\n                # 
Signal end of streaming to the frontend\n                if session.stream:\n                    await session.send_event(\n                        Event(event_type=\"assistant_stream_end\", data={})\n                    )\n\n                # If no tool calls, add assistant message and we're done\n                if not tool_calls:\n                    logger.debug(\n                        \"Agent loop ending: no tool calls. \"\n                        \"finish_reason=%s, token_count=%d, \"\n                        \"usage=%d, model_max_tokens=%d, \"\n                        \"iteration=%d/%d, \"\n                        \"response_text=%s\",\n                        finish_reason,\n                        token_count,\n                        session.context_manager.running_context_usage,\n                        session.context_manager.model_max_tokens,\n                        iteration,\n                        max_iterations,\n                        (content or \"\")[:500],\n                    )\n                    if content:\n                        assistant_msg = Message(role=\"assistant\", content=content)\n                        session.context_manager.add_message(assistant_msg, token_count)\n                        final_response = content\n                    break\n\n                # Validate tool call args (one json.loads per call, once)\n                # and split into good vs bad\n                good_tools: list[tuple[ToolCall, str, dict]] = []\n                bad_tools: list[ToolCall] = []\n                for tc in tool_calls:\n                    try:\n                        args = json.loads(tc.function.arguments)\n                        good_tools.append((tc, tc.function.name, args))\n                    except (json.JSONDecodeError, TypeError, ValueError):\n                        logger.warning(\n                            \"Malformed arguments for tool_call %s (%s) — skipping\",\n                            tc.id, 
tc.function.name,\n                        )\n                        tc.function.arguments = \"{}\"\n                        bad_tools.append(tc)\n\n                # Add assistant message with all tool calls to context\n                assistant_msg = Message(\n                    role=\"assistant\",\n                    content=content,\n                    tool_calls=tool_calls,\n                )\n                session.context_manager.add_message(assistant_msg, token_count)\n\n                # Add error results for bad tool calls so the LLM\n                # knows what happened and can retry differently\n                for tc in bad_tools:\n                    error_msg = (\n                        f\"ERROR: Tool call to '{tc.function.name}' had malformed JSON \"\n                        f\"arguments and was NOT executed. Retry with smaller content — \"\n                        f\"for 'write', split into multiple smaller writes using 'edit'.\"\n                    )\n                    session.context_manager.add_message(Message(\n                        role=\"tool\",\n                        content=error_msg,\n                        tool_call_id=tc.id,\n                        name=tc.function.name,\n                    ))\n                    await session.send_event(Event(\n                        event_type=\"tool_call\",\n                        data={\"tool\": tc.function.name, \"arguments\": {}, \"tool_call_id\": tc.id},\n                    ))\n                    await session.send_event(Event(\n                        event_type=\"tool_output\",\n                        data={\"tool\": tc.function.name, \"tool_call_id\": tc.id, \"output\": error_msg, \"success\": False},\n                    ))\n\n                # ── Cancellation check: before tool execution ──\n                if session.is_cancelled:\n                    break\n\n                # Separate good tools into approval-required vs auto-execute\n                
approval_required_tools: list[tuple[ToolCall, str, dict]] = []\n                non_approval_tools: list[tuple[ToolCall, str, dict]] = []\n                for tc, tool_name, tool_args in good_tools:\n                    if _needs_approval(tool_name, tool_args, session.config):\n                        approval_required_tools.append((tc, tool_name, tool_args))\n                    else:\n                        non_approval_tools.append((tc, tool_name, tool_args))\n\n                # Execute non-approval tools (in parallel when possible)\n                if non_approval_tools:\n                    # 1. Validate args upfront\n                    parsed_tools: list[\n                        tuple[ToolCall, str, dict, bool, str]\n                    ] = []\n                    for tc, tool_name, tool_args in non_approval_tools:\n                        args_valid, error_msg = _validate_tool_args(tool_args)\n                        parsed_tools.append(\n                            (tc, tool_name, tool_args, args_valid, error_msg)\n                        )\n\n                    # 2. Send all tool_call events upfront (so frontend shows them all)\n                    for tc, tool_name, tool_args, args_valid, _ in parsed_tools:\n                        if args_valid:\n                            await session.send_event(\n                                Event(\n                                    event_type=\"tool_call\",\n                                    data={\n                                        \"tool\": tool_name,\n                                        \"arguments\": tool_args,\n                                        \"tool_call_id\": tc.id,\n                                    },\n                                )\n                            )\n\n                    # 3. 
Execute all valid tools in parallel, cancellable\n                    async def _exec_tool(\n                        tc: ToolCall,\n                        name: str,\n                        args: dict,\n                        valid: bool,\n                        err: str,\n                    ) -> tuple[ToolCall, str, dict, str, bool]:\n                        if not valid:\n                            return (tc, name, args, err, False)\n                        out, ok = await session.tool_router.call_tool(\n                            name, args, session=session, tool_call_id=tc.id\n                        )\n                        return (tc, name, args, out, ok)\n\n                    gather_task = asyncio.ensure_future(asyncio.gather(\n                        *[\n                            _exec_tool(tc, name, args, valid, err)\n                            for tc, name, args, valid, err in parsed_tools\n                        ]\n                    ))\n                    cancel_task = asyncio.ensure_future(session._cancelled.wait())\n\n                    done, _ = await asyncio.wait(\n                        [gather_task, cancel_task],\n                        return_when=asyncio.FIRST_COMPLETED,\n                    )\n\n                    if cancel_task in done:\n                        gather_task.cancel()\n                        try:\n                            await gather_task\n                        except asyncio.CancelledError:\n                            pass\n                        # Notify frontend that in-flight tools were cancelled\n                        for tc, name, _args, valid, _ in parsed_tools:\n                            if valid:\n                                await session.send_event(Event(\n                                    event_type=\"tool_state_change\",\n                                    data={\"tool_call_id\": tc.id, \"tool\": name, \"state\": \"cancelled\"},\n                                ))\n             
           await _cleanup_on_cancel(session)\n                        break\n\n                    cancel_task.cancel()\n                    results = gather_task.result()\n\n                    # 4. Record results and send outputs (order preserved)\n                    for tc, tool_name, tool_args, output, success in results:\n                        tool_msg = Message(\n                            role=\"tool\",\n                            content=output,\n                            tool_call_id=tc.id,\n                            name=tool_name,\n                        )\n                        session.context_manager.add_message(tool_msg)\n\n                        await session.send_event(\n                            Event(\n                                event_type=\"tool_output\",\n                                data={\n                                    \"tool\": tool_name,\n                                    \"tool_call_id\": tc.id,\n                                    \"output\": output,\n                                    \"success\": success,\n                                },\n                            )\n                        )\n\n                # If there are tools requiring approval, ask for batch approval\n                if approval_required_tools:\n                    # Prepare batch approval data\n                    tools_data = []\n                    for tc, tool_name, tool_args in approval_required_tools:\n                        # Resolve sandbox file paths for hf_jobs scripts so the\n                        # frontend can display & edit the actual file content.\n                        if tool_name == \"hf_jobs\" and isinstance(tool_args.get(\"script\"), str):\n                            from agent.tools.sandbox_tool import resolve_sandbox_script\n                            sandbox = getattr(session, \"sandbox\", None)\n                            resolved, _ = await resolve_sandbox_script(sandbox, 
tool_args[\"script\"])\n                            if resolved:\n                                tool_args = {**tool_args, \"script\": resolved}\n\n                        tools_data.append({\n                            \"tool\": tool_name,\n                            \"arguments\": tool_args,\n                            \"tool_call_id\": tc.id,\n                        })\n\n                    await session.send_event(Event(\n                        event_type=\"approval_required\",\n                        data={\"tools\": tools_data, \"count\": len(tools_data)},\n                    ))\n\n                    # Store all approval-requiring tools (ToolCall objects for execution)\n                    session.pending_approval = {\n                        \"tool_calls\": [tc for tc, _, _ in approval_required_tools],\n                    }\n\n                    # Return early - wait for EXEC_APPROVAL operation\n                    return None\n\n                iteration += 1\n\n            except ContextWindowExceededError:\n                # Force compact and retry this iteration\n                cm = session.context_manager\n                logger.warning(\n                    \"ContextWindowExceededError at iteration %d — forcing compaction \"\n                    \"(usage=%d, model_max_tokens=%d, messages=%d)\",\n                    iteration, cm.running_context_usage, cm.model_max_tokens, len(cm.items),\n                )\n                cm.running_context_usage = cm.model_max_tokens + 1\n                await _compact_and_notify(session)\n                continue\n\n            except Exception as e:\n                import traceback\n\n                error_msg = _friendly_error_message(e)\n                if error_msg is None:\n                    error_msg = str(e) + \"\\n\" + traceback.format_exc()\n\n                await session.send_event(\n                    Event(\n                        event_type=\"error\",\n                        
data={\"error\": error_msg},\n                    )\n                )\n                errored = True\n                break\n\n        if session.is_cancelled:\n            await _cleanup_on_cancel(session)\n            await session.send_event(Event(event_type=\"interrupted\"))\n        elif not errored:\n            await session.send_event(\n                Event(\n                    event_type=\"turn_complete\",\n                    data={\"history_size\": len(session.context_manager.items)},\n                )\n            )\n\n        # Increment turn counter and check for auto-save\n        session.increment_turn()\n        await session.auto_save_if_needed()\n\n        return final_response\n\n    @staticmethod\n    async def undo(session: Session) -> None:\n        \"\"\"Remove the last complete turn and notify the frontend.\"\"\"\n        removed = session.context_manager.undo_last_turn()\n        if not removed:\n            logger.warning(\"Undo: no user message found to remove\")\n        await session.send_event(Event(event_type=\"undo_complete\"))\n\n    @staticmethod\n    async def exec_approval(session: Session, approvals: list[dict]) -> None:\n        \"\"\"Handle batch job execution approval\"\"\"\n        if not session.pending_approval:\n            await session.send_event(\n                Event(\n                    event_type=\"error\",\n                    data={\"error\": \"No pending approval to process\"},\n                )\n            )\n            return\n\n        tool_calls = session.pending_approval.get(\"tool_calls\", [])\n        if not tool_calls:\n            await session.send_event(\n                Event(\n                    event_type=\"error\",\n                    data={\"error\": \"No pending tool calls found\"},\n                )\n            )\n            return\n\n        # Create a map of tool_call_id -> approval decision\n        approval_map = {a[\"tool_call_id\"]: a for a in approvals}\n        for a in 
approvals:\n            if a.get(\"edited_script\"):\n                logger.info(\n                    f\"Received edited script for tool_call {a['tool_call_id']} ({len(a['edited_script'])} chars)\"\n                )\n\n        # Separate approved and rejected tool calls\n        approved_tasks = []\n        rejected_tasks = []\n\n        for tc in tool_calls:\n            tool_name = tc.function.name\n            try:\n                tool_args = json.loads(tc.function.arguments)\n            except (json.JSONDecodeError, TypeError) as e:\n                # Malformed arguments — treat as failed, notify agent\n                logger.warning(f\"Malformed tool arguments for {tool_name}: {e}\")\n                tool_msg = Message(\n                    role=\"tool\",\n                    content=f\"Malformed arguments: {e}\",\n                    tool_call_id=tc.id,\n                    name=tool_name,\n                )\n                session.context_manager.add_message(tool_msg)\n                await session.send_event(\n                    Event(\n                        event_type=\"tool_output\",\n                        data={\n                            \"tool\": tool_name,\n                            \"tool_call_id\": tc.id,\n                            \"output\": f\"Malformed arguments: {e}\",\n                            \"success\": False,\n                        },\n                    )\n                )\n                continue\n\n            approval_decision = approval_map.get(tc.id, {\"approved\": False})\n\n            if approval_decision.get(\"approved\", False):\n                edited_script = approval_decision.get(\"edited_script\")\n                was_edited = False\n                if edited_script and \"script\" in tool_args:\n                    tool_args[\"script\"] = edited_script\n                    was_edited = True\n                    logger.info(f\"Using user-edited script for {tool_name} ({tc.id})\")\n                
approved_tasks.append((tc, tool_name, tool_args, was_edited))\n            else:\n                rejected_tasks.append((tc, tool_name, approval_decision))\n\n        # Clear pending approval immediately so a page refresh during\n        # execution won't re-show the approval dialog.\n        session.pending_approval = None\n\n        # Notify frontend of approval decisions immediately (before execution)\n        for tc, tool_name, tool_args, _was_edited in approved_tasks:\n            await session.send_event(\n                Event(\n                    event_type=\"tool_state_change\",\n                    data={\n                        \"tool_call_id\": tc.id,\n                        \"tool\": tool_name,\n                        \"state\": \"approved\",\n                    },\n                )\n            )\n        for tc, tool_name, approval_decision in rejected_tasks:\n            await session.send_event(\n                Event(\n                    event_type=\"tool_state_change\",\n                    data={\n                        \"tool_call_id\": tc.id,\n                        \"tool\": tool_name,\n                        \"state\": \"rejected\",\n                    },\n                )\n            )\n\n        # Execute all approved tools concurrently\n        async def execute_tool(tc, tool_name, tool_args, was_edited):\n            \"\"\"Execute a single tool and return its result.\n\n            The TraceLog already exists on the frontend (created by\n            approval_required), so we send tool_state_change instead of\n            tool_call to avoid creating a duplicate.\n            \"\"\"\n            await session.send_event(\n                Event(\n                    event_type=\"tool_state_change\",\n                    data={\n                        \"tool_call_id\": tc.id,\n                        \"tool\": tool_name,\n                        \"state\": \"running\",\n                    },\n                )\n            
)\n\n            output, success = await session.tool_router.call_tool(\n                tool_name, tool_args, session=session, tool_call_id=tc.id\n            )\n\n            return (tc, tool_name, output, success, was_edited)\n\n        # Execute all approved tools concurrently (cancellable)\n        if approved_tasks:\n            gather_task = asyncio.ensure_future(asyncio.gather(\n                *[\n                    execute_tool(tc, tool_name, tool_args, was_edited)\n                    for tc, tool_name, tool_args, was_edited in approved_tasks\n                ],\n                return_exceptions=True,\n            ))\n            cancel_task = asyncio.ensure_future(session._cancelled.wait())\n\n            done, _ = await asyncio.wait(\n                [gather_task, cancel_task],\n                return_when=asyncio.FIRST_COMPLETED,\n            )\n\n            if cancel_task in done:\n                gather_task.cancel()\n                try:\n                    await gather_task\n                except asyncio.CancelledError:\n                    pass\n                # Notify frontend that approved tools were cancelled\n                for tc, tool_name, _args, _was_edited in approved_tasks:\n                    await session.send_event(Event(\n                        event_type=\"tool_state_change\",\n                        data={\"tool_call_id\": tc.id, \"tool\": tool_name, \"state\": \"cancelled\"},\n                    ))\n                await _cleanup_on_cancel(session)\n                await session.send_event(Event(event_type=\"interrupted\"))\n                session.increment_turn()\n                await session.auto_save_if_needed()\n                return\n\n            cancel_task.cancel()\n            results = gather_task.result()\n\n            # Process results and add to context\n            for result in results:\n                if isinstance(result, Exception):\n                    # Handle execution error\n                  
  logger.error(f\"Tool execution error: {result}\")\n                    continue\n\n                tc, tool_name, output, success, was_edited = result\n\n                if was_edited:\n                    output = f\"[Note: The user edited the script before execution. The output below reflects the user-modified version, not your original script.]\\n\\n{output}\"\n\n                # Add tool result to context\n                tool_msg = Message(\n                    role=\"tool\",\n                    content=output,\n                    tool_call_id=tc.id,\n                    name=tool_name,\n                )\n                session.context_manager.add_message(tool_msg)\n\n                await session.send_event(\n                    Event(\n                        event_type=\"tool_output\",\n                        data={\n                            \"tool\": tool_name,\n                            \"tool_call_id\": tc.id,\n                            \"output\": output,\n                            \"success\": success,\n                        },\n                    )\n                )\n\n        # Process rejected tools\n        for tc, tool_name, approval_decision in rejected_tasks:\n            rejection_msg = \"Job execution cancelled by user\"\n            user_feedback = approval_decision.get(\"feedback\")\n            if user_feedback:\n                # Ensure feedback is a string and sanitize any problematic characters\n                feedback_str = str(user_feedback).strip()\n                # Remove any control characters that might break JSON parsing\n                feedback_str = \"\".join(\n                    char for char in feedback_str if ord(char) >= 32 or char in \"\\n\\t\"\n                )\n                rejection_msg += f\". 
User feedback: {feedback_str}\"\n\n            # Ensure rejection_msg is a clean string\n            rejection_msg = str(rejection_msg).strip()\n\n            tool_msg = Message(\n                role=\"tool\",\n                content=rejection_msg,\n                tool_call_id=tc.id,\n                name=tool_name,\n            )\n            session.context_manager.add_message(tool_msg)\n\n            await session.send_event(\n                Event(\n                    event_type=\"tool_output\",\n                    data={\n                        \"tool\": tool_name,\n                        \"tool_call_id\": tc.id,\n                        \"output\": rejection_msg,\n                        \"success\": False,\n                    },\n                )\n            )\n\n        # Continue agent loop with empty input to process the tool results\n        await Handlers.run_agent(session, \"\")\n\n    @staticmethod\n    async def shutdown(session: Session) -> bool:\n        \"\"\"Handle shutdown (like shutdown in codex.rs:1329)\"\"\"\n        # Save session trajectory if enabled (fire-and-forget, returns immediately)\n        if session.config.save_sessions:\n            logger.info(\"Saving session...\")\n            repo_id = session.config.session_dataset_repo\n            _ = session.save_and_upload_detached(repo_id)\n\n        session.is_running = False\n        await session.send_event(Event(event_type=\"shutdown\"))\n        return True\n\n\nasync def process_submission(session: Session, submission) -> bool:\n    \"\"\"\n    Process a single submission and return whether to continue running.\n\n    Returns:\n        bool: True to continue, False to shutdown\n    \"\"\"\n    op = submission.operation\n    logger.debug(\"Received operation: %s\", op.op_type.value)\n\n    if op.op_type == OpType.USER_INPUT:\n        text = op.data.get(\"text\", \"\") if op.data else \"\"\n        await Handlers.run_agent(session, text)\n        return True\n\n    if 
op.op_type == OpType.COMPACT:\n        await _compact_and_notify(session)\n        return True\n\n    if op.op_type == OpType.UNDO:\n        await Handlers.undo(session)\n        return True\n\n    if op.op_type == OpType.EXEC_APPROVAL:\n        approvals = op.data.get(\"approvals\", []) if op.data else []\n        await Handlers.exec_approval(session, approvals)\n        return True\n\n    if op.op_type == OpType.SHUTDOWN:\n        return not await Handlers.shutdown(session)\n\n    logger.warning(f\"Unknown operation: {op.op_type}\")\n    return True\n\n\nasync def submission_loop(\n    submission_queue: asyncio.Queue,\n    event_queue: asyncio.Queue,\n    config: Config | None = None,\n    tool_router: ToolRouter | None = None,\n    session_holder: list | None = None,\n    hf_token: str | None = None,\n    local_mode: bool = False,\n    stream: bool = True,\n) -> None:\n    \"\"\"\n    Main agent loop - processes submissions and dispatches to handlers.\n    This is the core of the agent (like submission_loop in codex.rs:1259-1340)\n    \"\"\"\n\n    # Create session with tool router\n    session = Session(\n        event_queue, config=config, tool_router=tool_router, hf_token=hf_token,\n        local_mode=local_mode, stream=stream,\n    )\n    if session_holder is not None:\n        session_holder[0] = session\n    logger.info(\"Agent loop started\")\n\n    # Retry any failed uploads from previous sessions (fire-and-forget)\n    if config and config.save_sessions:\n        Session.retry_failed_uploads_detached(\n            directory=\"session_logs\", repo_id=config.session_dataset_repo\n        )\n\n    try:\n        # Main processing loop\n        async with tool_router:\n            # Emit ready event after initialization\n            await session.send_event(\n                Event(event_type=\"ready\", data={\n                    \"message\": \"Agent initialized\",\n                    \"tool_count\": len(tool_router.tools),\n                })\n            
)\n\n            while session.is_running:\n                submission = await submission_queue.get()\n\n                try:\n                    should_continue = await process_submission(session, submission)\n                    if not should_continue:\n                        break\n                except asyncio.CancelledError:\n                    logger.warning(\"Agent loop cancelled\")\n                    break\n                except Exception as e:\n                    logger.error(f\"Error in agent loop: {e}\")\n                    await session.send_event(\n                        Event(event_type=\"error\", data={\"error\": str(e)})\n                    )\n\n        logger.info(\"Agent loop exited\")\n\n    finally:\n        # Emergency save if session saving is enabled and shutdown wasn't called properly\n        if session.config.save_sessions and session.is_running:\n            logger.info(\"Emergency save: preserving session before exit...\")\n            try:\n                local_path = session.save_and_upload_detached(\n                    session.config.session_dataset_repo\n                )\n                if local_path:\n                    logger.info(\"Emergency save successful, upload in progress\")\n            except Exception as e:\n                logger.error(f\"Emergency save failed: {e}\")\n"
  },
  {
    "path": "agent/core/doom_loop.py",
    "content": "\"\"\"\nDoom-loop detection for repeated tool call patterns.\n\nDetects when the agent is stuck calling the same tools repeatedly\nand injects a corrective prompt to break the cycle.\n\"\"\"\n\nimport hashlib\nimport json\nimport logging\nfrom dataclasses import dataclass\n\nfrom litellm import Message\n\nlogger = logging.getLogger(__name__)\n\n\n@dataclass(frozen=True)\nclass ToolCallSignature:\n    \"\"\"Hashable signature for a single tool call (name + args hash).\"\"\"\n\n    name: str\n    args_hash: str\n\n\ndef _hash_args(args_str: str) -> str:\n    \"\"\"Return a short hash of the JSON arguments string.\"\"\"\n    return hashlib.md5(args_str.encode()).hexdigest()[:12]\n\n\ndef extract_recent_tool_signatures(\n    messages: list[Message], lookback: int = 30\n) -> list[ToolCallSignature]:\n    \"\"\"Extract tool call signatures from recent assistant messages.\"\"\"\n    signatures: list[ToolCallSignature] = []\n    recent = messages[-lookback:] if len(messages) > lookback else messages\n\n    for msg in recent:\n        if getattr(msg, \"role\", None) != \"assistant\":\n            continue\n        tool_calls = getattr(msg, \"tool_calls\", None)\n        if not tool_calls:\n            continue\n        for tc in tool_calls:\n            fn = getattr(tc, \"function\", None)\n            if not fn:\n                continue\n            name = getattr(fn, \"name\", \"\") or \"\"\n            args_str = getattr(fn, \"arguments\", \"\") or \"\"\n            signatures.append(ToolCallSignature(name=name, args_hash=_hash_args(args_str)))\n\n    return signatures\n\n\ndef detect_identical_consecutive(\n    signatures: list[ToolCallSignature], threshold: int = 3\n) -> str | None:\n    \"\"\"Return the tool name if threshold+ identical consecutive calls are found.\"\"\"\n    if len(signatures) < threshold:\n        return None\n\n    count = 1\n    for i in range(1, len(signatures)):\n        if signatures[i] == signatures[i - 1]:\n            count 
+= 1\n            if count >= threshold:\n                return signatures[i].name\n        else:\n            count = 1\n\n    return None\n\n\ndef detect_repeating_sequence(\n    signatures: list[ToolCallSignature],\n) -> list[ToolCallSignature] | None:\n    \"\"\"Detect repeating patterns like [A,B,A,B] for sequences of length 2-5 with 2+ reps.\"\"\"\n    n = len(signatures)\n    for seq_len in range(2, 6):\n        min_required = seq_len * 2\n        if n < min_required:\n            continue\n\n        # Check the tail of the signatures list\n        tail = signatures[-min_required:]\n        pattern = tail[:seq_len]\n\n        # Count how many full repetitions from the end\n        reps = 0\n        for start in range(n - seq_len, -1, -seq_len):\n            chunk = signatures[start : start + seq_len]\n            if chunk == pattern:\n                reps += 1\n            else:\n                break\n\n        if reps >= 2:\n            return pattern\n\n    return None\n\n\ndef check_for_doom_loop(messages: list[Message]) -> str | None:\n    \"\"\"Check for doom loop patterns. Returns a corrective prompt or None.\"\"\"\n    signatures = extract_recent_tool_signatures(messages, lookback=30)\n    if len(signatures) < 3:\n        return None\n\n    # Check for identical consecutive calls\n    tool_name = detect_identical_consecutive(signatures, threshold=3)\n    if tool_name:\n        logger.warning(\"Doom loop detected: %d+ identical consecutive calls to '%s'\", 3, tool_name)\n        return (\n            f\"[SYSTEM: DOOM LOOP DETECTED] You have called '{tool_name}' with the same \"\n            f\"arguments multiple times in a row, getting the same result each time. \"\n            f\"STOP repeating this approach — it is not working. \"\n            f\"Step back and try a fundamentally different strategy. 
\"\n            f\"Consider: using a different tool, changing your arguments significantly, \"\n            f\"or explaining to the user what you're stuck on and asking for guidance.\"\n        )\n\n    # Check for repeating sequences\n    pattern = detect_repeating_sequence(signatures)\n    if pattern:\n        pattern_desc = \" → \".join(s.name for s in pattern)\n        logger.warning(\"Doom loop detected: repeating sequence [%s]\", pattern_desc)\n        return (\n            f\"[SYSTEM: DOOM LOOP DETECTED] You are stuck in a repeating cycle of tool calls: \"\n            f\"[{pattern_desc}]. This pattern has repeated multiple times without progress. \"\n            f\"STOP this cycle and try a fundamentally different approach. \"\n            f\"Consider: breaking down the problem differently, using alternative tools, \"\n            f\"or explaining to the user what you're stuck on and asking for guidance.\"\n        )\n\n    return None\n"
  },
  {
    "path": "agent/core/effort_probe.py",
    "content": "\"\"\"Probe-and-cascade for reasoning effort on /model switch.\n\nWe don't maintain a per-model capability table. Instead, the first time a\nuser picks a model we fire a 1-token ping with the same params we'd use\nfor real and walk down a cascade (``max`` → ``xhigh`` → ``high`` → …)\nuntil the provider stops rejecting us. The result is cached per-model on\nthe session, so real messages don't pay the probe cost again.\n\nThree outcomes, classified from the 400 error text:\n\n* success → cache the effort that worked\n* ``\"thinking ... not supported\"`` → model doesn't do thinking at all;\n  cache ``None`` so we stop sending thinking params\n* ``\"effort ... invalid\"`` / synonyms → cascade walks down and retries\n\nTransient errors (5xx, timeout, connection reset) bubble out as\n``ProbeInconclusive`` so the caller can complete the switch with a\nwarning instead of blocking on a flaky provider.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport asyncio\nimport logging\nfrom dataclasses import dataclass\n\nfrom litellm import acompletion\n\nfrom agent.core.llm_params import UnsupportedEffortError, _resolve_llm_params\n\nlogger = logging.getLogger(__name__)\n\n\n# Cascade: for each user-stated preference, the ordered list of levels to\n# try. First success wins. 
``max`` / ``xhigh`` are Anthropic-only; providers\n# that don't accept them raise ``UnsupportedEffortError`` synchronously (no\n# wasted network round-trip) and we advance to the next level.\n_EFFORT_CASCADE: dict[str, list[str]] = {\n    \"max\":     [\"max\", \"xhigh\", \"high\", \"medium\", \"low\"],\n    \"xhigh\":   [\"xhigh\", \"high\", \"medium\", \"low\"],\n    \"high\":    [\"high\", \"medium\", \"low\"],\n    \"medium\":  [\"medium\", \"low\"],\n    \"minimal\": [\"minimal\", \"low\"],\n    \"low\":     [\"low\"],\n}\n\n_PROBE_TIMEOUT = 15.0\n_PROBE_MAX_TOKENS = 16\n\n\nclass ProbeInconclusive(Exception):\n    \"\"\"The probe couldn't reach a verdict (transient network / provider error).\n\n    Caller should complete the switch with a warning — the next real call\n    will re-surface the error if it's persistent.\n    \"\"\"\n\n\n@dataclass\nclass ProbeOutcome:\n    \"\"\"What the probe learned. ``effective_effort`` semantics match the cache:\n\n    * str → send this level\n    * None → model doesn't support thinking; strip it\n    \"\"\"\n    effective_effort: str | None\n    attempts: int\n    elapsed_ms: int\n    note: str | None = None  # e.g. \"max not supported, falling back\"\n\n\ndef _is_thinking_unsupported(e: Exception) -> bool:\n    \"\"\"Model rejected any thinking config.\n\n    Matches Anthropic's 'thinking.type.enabled is not supported for this\n    model' as well as the adaptive variant. Substring-match because the\n    exact wording shifts across API versions.\n    \"\"\"\n    s = str(e).lower()\n    return \"thinking\" in s and \"not supported\" in s\n\n\ndef _is_invalid_effort(e: Exception) -> bool:\n    \"\"\"The requested effort level isn't accepted for this model.\n\n    Covers both API responses (Anthropic/OpenAI 400 with \"invalid\", \"must\n    be one of\", etc.) and LiteLLM's local validation that fires *before*\n    the request (e.g. 
\"effort='max' is only supported by Claude Opus 4.6\"\n    — LiteLLM knows max is Opus-4.6-only and raises synchronously). The\n    cascade walks down on either.\n\n    Explicitly returns False when the message is really about thinking\n    itself (e.g. Anthropic's 4.7 error mentions ``output_config.effort``\n    in its fix hint, but the actual failure is ``thinking.type.enabled``\n    being unsupported). That case is caught by ``_is_thinking_unsupported``.\n    \"\"\"\n    if _is_thinking_unsupported(e):\n        return False\n    s = str(e).lower()\n    if \"effort\" not in s and \"output_config\" not in s:\n        return False\n    return any(\n        phrase in s\n        for phrase in (\n            \"invalid\", \"not supported\", \"must be one of\", \"not a valid\",\n            \"unrecognized\", \"unknown\",\n            # LiteLLM's own pre-flight validation phrasing.\n            \"only supported by\", \"is only supported\",\n        )\n    )\n\n\ndef _is_transient(e: Exception) -> bool:\n    \"\"\"Network / provider-side flake. 
Keep in sync with agent_loop's list.\n\n    Also matches by type for ``asyncio.TimeoutError`` — its ``str(e)`` is\n    empty, so substring matching alone misses it.\n    \"\"\"\n    if isinstance(e, (asyncio.TimeoutError, TimeoutError)):\n        return True\n    s = str(e).lower()\n    return any(\n        p in s\n        for p in (\n            \"timeout\", \"timed out\", \"429\", \"rate limit\",\n            \"503\", \"service unavailable\", \"502\", \"bad gateway\",\n            \"500\", \"internal server error\", \"overloaded\", \"capacity\",\n            \"connection reset\", \"connection refused\", \"connection error\",\n            \"eof\", \"broken pipe\",\n        )\n    )\n\n\nasync def probe_effort(\n    model_name: str,\n    preference: str | None,\n    hf_token: str | None,\n) -> ProbeOutcome:\n    \"\"\"Walk the cascade for ``preference`` on ``model_name``.\n\n    Returns the first effort the provider accepts, or ``None`` if it\n    rejects thinking altogether. Raises ``ProbeInconclusive`` only for\n    transient errors (5xx, timeout) — persistent 4xx that aren't thinking/\n    effort related bubble as the original exception so callers can surface\n    them (auth, model-not-found, quota, etc.).\n    \"\"\"\n    loop = asyncio.get_event_loop()\n    start = loop.time()\n    attempts = 0\n\n    if not preference:\n        # User explicitly turned effort off — nothing to probe. 
A bare\n        # ping with no thinking params is pointless; just report \"off\".\n        return ProbeOutcome(effective_effort=None, attempts=0, elapsed_ms=0)\n\n    cascade = _EFFORT_CASCADE.get(preference, [preference])\n    skipped: list[str] = []  # levels the provider rejected synchronously\n\n    last_error: Exception | None = None\n    for effort in cascade:\n        try:\n            params = _resolve_llm_params(\n                model_name, hf_token, reasoning_effort=effort, strict=True,\n            )\n        except UnsupportedEffortError:\n            # Provider can't even accept this effort name (e.g. \"max\" on\n            # HF router). Skip without a network call.\n            skipped.append(effort)\n            continue\n\n        attempts += 1\n        try:\n            await asyncio.wait_for(\n                acompletion(\n                    messages=[{\"role\": \"user\", \"content\": \"ping\"}],\n                    max_tokens=_PROBE_MAX_TOKENS,\n                    stream=False,\n                    **params,\n                ),\n                timeout=_PROBE_TIMEOUT,\n            )\n        except Exception as e:\n            last_error = e\n            if _is_thinking_unsupported(e):\n                elapsed = int((loop.time() - start) * 1000)\n                return ProbeOutcome(\n                    effective_effort=None,\n                    attempts=attempts,\n                    elapsed_ms=elapsed,\n                    note=\"model doesn't support reasoning, dropped\",\n                )\n            if _is_invalid_effort(e):\n                logger.debug(\"probe: %s rejected effort=%s, trying next\", model_name, effort)\n                continue\n            if _is_transient(e):\n                raise ProbeInconclusive(str(e)) from e\n            # Persistent non-thinking 4xx (auth, quota, model-not-found) —\n            # let the caller classify & surface.\n            raise\n        else:\n            elapsed = int((loop.time() - 
start) * 1000)\n            note = None\n            if effort != preference:\n                note = f\"{preference} not supported, using {effort}\"\n            return ProbeOutcome(\n                effective_effort=effort,\n                attempts=attempts,\n                elapsed_ms=elapsed,\n                note=note,\n            )\n\n    # Cascade exhausted without a success. This only happens when every\n    # level was either rejected synchronously (``UnsupportedEffortError``,\n    # e.g. preference=max on HF and we also somehow filtered all others)\n    # or the provider 400'd ``invalid effort`` on every level.\n    elapsed = int((loop.time() - start) * 1000)\n    if last_error is not None and not _is_invalid_effort(last_error):\n        raise last_error\n    note = (\n        \"no effort level accepted — proceeding without thinking\"\n        if not skipped\n        else f\"provider rejected all efforts ({', '.join(skipped)})\"\n    )\n    return ProbeOutcome(\n        effective_effort=None,\n        attempts=attempts,\n        elapsed_ms=elapsed,\n        note=note,\n    )\n"
  },
  {
    "path": "agent/core/hf_router_catalog.py",
    "content": "\"\"\"Fetch and cache the HF Inference Router model catalog.\n\nThe router exposes an OpenAI-compatible listing at\n``https://router.huggingface.co/v1/models`` with per-provider availability,\npricing, context length, and tool-use support. We use it to:\n\n  • Validate ``/model`` switches with live data instead of a hard-coded allowlist.\n  • Show the user which providers serve a model, at what price, and whether they\n    support tool calls.\n  • Derive a reasonable context-window limit for any routed model.\n\nThe listing is cached in-memory for a few minutes so repeated lookups during a\nsession are free. On fetch failure we return stale data if we have it, or an\nempty catalog otherwise.\n\"\"\"\n\nimport logging\nimport time\nfrom dataclasses import dataclass\nfrom difflib import get_close_matches\nfrom typing import Optional\n\nimport httpx\n\nlogger = logging.getLogger(__name__)\n\n_CATALOG_URL = \"https://router.huggingface.co/v1/models\"\n_CACHE_TTL_SECONDS = 300\n_HTTP_TIMEOUT_SECONDS = 5.0\n\n_cache: Optional[dict] = None\n_cache_time: float = 0.0\n\n\n@dataclass\nclass ProviderInfo:\n    provider: str\n    status: str\n    context_length: Optional[int]\n    input_price: Optional[float]\n    output_price: Optional[float]\n    supports_tools: bool\n    supports_structured_output: bool\n\n\n@dataclass\nclass ModelInfo:\n    id: str\n    providers: list[ProviderInfo]\n\n    @property\n    def live_providers(self) -> list[ProviderInfo]:\n        return [p for p in self.providers if p.status == \"live\"]\n\n    @property\n    def max_context_length(self) -> Optional[int]:\n        lengths = [p.context_length for p in self.live_providers if p.context_length]\n        return max(lengths) if lengths else None\n\n    @property\n    def any_supports_tools(self) -> bool:\n        return any(p.supports_tools for p in self.live_providers)\n\n\ndef _fetch_catalog(force: bool = False) -> dict:\n    global _cache, _cache_time\n    now = time.time()\n    
if not force and _cache is not None and now - _cache_time < _CACHE_TTL_SECONDS:\n        return _cache\n    try:\n        resp = httpx.get(_CATALOG_URL, timeout=_HTTP_TIMEOUT_SECONDS)\n        resp.raise_for_status()\n        _cache = resp.json()\n        _cache_time = now\n    except Exception as e:\n        logger.warning(\"Failed to fetch HF router catalog: %s\", e)\n        if _cache is None:\n            _cache = {\"data\": []}\n            _cache_time = now\n    return _cache\n\n\ndef _parse_entry(entry: dict) -> ModelInfo:\n    providers = []\n    for p in entry.get(\"providers\", []) or []:\n        pricing = p.get(\"pricing\") or {}\n        providers.append(\n            ProviderInfo(\n                provider=p.get(\"provider\", \"\"),\n                status=p.get(\"status\", \"\"),\n                context_length=p.get(\"context_length\"),\n                input_price=pricing.get(\"input\"),\n                output_price=pricing.get(\"output\"),\n                supports_tools=bool(p.get(\"supports_tools\", False)),\n                supports_structured_output=bool(p.get(\"supports_structured_output\", False)),\n            )\n        )\n    return ModelInfo(id=entry.get(\"id\", \"\"), providers=providers)\n\n\ndef lookup(model_id: str) -> Optional[ModelInfo]:\n    \"\"\"Find a model in the router catalog.\n\n    Accepts ``<org>/<model>`` or ``<org>/<model>:<tag>`` — the tag is stripped\n    for lookup. 
Returns ``None`` if the model isn't listed.\n    \"\"\"\n    bare = model_id.split(\":\", 1)[0]\n    catalog = _fetch_catalog()\n    for entry in catalog.get(\"data\", []):\n        if entry.get(\"id\") == bare:\n            return _parse_entry(entry)\n    return None\n\n\ndef fuzzy_suggest(model_id: str, limit: int = 3) -> list[str]:\n    \"\"\"Return the closest model ids from the catalog.\"\"\"\n    bare = model_id.split(\":\", 1)[0]\n    catalog = _fetch_catalog()\n    ids = [e.get(\"id\", \"\") for e in catalog.get(\"data\", []) if e.get(\"id\")]\n    return get_close_matches(bare, ids, n=limit, cutoff=0.4)\n\n\ndef prewarm() -> None:\n    \"\"\"Fetch the catalog so subsequent lookups are instant. Safe to call from\n    a background task — swallows failures.\"\"\"\n    try:\n        _fetch_catalog(force=False)\n    except Exception:\n        pass\n"
  },
  {
    "path": "agent/core/llm_params.py",
    "content": "\"\"\"LiteLLM kwargs resolution for the model ids this agent accepts.\n\nKept separate from ``agent_loop`` so tools (research, context compaction, etc.)\ncan import it without pulling in the whole agent loop / tool router and\ncreating circular imports.\n\"\"\"\n\nimport os\n\n\ndef _patch_litellm_effort_validation() -> None:\n    \"\"\"Neuter LiteLLM 1.83's hardcoded effort-level validation.\n\n    Context: at ``litellm/llms/anthropic/chat/transformation.py:~1443`` the\n    Anthropic adapter validates ``output_config.effort ∈ {high, medium,\n    low, max}`` and gates ``max`` behind an ``_is_opus_4_6_model`` check\n    that only matches the substring ``opus-4-6`` / ``opus_4_6``. Result:\n\n    * ``xhigh`` — valid on Anthropic's real API for Claude 4.7 — is\n      rejected pre-flight with \"Invalid effort value: xhigh\".\n    * ``max`` on Opus 4.7 is rejected with \"effort='max' is only supported\n      by Claude Opus 4.6\", even though Opus 4.7 accepts it in practice.\n\n    We don't want to maintain a parallel model table, so we let the\n    Anthropic API itself be the validator: widen ``_is_opus_4_6_model``\n    to also match ``opus-4-7``+ families, and drop the valid-effort-set\n    check entirely. If Anthropic rejects an effort level, we see a 400\n    and the cascade walks down — exactly the behavior we want for any\n    future model family.\n\n    Removable once litellm ships 1.83.8-stable (which merges PR #25867,\n    \"Litellm day 0 opus 4.7 support\") — see commit 0868a82 on their main\n    branch. 
Until then, this one-time patch is the escape hatch.\n    \"\"\"\n    try:\n        from litellm.llms.anthropic.chat import transformation as _t\n    except Exception:\n        return\n\n    cfg = getattr(_t, \"AnthropicConfig\", None)\n    if cfg is None:\n        return\n\n    original = getattr(cfg, \"_is_opus_4_6_model\", None)\n    if original is None or getattr(original, \"_hf_agent_patched\", False):\n        return\n\n    def _widened(model: str) -> bool:\n        m = model.lower()\n        # Original 4.6 match plus any future Opus >= 4.6. We only need this\n        # to return True for families where \"max\" / \"xhigh\" are acceptable\n        # at the API; the cascade handles the case when they're not.\n        return any(\n            v in m for v in (\n                \"opus-4-6\", \"opus_4_6\", \"opus-4.6\", \"opus_4.6\",\n                \"opus-4-7\", \"opus_4_7\", \"opus-4.7\", \"opus_4.7\",\n            )\n        )\n\n    _widened._hf_agent_patched = True  # type: ignore[attr-defined]\n    cfg._is_opus_4_6_model = staticmethod(_widened)\n\n\n_patch_litellm_effort_validation()\n\n\n# Effort levels accepted on the wire.\n#   Anthropic (4.6+):  low | medium | high | xhigh | max   (output_config.effort)\n#   OpenAI direct:     minimal | low | medium | high       (reasoning_effort top-level)\n#   HF router:         low | medium | high                 (extra_body.reasoning_effort)\n#\n# We validate *shape* here and let the probe cascade walk down on rejection;\n# we deliberately do NOT maintain a per-model capability table.\n_ANTHROPIC_EFFORTS = {\"low\", \"medium\", \"high\", \"xhigh\", \"max\"}\n_OPENAI_EFFORTS = {\"minimal\", \"low\", \"medium\", \"high\"}\n_HF_EFFORTS = {\"low\", \"medium\", \"high\"}\n\n\nclass UnsupportedEffortError(ValueError):\n    \"\"\"The requested effort isn't valid for this provider's API surface.\n\n    Raised synchronously before any network call so the probe cascade can\n    skip levels the provider can't accept (e.g. 
``max`` on HF router).\n    \"\"\"\n\n\ndef _resolve_llm_params(\n    model_name: str,\n    session_hf_token: str | None = None,\n    reasoning_effort: str | None = None,\n    strict: bool = False,\n) -> dict:\n    \"\"\"\n    Build LiteLLM kwargs for a given model id.\n\n    • ``anthropic/<model>`` — native thinking config. We bypass LiteLLM's\n      ``reasoning_effort`` → ``thinking`` mapping (which lags new Claude\n      releases like 4.7 and sends the wrong API shape). Instead we pass\n      both ``thinking={\"type\": \"adaptive\"}`` and ``output_config=\n      {\"effort\": <level>}`` as top-level kwargs — LiteLLM's Anthropic\n      adapter forwards unknown top-level kwargs into the request body\n      verbatim (confirmed by live probe; ``extra_body`` does NOT work\n      here because Anthropic's API rejects it as \"Extra inputs are not\n      permitted\"). This is the stable API for 4.6 and 4.7. Older\n      extended-thinking models that only accept ``thinking.type.enabled``\n      will reject this; the probe's cascade catches that and falls back\n      to no thinking.\n\n    • ``openai/<model>`` — ``reasoning_effort`` forwarded as a top-level\n      kwarg (GPT-5 / o-series). LiteLLM uses the user's ``OPENAI_API_KEY``.\n\n    • Anything else is treated as a HuggingFace router id. We hit the\n      auto-routing OpenAI-compatible endpoint at\n      ``https://router.huggingface.co/v1``. The id can be bare or carry an\n      HF routing suffix (``:fastest`` / ``:cheapest`` / ``:<provider>``).\n      A leading ``huggingface/`` is stripped. ``reasoning_effort`` is\n      forwarded via ``extra_body`` (LiteLLM's OpenAI adapter refuses it as\n      a top-level kwarg for non-OpenAI models). \"minimal\" normalizes to\n      \"low\".\n\n    ``strict=True`` raises ``UnsupportedEffortError`` when the requested\n    effort isn't in the provider's accepted set, instead of silently\n    dropping it. 
The probe cascade uses strict mode so it can walk down\n    (``max`` → ``xhigh`` → ``high`` …) without making an API call. Regular\n    runtime callers leave ``strict=False``, so a stale cached effort\n    can't crash a turn — it just doesn't get sent.\n\n    Token precedence (first non-empty wins):\n      1. INFERENCE_TOKEN env — shared key on the hosted Space (inference is\n         free for users, billed to the Space owner via ``X-HF-Bill-To``).\n      2. session.hf_token — the user's own token (CLI / OAuth / cache file).\n      3. HF_TOKEN env — belt-and-suspenders fallback for CLI users.\n    \"\"\"\n    if model_name.startswith(\"anthropic/\"):\n        params: dict = {\"model\": model_name}\n        if reasoning_effort:\n            level = reasoning_effort\n            if level == \"minimal\":\n                level = \"low\"\n            if level not in _ANTHROPIC_EFFORTS:\n                if strict:\n                    raise UnsupportedEffortError(\n                        f\"Anthropic doesn't accept effort={level!r}\"\n                    )\n            else:\n                # Adaptive thinking + output_config.effort is the stable\n                # Anthropic API for Claude 4.6 / 4.7. Both kwargs are\n                # passed top-level: LiteLLM forwards unknown params into\n                # the request body for Anthropic, so ``output_config``\n                # reaches the API. 
``extra_body`` does NOT work here —\n                # Anthropic rejects it as \"Extra inputs are not\n                # permitted\".\n                params[\"thinking\"] = {\"type\": \"adaptive\"}\n                params[\"output_config\"] = {\"effort\": level}\n        return params\n\n    if model_name.startswith(\"bedrock/\"):\n        # LiteLLM routes ``bedrock/...`` through the Converse adapter, which\n        # picks up AWS credentials from the standard env vars\n        # (``AWS_ACCESS_KEY_ID`` / ``AWS_SECRET_ACCESS_KEY`` / ``AWS_REGION``).\n        # The Anthropic thinking/effort shape is not forwarded through Converse\n        # the same way, so we leave it off for now.\n        return {\"model\": model_name}\n\n    if model_name.startswith(\"openai/\"):\n        params = {\"model\": model_name}\n        if reasoning_effort:\n            if reasoning_effort not in _OPENAI_EFFORTS:\n                if strict:\n                    raise UnsupportedEffortError(\n                        f\"OpenAI doesn't accept effort={reasoning_effort!r}\"\n                    )\n            else:\n                params[\"reasoning_effort\"] = reasoning_effort\n        return params\n\n    hf_model = model_name.removeprefix(\"huggingface/\")\n    api_key = (\n        os.environ.get(\"INFERENCE_TOKEN\")\n        or session_hf_token\n        or os.environ.get(\"HF_TOKEN\")\n    )\n    params = {\n        \"model\": f\"openai/{hf_model}\",\n        \"api_base\": \"https://router.huggingface.co/v1\",\n        \"api_key\": api_key,\n    }\n    if os.environ.get(\"INFERENCE_TOKEN\"):\n        bill_to = os.environ.get(\"HF_BILL_TO\", \"smolagents\")\n        params[\"extra_headers\"] = {\"X-HF-Bill-To\": bill_to}\n    if reasoning_effort:\n        hf_level = \"low\" if reasoning_effort == \"minimal\" else reasoning_effort\n        if hf_level not in _HF_EFFORTS:\n            if strict:\n                raise UnsupportedEffortError(\n                    f\"HF router doesn't accept 
effort={hf_level!r}\"\n                )\n        else:\n            params[\"extra_body\"] = {\"reasoning_effort\": hf_level}\n    return params\n"
  },
  {
    "path": "agent/core/model_switcher.py",
    "content": "\"\"\"Model-switching logic for the interactive CLI's ``/model`` command.\n\nSplit out of ``agent.main`` so the REPL dispatcher stays focused on input\nparsing. Exposes:\n\n* ``SUGGESTED_MODELS`` — the short list shown by ``/model`` with no arg.\n* ``is_valid_model_id`` — loose format check on user input.\n* ``probe_and_switch_model`` — async: checks routing, fires a 1-token\n  probe to resolve the effort cascade, then commits the switch (or\n  rejects it on hard error).\n\nThe probe's cascade lives in ``agent.core.effort_probe``; this module\nglues it to CLI output + session state.\n\"\"\"\n\nfrom __future__ import annotations\n\nfrom agent.core.effort_probe import ProbeInconclusive, probe_effort\n\n\n# Suggested models shown by `/model` (not a gate). Users can paste any HF\n# model id (e.g. \"MiniMaxAI/MiniMax-M2.7\") or an `anthropic/` / `openai/`\n# prefix for direct API access. For HF ids, append \":fastest\" /\n# \":cheapest\" / \":preferred\" / \":<provider>\" to override the default\n# routing policy (auto = fastest with failover).\nSUGGESTED_MODELS = [\n    {\"id\": \"bedrock/us.anthropic.claude-opus-4-7\", \"label\": \"Claude Opus 4.7\"},\n    {\"id\": \"bedrock/us.anthropic.claude-opus-4-6-v1\", \"label\": \"Claude Opus 4.6\"},\n    {\"id\": \"MiniMaxAI/MiniMax-M2.7\", \"label\": \"MiniMax M2.7\"},\n    {\"id\": \"moonshotai/Kimi-K2.6\", \"label\": \"Kimi K2.6\"},\n    {\"id\": \"zai-org/GLM-5.1\", \"label\": \"GLM 5.1\"},\n]\n\n\n_ROUTING_POLICIES = {\"fastest\", \"cheapest\", \"preferred\"}\n\n\ndef is_valid_model_id(model_id: str) -> bool:\n    \"\"\"Loose format check — lets users pick any model id.\n\n    Accepts:\n      • anthropic/<model>\n      • openai/<model>\n      • <org>/<model>[:<tag>]            (HF router; tag = provider or policy)\n      • huggingface/<org>/<model>[:<tag>] (same, accepts legacy prefix)\n\n    Actual availability is verified against the HF router catalog on\n    switch, and by the provider on the probe's 
ping call.\n    \"\"\"\n    if not model_id or \"/\" not in model_id:\n        return False\n    head = model_id.split(\":\", 1)[0]\n    parts = head.split(\"/\")\n    return len(parts) >= 2 and all(parts)\n\n\ndef _print_hf_routing_info(model_id: str, console) -> bool:\n    \"\"\"Show HF router catalog info (providers, price, context, tool support)\n    for an HF-router model id. Returns ``True`` to signal the caller can\n    proceed with the switch, ``False`` to indicate a hard problem the user\n    should notice before we fire the effort probe.\n\n    Anthropic / OpenAI ids return ``True`` without printing anything —\n    the probe below covers \"does this model exist\".\n    \"\"\"\n    if model_id.startswith((\"anthropic/\", \"openai/\")):\n        return True\n\n    from agent.core import hf_router_catalog as cat\n\n    bare, _, tag = model_id.partition(\":\")\n    info = cat.lookup(bare)\n    if info is None:\n        console.print(\n            f\"[bold red]Warning:[/bold red] '{bare}' isn't in the HF router \"\n            \"catalog. Checking anyway — first call may fail.\"\n        )\n        suggestions = cat.fuzzy_suggest(bare)\n        if suggestions:\n            console.print(f\"[dim]Did you mean: {', '.join(suggestions)}[/dim]\")\n        return True\n\n    live = info.live_providers\n    if not live:\n        console.print(\n            f\"[bold red]Warning:[/bold red] '{bare}' has no live providers \"\n            \"right now. First call will likely fail.\"\n        )\n        return True\n\n    if tag and tag not in _ROUTING_POLICIES:\n        matched = [p for p in live if p.provider == tag]\n        if not matched:\n            names = \", \".join(p.provider for p in live)\n            console.print(\n                f\"[bold red]Warning:[/bold red] provider '{tag}' doesn't serve \"\n                f\"'{bare}'. Live providers: {names}. 
Checking anyway.\"\n            )\n\n    if not info.any_supports_tools:\n        console.print(\n            f\"[bold red]Warning:[/bold red] no provider for '{bare}' advertises \"\n            \"tool-call support. This agent relies on tool calls — expect errors.\"\n        )\n\n    if tag in _ROUTING_POLICIES:\n        policy = tag\n    elif tag:\n        policy = f\"pinned to {tag}\"\n    else:\n        policy = \"auto (fastest)\"\n    console.print(f\"  [dim]routing: {policy}[/dim]\")\n    for p in live:\n        price = (\n            f\"${p.input_price:g}/${p.output_price:g} per M tok\"\n            if p.input_price is not None and p.output_price is not None\n            else \"price n/a\"\n        )\n        ctx = f\"{p.context_length:,} ctx\" if p.context_length else \"ctx n/a\"\n        tools = \"tools\" if p.supports_tools else \"no tools\"\n        console.print(\n            f\"  [dim]{p.provider}: {price}, {ctx}, {tools}[/dim]\"\n        )\n    return True\n\n\ndef print_model_listing(config, console) -> None:\n    \"\"\"Render the default ``/model`` (no-arg) view: current + suggested.\"\"\"\n    current = config.model_name if config else \"\"\n    console.print(\"[bold]Current model:[/bold]\")\n    console.print(f\"  {current}\")\n    console.print(\"\\n[bold]Suggested:[/bold]\")\n    for m in SUGGESTED_MODELS:\n        marker = \" [dim]<-- current[/dim]\" if m[\"id\"] == current else \"\"\n        console.print(f\"  {m['id']}  [dim]({m['label']})[/dim]{marker}\")\n    console.print(\n        \"\\n[dim]Paste any HF model id (e.g. 
'MiniMaxAI/MiniMax-M2.7').\\n\"\n        \"Add ':fastest', ':cheapest', ':preferred', or ':<provider>' to override routing.\\n\"\n        \"Use 'anthropic/<model>' or 'openai/<model>' for direct API access.[/dim]\"\n    )\n\n\ndef print_invalid_id(arg: str, console) -> None:\n    console.print(f\"[bold red]Invalid model id format:[/bold red] {arg}\")\n    console.print(\n        \"[dim]Expected:\\n\"\n        \"  • <org>/<model>[:tag]    (HF router — paste from huggingface.co)\\n\"\n        \"  • anthropic/<model>\\n\"\n        \"  • openai/<model>[/dim]\"\n    )\n\n\nasync def probe_and_switch_model(\n    model_id: str,\n    config,\n    session,\n    console,\n    hf_token: str | None,\n) -> None:\n    \"\"\"Validate model+effort with a 1-token ping, cache the effective effort,\n    then commit the switch.\n\n    Three visible outcomes:\n\n    * ✓ ``effort: <level>`` — model accepted the preferred effort (or a\n      fallback from the cascade; the note explains if so)\n    * ✓ ``effort: off`` — model doesn't support thinking; we'll strip it\n    * ✗ hard error (auth, model-not-found, quota) — we reject the switch\n      and keep the current model so the user isn't stranded\n\n    Transient errors (5xx, timeout) complete the switch with a yellow\n    warning; the next real call re-surfaces the error if it's persistent.\n    \"\"\"\n    preference = config.reasoning_effort\n    if not _print_hf_routing_info(model_id, console):\n        return\n\n    if not preference:\n        # Nothing to validate with a ping that we couldn't validate on the\n        # first real call just as cheaply. 
Skip the probe entirely.\n        _commit_switch(model_id, config, session, effective=None, cache=False)\n        console.print(f\"[green]Model switched to {model_id}[/green] [dim](effort: off)[/dim]\")\n        return\n\n    console.print(f\"[dim]checking {model_id} (effort: {preference})...[/dim]\")\n    try:\n        outcome = await probe_effort(model_id, preference, hf_token)\n    except ProbeInconclusive as e:\n        _commit_switch(model_id, config, session, effective=None, cache=False)\n        console.print(\n            f\"[yellow]Model switched to {model_id}[/yellow] \"\n            f\"[dim](couldn't validate: {e}; will verify on first message)[/dim]\"\n        )\n        return\n    except Exception as e:\n        # Hard persistent error — auth, unknown model, quota. Don't switch.\n        console.print(f\"[bold red]Switch failed:[/bold red] {e}\")\n        console.print(f\"[dim]Keeping current model: {config.model_name}[/dim]\")\n        return\n\n    _commit_switch(\n        model_id, config, session,\n        effective=outcome.effective_effort, cache=True,\n    )\n    effort_label = outcome.effective_effort or \"off\"\n    suffix = f\" — {outcome.note}\" if outcome.note else \"\"\n    console.print(\n        f\"[green]Model switched to {model_id}[/green] \"\n        f\"[dim](effort: {effort_label}{suffix}, {outcome.elapsed_ms}ms)[/dim]\"\n    )\n\n\ndef _commit_switch(model_id, config, session, effective, cache: bool) -> None:\n    \"\"\"Apply the switch to the session (or bare config if no session yet).\n\n    ``effective`` is the probe's resolved effort; ``cache=True`` stores it\n    in the session's per-model cache so real calls use the resolved level\n    instead of re-probing. 
``cache=False`` (inconclusive probe / effort\n    off) leaves the cache untouched — next call falls back to preference.\n    \"\"\"\n    if session is not None:\n        session.update_model(model_id)\n        if cache:\n            session.model_effective_effort[model_id] = effective\n        else:\n            session.model_effective_effort.pop(model_id, None)\n    else:\n        config.model_name = model_id\n"
  },
  {
    "path": "agent/core/prompt_caching.py",
    "content": "\"\"\"Anthropic prompt caching breakpoints for outgoing LLM requests.\n\nCaching is GA on Anthropic's API and natively supported by litellm >=1.83\nvia ``cache_control`` blocks. We apply two breakpoints (out of 4 allowed):\n\n  1. The tool block — caches all tool definitions as a single prefix.\n  2. The system message — caches the rendered system prompt.\n\nTogether these cover the ~4-5K static tokens that were being re-billed on\nevery turn. Subsequent turns within the 5-minute TTL hit cache_read pricing\n(~10% of input cost) instead of full input.\n\nNon-Anthropic models (HF router, OpenAI) are passed through unchanged.\n\"\"\"\n\nfrom typing import Any\n\n\ndef with_prompt_caching(\n    messages: list[Any],\n    tools: list[dict] | None,\n    model_name: str | None,\n) -> tuple[list[Any], list[dict] | None]:\n    \"\"\"Return (messages, tools) with cache_control breakpoints for Anthropic.\n\n    No-op for non-Anthropic models. Original objects are not mutated; a fresh\n    list with replaced first message and last tool is returned, so callers\n    that share the underlying ``ContextManager.items`` list don't see their\n    persisted history rewritten.\n    \"\"\"\n    if not model_name or \"anthropic\" not in model_name:\n        return messages, tools\n\n    if tools:\n        new_tools = list(tools)\n        last = dict(new_tools[-1])\n        last[\"cache_control\"] = {\"type\": \"ephemeral\"}\n        new_tools[-1] = last\n        tools = new_tools\n\n    if messages:\n        first = messages[0]\n        role = first.get(\"role\") if isinstance(first, dict) else getattr(first, \"role\", None)\n        if role == \"system\":\n            content = (\n                first.get(\"content\")\n                if isinstance(first, dict)\n                else getattr(first, \"content\", None)\n            )\n            if isinstance(content, str) and content:\n                cached_block = [{\n                    \"type\": \"text\",\n            
        \"text\": content,\n                    \"cache_control\": {\"type\": \"ephemeral\"},\n                }]\n                new_first = {\"role\": \"system\", \"content\": cached_block}\n                messages = [new_first] + list(messages[1:])\n\n    return messages, tools\n"
  },
  {
    "path": "agent/core/session.py",
    "content": "import asyncio\nimport json\nimport logging\nimport subprocess\nimport sys\nimport uuid\nfrom dataclasses import dataclass\nfrom datetime import datetime\nfrom enum import Enum\nfrom pathlib import Path\nfrom typing import Any, Optional\n\nfrom agent.config import Config\nfrom agent.context_manager.manager import ContextManager\n\nlogger = logging.getLogger(__name__)\n\n_DEFAULT_MAX_TOKENS = 200_000\n\n\ndef _get_max_tokens_safe(model_name: str) -> int:\n    \"\"\"Return the max input-context tokens for a model.\n\n    Primary source: ``litellm.get_model_info(model)['max_input_tokens']`` —\n    LiteLLM maintains an upstream catalog that knows Claude Opus 4.6 is\n    1M, GPT-5 is 272k, Sonnet 4.5 is 200k, and so on. Strips any HF routing\n    suffix / huggingface/ prefix so tagged ids ('moonshotai/Kimi-K2.6:cheapest')\n    look up the bare model. Falls back to a conservative 200k default for\n    models not in the catalog (typically HF-router-only models).\n    \"\"\"\n    from litellm import get_model_info\n\n    candidates = [model_name]\n    stripped = model_name.removeprefix(\"huggingface/\").split(\":\", 1)[0]\n    if stripped != model_name:\n        candidates.append(stripped)\n    for candidate in candidates:\n        try:\n            info = get_model_info(candidate)\n            max_input = info.get(\"max_input_tokens\") if info else None\n            if isinstance(max_input, int) and max_input > 0:\n                return max_input\n        except Exception:\n            continue\n    logger.info(\n        \"No litellm.get_model_info entry for %s, falling back to %d\",\n        model_name, _DEFAULT_MAX_TOKENS,\n    )\n    return _DEFAULT_MAX_TOKENS\n\n\nclass OpType(Enum):\n    USER_INPUT = \"user_input\"\n    EXEC_APPROVAL = \"exec_approval\"\n    INTERRUPT = \"interrupt\"\n    UNDO = \"undo\"\n    COMPACT = \"compact\"\n    SHUTDOWN = \"shutdown\"\n\n\n@dataclass\nclass Event:\n    event_type: str\n    data: Optional[dict[str, Any]] = 
None\n\n\nclass Session:\n    \"\"\"\n    Maintains agent session state\n    Similar to Session in codex-rs/core/src/codex.rs\n    \"\"\"\n\n    def __init__(\n        self,\n        event_queue: asyncio.Queue,\n        config: Config | None = None,\n        tool_router=None,\n        context_manager: ContextManager | None = None,\n        hf_token: str | None = None,\n        local_mode: bool = False,\n        stream: bool = True,\n    ):\n        self.hf_token: Optional[str] = hf_token\n        self.tool_router = tool_router\n        self.stream = stream\n        tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []\n        self.context_manager = context_manager or ContextManager(\n            model_max_tokens=_get_max_tokens_safe(config.model_name),\n            compact_size=0.1,\n            untouched_messages=5,\n            tool_specs=tool_specs,\n            hf_token=hf_token,\n            local_mode=local_mode,\n        )\n        self.event_queue = event_queue\n        self.session_id = str(uuid.uuid4())\n        self.config = config or Config(\n            model_name=\"bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0\",\n        )\n        self.is_running = True\n        self._cancelled = asyncio.Event()\n        self.pending_approval: Optional[dict[str, Any]] = None\n        self.sandbox = None\n        self._running_job_ids: set[str] = set()  # HF job IDs currently executing\n\n        # Session trajectory logging\n        self.logged_events: list[dict] = []\n        self.session_start_time = datetime.now().isoformat()\n        self.turn_count: int = 0\n        self.last_auto_save_turn: int = 0\n\n        # Per-model probed reasoning-effort cache. Populated by the probe\n        # on /model switch, read by ``effective_effort_for`` below. Keys are\n        # raw model ids (including any ``:tag``). Values:\n        #   str  → the effort level to send (may be a downgrade from the\n        #          preference, e.g. 
\"high\" when user asked for \"max\")\n        #   None → model rejected all efforts in the cascade; send no\n        #          thinking params at all\n        # Key absent → not probed yet; fall back to the raw preference.\n        self.model_effective_effort: dict[str, str | None] = {}\n\n    async def send_event(self, event: Event) -> None:\n        \"\"\"Send event back to client and log to trajectory\"\"\"\n        await self.event_queue.put(event)\n\n        # Log event to trajectory\n        self.logged_events.append(\n            {\n                \"timestamp\": datetime.now().isoformat(),\n                \"event_type\": event.event_type,\n                \"data\": event.data,\n            }\n        )\n\n    def cancel(self) -> None:\n        \"\"\"Signal cancellation to the running agent loop.\"\"\"\n        self._cancelled.set()\n\n    def reset_cancel(self) -> None:\n        \"\"\"Clear the cancellation flag before a new run.\"\"\"\n        self._cancelled.clear()\n\n    @property\n    def is_cancelled(self) -> bool:\n        return self._cancelled.is_set()\n\n    def update_model(self, model_name: str) -> None:\n        \"\"\"Switch the active model and update the context window limit.\"\"\"\n        self.config.model_name = model_name\n        self.context_manager.model_max_tokens = _get_max_tokens_safe(model_name)\n\n    def effective_effort_for(self, model_name: str) -> str | None:\n        \"\"\"Resolve the effort level to actually send for ``model_name``.\n\n        Returns the probed result when we have one (may be ``None`` meaning\n        \"model doesn't do thinking, strip it\"), else the raw preference.\n        Unknown-model case falls back to the preference so a stale cache\n        from a prior ``/model`` can't poison research sub-calls that use a\n        different model id.\n        \"\"\"\n        if model_name in self.model_effective_effort:\n            return self.model_effective_effort[model_name]\n        return 
self.config.reasoning_effort\n\n    def increment_turn(self) -> None:\n        \"\"\"Increment turn counter (called after each user interaction)\"\"\"\n        self.turn_count += 1\n\n    async def auto_save_if_needed(self) -> None:\n        \"\"\"Check if auto-save should trigger and save if so (completely non-blocking)\"\"\"\n        if not self.config.save_sessions:\n            return\n\n        interval = self.config.auto_save_interval\n        if interval <= 0:\n            return\n\n        turns_since_last_save = self.turn_count - self.last_auto_save_turn\n        if turns_since_last_save >= interval:\n            logger.info(f\"Auto-saving session (turn {self.turn_count})...\")\n            # Fire-and-forget save - returns immediately\n            self.save_and_upload_detached(self.config.session_dataset_repo)\n            self.last_auto_save_turn = self.turn_count\n\n    def get_trajectory(self) -> dict:\n        \"\"\"Serialize complete session trajectory for logging\"\"\"\n        return {\n            \"session_id\": self.session_id,\n            \"session_start_time\": self.session_start_time,\n            \"session_end_time\": datetime.now().isoformat(),\n            \"model_name\": self.config.model_name,\n            \"messages\": [msg.model_dump() for msg in self.context_manager.items],\n            \"events\": self.logged_events,\n        }\n\n    def save_trajectory_local(\n        self,\n        directory: str = \"session_logs\",\n        upload_status: str = \"pending\",\n        dataset_url: Optional[str] = None,\n    ) -> Optional[str]:\n        \"\"\"\n        Save trajectory to local JSON file as backup with upload status\n\n        Args:\n            directory: Directory to save logs (default: \"session_logs\")\n            upload_status: Status of upload attempt (\"pending\", \"success\", \"failed\")\n            dataset_url: URL of dataset if upload succeeded\n\n        Returns:\n            Path to saved file if successful, None 
otherwise\n        \"\"\"\n        try:\n            log_dir = Path(directory)\n            log_dir.mkdir(parents=True, exist_ok=True)\n\n            trajectory = self.get_trajectory()\n\n            # Add upload metadata\n            trajectory[\"upload_status\"] = upload_status\n            trajectory[\"upload_url\"] = dataset_url\n            trajectory[\"last_save_time\"] = datetime.now().isoformat()\n\n            filename = f\"session_{self.session_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json\"\n            filepath = log_dir / filename\n\n            with open(filepath, \"w\") as f:\n                json.dump(trajectory, f, indent=2)\n\n            return str(filepath)\n        except Exception as e:\n            logger.error(f\"Failed to save session locally: {e}\")\n            return None\n\n    def update_local_save_status(\n        self, filepath: str, upload_status: str, dataset_url: Optional[str] = None\n    ) -> bool:\n        \"\"\"Update the upload status of an existing local save file\"\"\"\n        try:\n            with open(filepath, \"r\") as f:\n                data = json.load(f)\n\n            data[\"upload_status\"] = upload_status\n            data[\"upload_url\"] = dataset_url\n            data[\"last_save_time\"] = datetime.now().isoformat()\n\n            with open(filepath, \"w\") as f:\n                json.dump(data, f, indent=2)\n\n            return True\n        except Exception as e:\n            logger.error(f\"Failed to update local save status: {e}\")\n            return False\n\n    def save_and_upload_detached(self, repo_id: str) -> Optional[str]:\n        \"\"\"\n        Save session locally and spawn detached subprocess for upload (fire-and-forget)\n\n        Args:\n            repo_id: HuggingFace dataset repo ID\n\n        Returns:\n            Path to local save file\n        \"\"\"\n        # Save locally first (fast, synchronous)\n        local_path = self.save_trajectory_local(upload_status=\"pending\")\n     
   if not local_path:\n            return None\n\n        # Spawn detached subprocess for upload (fire-and-forget)\n        try:\n            uploader_script = Path(__file__).parent / \"session_uploader.py\"\n\n            # Use Popen with detached process\n            subprocess.Popen(\n                [sys.executable, str(uploader_script), \"upload\", local_path, repo_id],\n                stdin=subprocess.DEVNULL,\n                stdout=subprocess.DEVNULL,\n                stderr=subprocess.DEVNULL,\n                start_new_session=True,  # Detach from parent\n            )\n        except Exception as e:\n            logger.warning(f\"Failed to spawn upload subprocess: {e}\")\n\n        return local_path\n\n    @staticmethod\n    def retry_failed_uploads_detached(\n        directory: str = \"session_logs\", repo_id: Optional[str] = None\n    ) -> None:\n        \"\"\"\n        Spawn detached subprocess to retry failed/pending uploads (fire-and-forget)\n\n        Args:\n            directory: Directory containing session logs\n            repo_id: Target dataset repo ID\n        \"\"\"\n        if not repo_id:\n            return\n\n        try:\n            uploader_script = Path(__file__).parent / \"session_uploader.py\"\n\n            # Spawn detached subprocess for retry\n            subprocess.Popen(\n                [sys.executable, str(uploader_script), \"retry\", directory, repo_id],\n                stdin=subprocess.DEVNULL,\n                stdout=subprocess.DEVNULL,\n                stderr=subprocess.DEVNULL,\n                start_new_session=True,  # Detach from parent\n            )\n        except Exception as e:\n            logger.warning(f\"Failed to spawn retry subprocess: {e}\")\n"
  },
  {
    "path": "agent/core/session_uploader.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nStandalone script for uploading session trajectories to HuggingFace.\nThis runs as a separate process to avoid blocking the main agent.\nUses individual file uploads to avoid race conditions.\n\"\"\"\n\nimport json\nimport os\nimport sys\nfrom datetime import datetime\nfrom pathlib import Path\n\nfrom dotenv import load_dotenv\n\nload_dotenv()\n\n# Token for session uploads — loaded from env var (never hardcode tokens in source)\n_SESSION_TOKEN = os.environ.get(\"HF_SESSION_UPLOAD_TOKEN\", \"\")\n\n\ndef upload_session_as_file(\n    session_file: str, repo_id: str, max_retries: int = 3\n) -> bool:\n    \"\"\"\n    Upload a single session as an individual JSONL file (no race conditions)\n\n    Args:\n        session_file: Path to local session JSON file\n        repo_id: HuggingFace dataset repo ID\n        max_retries: Number of retry attempts\n\n    Returns:\n        True if successful, False otherwise\n    \"\"\"\n    try:\n        from huggingface_hub import HfApi\n    except ImportError:\n        print(\"Error: huggingface_hub library not available\", file=sys.stderr)\n        return False\n\n    try:\n        # Load session data\n        with open(session_file, \"r\") as f:\n            data = json.load(f)\n\n        # Check if already uploaded\n        upload_status = data.get(\"upload_status\")\n        if upload_status == \"success\":\n            return True\n\n        # Use dedicated session upload token (write-only access to session dataset)\n        hf_token = _SESSION_TOKEN\n        if not hf_token:\n            # Update status to failed\n            data[\"upload_status\"] = \"failed\"\n            with open(session_file, \"w\") as f:\n                json.dump(data, f, indent=2)\n            return False\n\n        # Prepare JSONL content (single line)\n        # Store messages and events as JSON strings to avoid schema conflicts\n        session_row = {\n            \"session_id\": 
data[\"session_id\"],\n            \"session_start_time\": data[\"session_start_time\"],\n            \"session_end_time\": data[\"session_end_time\"],\n            \"model_name\": data[\"model_name\"],\n            \"messages\": json.dumps(data[\"messages\"]),\n            \"events\": json.dumps(data[\"events\"]),\n        }\n\n        # Create temporary JSONL file\n        import tempfile\n\n        with tempfile.NamedTemporaryFile(\n            mode=\"w\", suffix=\".jsonl\", delete=False\n        ) as tmp:\n            json.dump(session_row, tmp)  # Single line JSON\n            tmp_path = tmp.name\n\n        try:\n            # Generate unique path in repo: sessions/YYYY-MM-DD/session_id.jsonl\n            session_id = data[\"session_id\"]\n            date_str = datetime.fromisoformat(data[\"session_start_time\"]).strftime(\n                \"%Y-%m-%d\"\n            )\n            repo_path = f\"sessions/{date_str}/{session_id}.jsonl\"\n\n            # Upload with retries\n            api = HfApi()\n            for attempt in range(max_retries):\n                try:\n                    # Try to create repo if it doesn't exist (idempotent)\n                    try:\n                        api.create_repo(\n                            repo_id=repo_id,\n                            repo_type=\"dataset\",\n                            private=False,\n                            token=hf_token,\n                            exist_ok=True,  # Don't fail if already exists\n                        )\n\n                    except Exception:\n                        # Repo might already exist, continue\n                        pass\n\n                    # Upload the session file\n                    api.upload_file(\n                        path_or_fileobj=tmp_path,\n                        path_in_repo=repo_path,\n                        repo_id=repo_id,\n                        repo_type=\"dataset\",\n                        token=hf_token,\n                        
commit_message=f\"Add session {session_id}\",\n                    )\n\n                    # Update local status to success\n                    data[\"upload_status\"] = \"success\"\n                    data[\"upload_url\"] = f\"https://huggingface.co/datasets/{repo_id}\"\n                    with open(session_file, \"w\") as f:\n                        json.dump(data, f, indent=2)\n\n                    return True\n\n                except Exception:\n                    if attempt < max_retries - 1:\n                        import time\n\n                        wait_time = 2**attempt\n                        time.sleep(wait_time)\n                    else:\n                        # Final attempt failed\n                        data[\"upload_status\"] = \"failed\"\n                        with open(session_file, \"w\") as f:\n                            json.dump(data, f, indent=2)\n                        return False\n\n        finally:\n            # Clean up temp file\n            try:\n                os.unlink(tmp_path)\n            except Exception:\n                pass\n\n    except Exception as e:\n        print(f\"Error uploading session: {e}\", file=sys.stderr)\n        return False\n\n\ndef retry_failed_uploads(directory: str, repo_id: str):\n    \"\"\"Retry all failed/pending uploads in a directory\"\"\"\n    log_dir = Path(directory)\n    if not log_dir.exists():\n        return\n\n    session_files = list(log_dir.glob(\"session_*.json\"))\n\n    for filepath in session_files:\n        try:\n            with open(filepath, \"r\") as f:\n                data = json.load(f)\n\n            upload_status = data.get(\"upload_status\", \"unknown\")\n\n            # Only retry pending or failed uploads\n            if upload_status in [\"pending\", \"failed\"]:\n                upload_session_as_file(str(filepath), repo_id)\n\n        except Exception:\n            pass\n\n\nif __name__ == \"__main__\":\n    if len(sys.argv) < 3:\n        
print(\"Usage: session_uploader.py <command> <args...>\")\n        sys.exit(1)\n\n    command = sys.argv[1]\n\n    if command == \"upload\":\n        # python session_uploader.py upload <session_file> <repo_id>\n        if len(sys.argv) < 4:\n            print(\"Usage: session_uploader.py upload <session_file> <repo_id>\")\n            sys.exit(1)\n        session_file = sys.argv[2]\n        repo_id = sys.argv[3]\n        success = upload_session_as_file(session_file, repo_id)\n        sys.exit(0 if success else 1)\n\n    elif command == \"retry\":\n        # python session_uploader.py retry <directory> <repo_id>\n        if len(sys.argv) < 4:\n            print(\"Usage: session_uploader.py retry <directory> <repo_id>\")\n            sys.exit(1)\n        directory = sys.argv[2]\n        repo_id = sys.argv[3]\n        retry_failed_uploads(directory, repo_id)\n        sys.exit(0)\n\n    else:\n        print(f\"Unknown command: {command}\")\n        sys.exit(1)\n"
  },
  {
    "path": "agent/core/tools.py",
    "content": "\"\"\"\nTool system for the agent\nProvides ToolSpec and ToolRouter for managing both built-in and MCP tools\n\"\"\"\n\nimport logging\nimport warnings\nfrom dataclasses import dataclass\nfrom typing import Any, Awaitable, Callable, Optional\n\nlogger = logging.getLogger(__name__)\n\nfrom fastmcp import Client\nfrom fastmcp.exceptions import ToolError\nfrom mcp.types import EmbeddedResource, ImageContent, TextContent\n\nfrom agent.config import MCPServerConfig\nfrom agent.tools.dataset_tools import (\n    HF_INSPECT_DATASET_TOOL_SPEC,\n    hf_inspect_dataset_handler,\n)\nfrom agent.tools.docs_tools import (\n    EXPLORE_HF_DOCS_TOOL_SPEC,\n    HF_DOCS_FETCH_TOOL_SPEC,\n    explore_hf_docs_handler,\n    hf_docs_fetch_handler,\n)\nfrom agent.tools.github_find_examples import (\n    GITHUB_FIND_EXAMPLES_TOOL_SPEC,\n    github_find_examples_handler,\n)\nfrom agent.tools.github_list_repos import (\n    GITHUB_LIST_REPOS_TOOL_SPEC,\n    github_list_repos_handler,\n)\nfrom agent.tools.github_read_file import (\n    GITHUB_READ_FILE_TOOL_SPEC,\n    github_read_file_handler,\n)\nfrom agent.tools.hf_repo_files_tool import (\n    HF_REPO_FILES_TOOL_SPEC,\n    hf_repo_files_handler,\n)\nfrom agent.tools.hf_repo_git_tool import (\n    HF_REPO_GIT_TOOL_SPEC,\n    hf_repo_git_handler,\n)\nfrom agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler\nfrom agent.tools.papers_tool import HF_PAPERS_TOOL_SPEC, hf_papers_handler\nfrom agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler\nfrom agent.tools.research_tool import RESEARCH_TOOL_SPEC, research_handler\nfrom agent.tools.sandbox_tool import get_sandbox_tools\n\n# NOTE: Private HF repo tool disabled - replaced by hf_repo_files and hf_repo_git\n# from agent.tools.private_hf_repo_tools import (\n#     PRIVATE_HF_REPO_TOOL_SPEC,\n#     private_hf_repo_handler,\n# )\n\n# Suppress aiohttp deprecation warning\nwarnings.filterwarnings(\n    \"ignore\", category=DeprecationWarning, 
module=\"aiohttp.connector\"\n)\n\nNOT_ALLOWED_TOOL_NAMES = [\"hf_jobs\", \"hf_doc_search\", \"hf_doc_fetch\", \"hf_whoami\"]\n\n\ndef convert_mcp_content_to_string(content: list) -> str:\n    \"\"\"\n    Convert MCP content blocks to a string format compatible with LLM messages.\n\n    Based on FastMCP documentation, content can be:\n    - TextContent: has .text field\n    - ImageContent: has .data and .mimeType fields\n    - EmbeddedResource: has .resource field with .text or .blob\n\n    Args:\n        content: List of MCP content blocks\n\n    Returns:\n        String representation of the content suitable for LLM consumption\n    \"\"\"\n    if not content:\n        return \"\"\n\n    parts = []\n    for item in content:\n        if isinstance(item, TextContent):\n            # Extract text from TextContent blocks\n            parts.append(item.text)\n        elif isinstance(item, ImageContent):\n            # TODO: Handle images\n            # For images, include a description with MIME type\n            parts.append(f\"[Image: {item.mimeType}]\")\n        elif isinstance(item, EmbeddedResource):\n            # TODO: Handle embedded resources\n            # For embedded resources, try to extract text\n            resource = item.resource\n            if hasattr(resource, \"text\") and resource.text:\n                parts.append(resource.text)\n            elif hasattr(resource, \"blob\") and resource.blob:\n                parts.append(\n                    f\"[Binary data: {resource.mimeType if hasattr(resource, 'mimeType') else 'unknown'}]\"\n                )\n            else:\n                parts.append(\n                    f\"[Resource: {resource.uri if hasattr(resource, 'uri') else 'unknown'}]\"\n                )\n        else:\n            # Fallback: try to convert to string\n            parts.append(str(item))\n\n    return \"\\n\".join(parts)\n\n\n@dataclass\nclass ToolSpec:\n    \"\"\"Tool specification for LLM\"\"\"\n\n    name: str\n    
description: str\n    parameters: dict[str, Any]\n    handler: Optional[Callable[[dict[str, Any]], Awaitable[tuple[str, bool]]]] = None\n\n\nclass ToolRouter:\n    \"\"\"\n    Routes tool calls to appropriate handlers.\n    Based on codex-rs/core/src/tools/router.rs\n    \"\"\"\n\n    def __init__(self, mcp_servers: dict[str, MCPServerConfig], hf_token: str | None = None, local_mode: bool = False):\n        self.tools: dict[str, ToolSpec] = {}\n        self.mcp_servers: dict[str, dict[str, Any]] = {}\n\n        for tool in create_builtin_tools(local_mode=local_mode):\n            self.register_tool(tool)\n\n        self.mcp_client: Client | None = None\n        if mcp_servers:\n            mcp_servers_payload = {}\n            for name, server in mcp_servers.items():\n                data = server.model_dump()\n                if hf_token:\n                    data.setdefault(\"headers\", {})[\"Authorization\"] = f\"Bearer {hf_token}\"\n                mcp_servers_payload[name] = data\n            self.mcp_client = Client({\"mcpServers\": mcp_servers_payload})\n        self._mcp_initialized = False\n\n    def register_tool(self, tool: ToolSpec) -> None:\n        self.tools[tool.name] = tool\n\n    async def register_mcp_tools(self) -> None:\n        tools = await self.mcp_client.list_tools()\n        registered_names = []\n        skipped_count = 0\n        for tool in tools:\n            if tool.name in NOT_ALLOWED_TOOL_NAMES:\n                skipped_count += 1\n                continue\n            registered_names.append(tool.name)\n            self.register_tool(\n                ToolSpec(\n                    name=tool.name,\n                    description=tool.description,\n                    parameters=tool.inputSchema,\n                    handler=None,\n                )\n            )\n        logger.info(\n            f\"Loaded {len(registered_names)} MCP tools: {', '.join(registered_names)} ({skipped_count} disabled)\"\n        )\n\n    async def 
register_openapi_tool(self) -> None:\n        \"\"\"Register the OpenAPI search tool (requires async initialization)\"\"\"\n        from agent.tools.docs_tools import (\n            _get_api_search_tool_spec,\n            search_openapi_handler,\n        )\n\n        try:\n            openapi_spec = await _get_api_search_tool_spec()\n            self.register_tool(\n                ToolSpec(\n                    name=openapi_spec[\"name\"],\n                    description=openapi_spec[\"description\"],\n                    parameters=openapi_spec[\"parameters\"],\n                    handler=search_openapi_handler,\n                )\n            )\n            logger.info(f\"Loaded OpenAPI search tool: {openapi_spec['name']}\")\n        except Exception as e:\n            logger.warning(\"Failed to load OpenAPI search tool: %s\", e)\n\n    def get_tool_specs_for_llm(self) -> list[dict[str, Any]]:\n        \"\"\"Get tool specifications in OpenAI format\"\"\"\n        specs = []\n        for tool in self.tools.values():\n            specs.append(\n                {\n                    \"type\": \"function\",\n                    \"function\": {\n                        \"name\": tool.name,\n                        \"description\": tool.description,\n                        \"parameters\": tool.parameters,\n                    },\n                }\n            )\n        return specs\n\n    async def __aenter__(self) -> \"ToolRouter\":\n        if self.mcp_client is not None:\n            try:\n                await self.mcp_client.__aenter__()\n                await self.mcp_client.initialize()\n                await self.register_mcp_tools()\n                self._mcp_initialized = True\n            except Exception as e:\n                logger.warning(\"MCP connection failed, continuing without MCP tools: %s\", e)\n                self.mcp_client = None\n\n        await self.register_openapi_tool()\n\n        total_tools = len(self.tools)\n        
logger.info(f\"Agent ready with {total_tools} tools total\")\n\n        return self\n\n    async def __aexit__(self, exc_type, exc, tb) -> None:\n        if self.mcp_client is not None:\n            await self.mcp_client.__aexit__(exc_type, exc, tb)\n            self._mcp_initialized = False\n\n    async def call_tool(\n        self,\n        tool_name: str,\n        arguments: dict[str, Any],\n        session: Any = None,\n        tool_call_id: str | None = None,\n    ) -> tuple[str, bool]:\n        \"\"\"\n        Call a tool and return (output_string, success_bool).\n\n        For MCP tools, converts the CallToolResult content blocks to a string.\n        For built-in tools, calls their handler directly.\n        \"\"\"\n        # Check if this is a built-in tool with a handler\n        tool = self.tools.get(tool_name)\n        if tool and tool.handler:\n            import inspect\n\n            # Check if handler accepts session argument\n            sig = inspect.signature(tool.handler)\n            if \"session\" in sig.parameters:\n                # Check if handler also accepts tool_call_id parameter\n                if \"tool_call_id\" in sig.parameters:\n                    return await tool.handler(\n                        arguments, session=session, tool_call_id=tool_call_id\n                    )\n                return await tool.handler(arguments, session=session)\n            return await tool.handler(arguments)\n\n        # Otherwise, use MCP client\n        if self._mcp_initialized:\n            try:\n                result = await self.mcp_client.call_tool(tool_name, arguments)\n                output = convert_mcp_content_to_string(result.content)\n                return output, not result.is_error\n            except ToolError as e:\n                # Catch MCP tool errors and return them to the agent\n                error_msg = f\"Tool error: {str(e)}\"\n                return error_msg, False\n\n        return \"MCP client not 
initialized\", False\n\n\n# ============================================================================\n# BUILT-IN TOOL HANDLERS\n# ============================================================================\n\n\ndef create_builtin_tools(local_mode: bool = False) -> list[ToolSpec]:\n    \"\"\"Create built-in tool specifications\"\"\"\n    # in order of importance\n    tools = [\n        # Research sub-agent (delegates to read-only tools in independent context)\n        ToolSpec(\n            name=RESEARCH_TOOL_SPEC[\"name\"],\n            description=RESEARCH_TOOL_SPEC[\"description\"],\n            parameters=RESEARCH_TOOL_SPEC[\"parameters\"],\n            handler=research_handler,\n        ),\n        # Documentation search tools\n        ToolSpec(\n            name=EXPLORE_HF_DOCS_TOOL_SPEC[\"name\"],\n            description=EXPLORE_HF_DOCS_TOOL_SPEC[\"description\"],\n            parameters=EXPLORE_HF_DOCS_TOOL_SPEC[\"parameters\"],\n            handler=explore_hf_docs_handler,\n        ),\n        ToolSpec(\n            name=HF_DOCS_FETCH_TOOL_SPEC[\"name\"],\n            description=HF_DOCS_FETCH_TOOL_SPEC[\"description\"],\n            parameters=HF_DOCS_FETCH_TOOL_SPEC[\"parameters\"],\n            handler=hf_docs_fetch_handler,\n        ),\n        # Paper discovery and reading\n        ToolSpec(\n            name=HF_PAPERS_TOOL_SPEC[\"name\"],\n            description=HF_PAPERS_TOOL_SPEC[\"description\"],\n            parameters=HF_PAPERS_TOOL_SPEC[\"parameters\"],\n            handler=hf_papers_handler,\n        ),\n        # Dataset inspection tool (unified)\n        ToolSpec(\n            name=HF_INSPECT_DATASET_TOOL_SPEC[\"name\"],\n            description=HF_INSPECT_DATASET_TOOL_SPEC[\"description\"],\n            parameters=HF_INSPECT_DATASET_TOOL_SPEC[\"parameters\"],\n            handler=hf_inspect_dataset_handler,\n        ),\n        # Planning and job management tools\n        ToolSpec(\n            name=PLAN_TOOL_SPEC[\"name\"],\n         
   description=PLAN_TOOL_SPEC[\"description\"],\n            parameters=PLAN_TOOL_SPEC[\"parameters\"],\n            handler=plan_tool_handler,\n        ),\n        ToolSpec(\n            name=HF_JOBS_TOOL_SPEC[\"name\"],\n            description=HF_JOBS_TOOL_SPEC[\"description\"],\n            parameters=HF_JOBS_TOOL_SPEC[\"parameters\"],\n            handler=hf_jobs_handler,\n        ),\n        # HF Repo management tools\n        ToolSpec(\n            name=HF_REPO_FILES_TOOL_SPEC[\"name\"],\n            description=HF_REPO_FILES_TOOL_SPEC[\"description\"],\n            parameters=HF_REPO_FILES_TOOL_SPEC[\"parameters\"],\n            handler=hf_repo_files_handler,\n        ),\n        ToolSpec(\n            name=HF_REPO_GIT_TOOL_SPEC[\"name\"],\n            description=HF_REPO_GIT_TOOL_SPEC[\"description\"],\n            parameters=HF_REPO_GIT_TOOL_SPEC[\"parameters\"],\n            handler=hf_repo_git_handler,\n        ),\n        ToolSpec(\n            name=GITHUB_FIND_EXAMPLES_TOOL_SPEC[\"name\"],\n            description=GITHUB_FIND_EXAMPLES_TOOL_SPEC[\"description\"],\n            parameters=GITHUB_FIND_EXAMPLES_TOOL_SPEC[\"parameters\"],\n            handler=github_find_examples_handler,\n        ),\n        ToolSpec(\n            name=GITHUB_LIST_REPOS_TOOL_SPEC[\"name\"],\n            description=GITHUB_LIST_REPOS_TOOL_SPEC[\"description\"],\n            parameters=GITHUB_LIST_REPOS_TOOL_SPEC[\"parameters\"],\n            handler=github_list_repos_handler,\n        ),\n        ToolSpec(\n            name=GITHUB_READ_FILE_TOOL_SPEC[\"name\"],\n            description=GITHUB_READ_FILE_TOOL_SPEC[\"description\"],\n            parameters=GITHUB_READ_FILE_TOOL_SPEC[\"parameters\"],\n            handler=github_read_file_handler,\n        ),\n    ]\n\n    # Sandbox or local tools (highest priority)\n    if local_mode:\n        from agent.tools.local_tools import get_local_tools\n        tools = get_local_tools() + tools\n    else:\n        tools = 
get_sandbox_tools() + tools\n\n    tool_names = \", \".join([t.name for t in tools])\n    logger.info(f\"Loaded {len(tools)} built-in tools: {tool_names}\")\n\n    return tools\n"
  },
  {
    "path": "agent/main.py",
    "content": "\"\"\"\nInteractive CLI chat with the agent\n\nSupports two modes:\n  Interactive:  python -m agent.main\n  Headless:     python -m agent.main \"find me bird datasets\"\n\"\"\"\n\nimport argparse\nimport asyncio\nimport json\nimport os\nimport signal\nimport sys\nimport time\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Any, Optional\n\nimport litellm\nfrom prompt_toolkit import PromptSession\n\nfrom agent.config import load_config\nfrom agent.core.agent_loop import submission_loop\nfrom agent.core import model_switcher\nfrom agent.core.session import OpType\nfrom agent.core.tools import ToolRouter\nfrom agent.utils.reliability_checks import check_training_script_save_pattern\nfrom agent.utils.terminal_display import (\n    get_console,\n    print_approval_header,\n    print_approval_item,\n    print_banner,\n    print_compacted,\n    print_error,\n    print_help,\n    print_init_done,\n    print_interrupted,\n    print_markdown,\n    print_plan,\n    print_tool_call,\n    print_tool_log,\n    print_tool_output,\n    print_turn_complete,\n    print_yolo_approve,\n)\n\nlitellm.drop_params = True\n# Suppress the \"Give Feedback / Get Help\" banner LiteLLM prints to stderr\n# on every error — users don't need it, and our friendly errors cover the case.\nlitellm.suppress_debug_info = True\n\ndef _safe_get_args(arguments: dict) -> dict:\n    \"\"\"Safely extract args dict from arguments, handling cases where LLM passes string.\"\"\"\n    args = arguments.get(\"args\", {})\n    # Sometimes LLM passes args as string instead of dict\n    if isinstance(args, str):\n        return {}\n    return args if isinstance(args, dict) else {}\n\n\ndef _get_hf_token() -> str | None:\n    \"\"\"Get HF token from environment, huggingface_hub API, or cached token file.\"\"\"\n    token = os.environ.get(\"HF_TOKEN\")\n    if token:\n        return token\n    try:\n        from huggingface_hub import HfApi\n        api = HfApi()\n        
token = api.token\n        if token:\n            return token\n    except Exception:\n        pass\n    # Fallback: read the cached token file directly\n    token_path = Path.home() / \".cache\" / \"huggingface\" / \"token\"\n    if token_path.exists():\n        token = token_path.read_text().strip()\n        if token:\n            return token\n    return None\n\n\nasync def _prompt_and_save_hf_token(prompt_session: PromptSession) -> str:\n    \"\"\"Prompt user for HF token, validate it, save via huggingface_hub.login(). Loops until valid.\"\"\"\n    from prompt_toolkit.formatted_text import HTML\n    from huggingface_hub import HfApi, login\n\n    print(\"\\nA Hugging Face token is required.\")\n    print(\"Get one at: https://huggingface.co/settings/tokens\\n\")\n\n    while True:\n        try:\n            token = await prompt_session.prompt_async(\n                HTML(\"<b>Paste your HF token: </b>\")\n            )\n        except (EOFError, KeyboardInterrupt):\n            print(\"\\nToken is required to continue.\")\n            continue\n\n        token = token.strip()\n        if not token:\n            print(\"Token cannot be empty.\")\n            continue\n\n        # Validate token against the API\n        try:\n            api = HfApi(token=token)\n            user_info = api.whoami()\n            username = user_info.get(\"name\", \"unknown\")\n            print(f\"Token valid (user: {username})\")\n        except Exception:\n            print(\"Invalid token. 
Please try again.\")\n            continue\n\n        # Save for future sessions\n        try:\n            login(token=token, add_to_git_credential=False)\n            print(\"Token saved to ~/.cache/huggingface/token\")\n        except Exception as e:\n            print(f\"Warning: could not persist token ({e}), using for this session only.\")\n\n        return token\n\n@dataclass\nclass Operation:\n    \"\"\"Operation to be executed by the agent\"\"\"\n\n    op_type: OpType\n    data: Optional[dict[str, Any]] = None\n\n\n@dataclass\nclass Submission:\n    \"\"\"Submission to the agent loop\"\"\"\n\n    id: str\n    operation: Operation\n\n\ndef _create_rich_console():\n    \"\"\"Get the shared rich Console.\"\"\"\n    return get_console()\n\n\nclass _ThinkingShimmer:\n    \"\"\"Animated shiny/shimmer thinking indicator — a bright gradient sweeps across the text.\"\"\"\n\n    _BASE = (90, 90, 110)       # dim base color\n    _HIGHLIGHT = (255, 200, 80) # bright shimmer highlight (warm gold)\n    _WIDTH = 5                  # shimmer width in characters\n    _FPS = 24\n\n    def __init__(self, console):\n        self._console = console\n        self._task = None\n        self._running = False\n\n    def start(self):\n        if self._running:\n            return\n        self._running = True\n        self._task = asyncio.ensure_future(self._animate())\n\n    def stop(self):\n        if not self._running:\n            return  # no-op when never started (e.g. 
headless mode)\n        self._running = False\n        if self._task:\n            self._task.cancel()\n            self._task = None\n        # Clear the shimmer line\n        self._console.file.write(\"\\r\\033[K\")\n        self._console.file.flush()\n\n    def _render_frame(self, text: str, offset: float) -> str:\n        \"\"\"Render one frame: a bright spot sweeps left-to-right across `text`.\"\"\"\n        out = []\n        n = len(text)\n        for i, ch in enumerate(text):\n            # Distance from the shimmer center (wraps around)\n            dist = abs(i - offset)\n            wrap_dist = abs(i - offset + n + self._WIDTH)\n            dist = min(dist, wrap_dist, abs(i - offset - n - self._WIDTH))\n            # Blend factor: 1.0 at center, 0.0 beyond _WIDTH\n            t = max(0.0, 1.0 - dist / self._WIDTH)\n            t = t * t * (3 - 2 * t)  # smoothstep\n            r = int(self._BASE[0] + (self._HIGHLIGHT[0] - self._BASE[0]) * t)\n            g = int(self._BASE[1] + (self._HIGHLIGHT[1] - self._BASE[1]) * t)\n            b = int(self._BASE[2] + (self._HIGHLIGHT[2] - self._BASE[2]) * t)\n            out.append(f\"\\033[38;2;{r};{g};{b}m{ch}\")\n        out.append(\"\\033[0m\")\n        return \"\".join(out)\n\n    async def _animate(self):\n        text = \"Thinking...\"\n        n = len(text)\n        speed = 0.45  # characters per frame\n        pos = 0.0\n        try:\n            while self._running:\n                frame = self._render_frame(text, pos)\n                self._console.file.write(f\"\\r  {frame}\")\n                self._console.file.flush()\n                pos = (pos + speed) % (n + self._WIDTH)\n                await asyncio.sleep(1.0 / self._FPS)\n        except asyncio.CancelledError:\n            pass\n\n\nclass _StreamBuffer:\n    \"\"\"Accumulates streamed tokens, renders markdown block-by-block as complete\n    blocks appear. 
A \"block\" is everything up to a paragraph break (\\\\n\\\\n).\n    Unclosed code fences (odd count of ```) hold back flushing until closed so\n    a code block is always rendered as one unit.\"\"\"\n\n    def __init__(self, console):\n        self._console = console\n        self._buffer = \"\"\n\n    def add_chunk(self, text: str):\n        self._buffer += text\n\n    def _pop_block(self) -> str | None:\n        \"\"\"Extract the next complete block, or return None if nothing complete.\"\"\"\n        if self._buffer.count(\"```\") % 2 == 1:\n            return None  # inside an open code fence — wait for close\n        idx = self._buffer.find(\"\\n\\n\")\n        if idx == -1:\n            return None\n        block = self._buffer[:idx]\n        self._buffer = self._buffer[idx + 2:]\n        return block\n\n    async def flush_ready(\n        self,\n        cancel_event: \"asyncio.Event | None\" = None,\n        instant: bool = False,\n    ):\n        \"\"\"Render any complete blocks that have accumulated; leave the tail.\"\"\"\n        while True:\n            if cancel_event is not None and cancel_event.is_set():\n                return\n            block = self._pop_block()\n            if block is None:\n                return\n            if block.strip():\n                await print_markdown(block, cancel_event=cancel_event, instant=instant)\n\n    async def finish(\n        self,\n        cancel_event: \"asyncio.Event | None\" = None,\n        instant: bool = False,\n    ):\n        \"\"\"Flush complete blocks, then render whatever incomplete tail remains.\"\"\"\n        await self.flush_ready(cancel_event=cancel_event, instant=instant)\n        if self._buffer.strip():\n            await print_markdown(self._buffer, cancel_event=cancel_event, instant=instant)\n        self._buffer = \"\"\n\n    def discard(self):\n        self._buffer = \"\"\n\n\nasync def event_listener(\n    event_queue: asyncio.Queue,\n    submission_queue: asyncio.Queue,\n    
turn_complete_event: asyncio.Event,\n    ready_event: asyncio.Event,\n    prompt_session: PromptSession,\n    config=None,\n    session_holder=None,\n) -> None:\n    \"\"\"Background task that listens for events and displays them\"\"\"\n    submission_id = [1000]\n    last_tool_name = [None]\n    console = _create_rich_console()\n    shimmer = _ThinkingShimmer(console)\n    stream_buf = _StreamBuffer(console)\n\n    def _cancel_event():\n        \"\"\"Return the session's cancellation Event so print_markdown can abort\n        its typewriter loop mid-stream when Ctrl+C fires.\"\"\"\n        s = session_holder[0] if session_holder else None\n        return s._cancelled if s is not None else None\n\n    while True:\n        try:\n            event = await event_queue.get()\n\n            if event.event_type == \"ready\":\n                tool_count = event.data.get(\"tool_count\", 0) if event.data else 0\n                print_init_done(tool_count=tool_count)\n                ready_event.set()\n            elif event.event_type == \"assistant_message\":\n                shimmer.stop()\n                content = event.data.get(\"content\", \"\") if event.data else \"\"\n                if content:\n                    await print_markdown(content, cancel_event=_cancel_event())\n            elif event.event_type == \"assistant_chunk\":\n                content = event.data.get(\"content\", \"\") if event.data else \"\"\n                if content:\n                    stream_buf.add_chunk(content)\n                    # Flush any complete markdown blocks progressively so the\n                    # user sees paragraphs appear as they're produced, not just\n                    # at the end of the whole response.\n                    shimmer.stop()\n                    await stream_buf.flush_ready(cancel_event=_cancel_event())\n            elif event.event_type == \"assistant_stream_end\":\n                shimmer.stop()\n                await 
stream_buf.finish(cancel_event=_cancel_event())\n            elif event.event_type == \"tool_call\":\n                shimmer.stop()\n                stream_buf.discard()\n                tool_name = event.data.get(\"tool\", \"\") if event.data else \"\"\n                arguments = event.data.get(\"arguments\", {}) if event.data else {}\n                if tool_name:\n                    last_tool_name[0] = tool_name\n                    # Skip printing research tool_call — the tool_log handler shows it\n                    if tool_name != \"research\":\n                        args_str = json.dumps(arguments)[:80]\n                        print_tool_call(tool_name, args_str)\n            elif event.event_type == \"tool_output\":\n                output = event.data.get(\"output\", \"\") if event.data else \"\"\n                success = event.data.get(\"success\", False) if event.data else False\n                # Only show output for plan_tool — everything else is noise\n                if last_tool_name[0] == \"plan_tool\" and output:\n                    print_tool_output(output, success, truncate=False)\n                shimmer.start()\n            elif event.event_type == \"turn_complete\":\n                shimmer.stop()\n                stream_buf.discard()\n                print_turn_complete()\n                print_plan()\n                turn_complete_event.set()\n            elif event.event_type == \"interrupted\":\n                shimmer.stop()\n                stream_buf.discard()\n                print_interrupted()\n                turn_complete_event.set()\n            elif event.event_type == \"undo_complete\":\n                console.print(\"[dim]Undone.[/dim]\")\n                turn_complete_event.set()\n            elif event.event_type == \"tool_log\":\n                tool = event.data.get(\"tool\", \"\") if event.data else \"\"\n                log = event.data.get(\"log\", \"\") if event.data else \"\"\n                if log:\n       
             agent_id = event.data.get(\"agent_id\", \"\") if event.data else \"\"\n                    label = event.data.get(\"label\", \"\") if event.data else \"\"\n                    print_tool_log(tool, log, agent_id=agent_id, label=label)\n            elif event.event_type == \"tool_state_change\":\n                pass  # visual noise — approval flow handles this\n            elif event.event_type == \"error\":\n                shimmer.stop()\n                stream_buf.discard()\n                error = event.data.get(\"error\", \"Unknown error\") if event.data else \"Unknown error\"\n                print_error(error)\n                turn_complete_event.set()\n            elif event.event_type == \"shutdown\":\n                shimmer.stop()\n                stream_buf.discard()\n                break\n            elif event.event_type == \"processing\":\n                shimmer.start()\n            elif event.event_type == \"compacted\":\n                old_tokens = event.data.get(\"old_tokens\", 0) if event.data else 0\n                new_tokens = event.data.get(\"new_tokens\", 0) if event.data else 0\n                print_compacted(old_tokens, new_tokens)\n            elif event.event_type == \"approval_required\":\n                # Handle batch approval format\n                tools_data = event.data.get(\"tools\", []) if event.data else []\n                count = event.data.get(\"count\", 0) if event.data else 0\n\n                # If yolo mode is active, auto-approve everything\n                if config and config.yolo_mode:\n                    approvals = [\n                        {\n                            \"tool_call_id\": t.get(\"tool_call_id\", \"\"),\n                            \"approved\": True,\n                            \"feedback\": None,\n                        }\n                        for t in tools_data\n                    ]\n                    print_yolo_approve(count)\n                    submission_id[0] += 
1\n                    approval_submission = Submission(\n                        id=f\"approval_{submission_id[0]}\",\n                        operation=Operation(\n                            op_type=OpType.EXEC_APPROVAL,\n                            data={\"approvals\": approvals},\n                        ),\n                    )\n                    await submission_queue.put(approval_submission)\n                    continue\n\n                print_approval_header(count)\n                approvals = []\n\n                # Ask for approval for each tool\n                for i, tool_info in enumerate(tools_data, 1):\n                    tool_name = tool_info.get(\"tool\", \"\")\n                    arguments = tool_info.get(\"arguments\", {})\n                    tool_call_id = tool_info.get(\"tool_call_id\", \"\")\n\n                    # Handle case where arguments might be a JSON string\n                    if isinstance(arguments, str):\n                        try:\n                            arguments = json.loads(arguments)\n                        except json.JSONDecodeError:\n                            print(f\"Warning: Failed to parse arguments for {tool_name}\")\n                            arguments = {}\n\n                    operation = arguments.get(\"operation\", \"\")\n\n                    print_approval_item(i, count, tool_name, operation)\n\n                    # Handle different tool types\n                    if tool_name == \"hf_jobs\":\n                        # Check if this is Python mode (script) or Docker mode (command)\n                        script = arguments.get(\"script\")\n                        command = arguments.get(\"command\")\n\n                        if script:\n                            # Python mode\n                            dependencies = arguments.get(\"dependencies\", [])\n                            python_version = arguments.get(\"python\")\n                            script_args = 
arguments.get(\"script_args\", [])\n\n                            # Show full script\n                            print(f\"Script:\\n{script}\")\n                            if dependencies:\n                                print(f\"Dependencies: {', '.join(dependencies)}\")\n                            if python_version:\n                                print(f\"Python version: {python_version}\")\n                            if script_args:\n                                print(f\"Script args: {' '.join(script_args)}\")\n\n                            # Run reliability checks on the full script (not truncated)\n                            check_message = check_training_script_save_pattern(script)\n                            if check_message:\n                                print(check_message)\n                        elif command:\n                            # Docker mode\n                            image = arguments.get(\"image\", \"python:3.12\")\n                            command_str = (\n                                \" \".join(command)\n                                if isinstance(command, list)\n                                else str(command)\n                            )\n                            print(f\"Docker image: {image}\")\n                            print(f\"Command: {command_str}\")\n\n                        # Common parameters for jobs\n                        hardware_flavor = arguments.get(\"hardware_flavor\", \"cpu-basic\")\n                        timeout = arguments.get(\"timeout\", \"30m\")\n                        env = arguments.get(\"env\", {})\n                        schedule = arguments.get(\"schedule\")\n\n                        print(f\"Hardware: {hardware_flavor}\")\n                        print(f\"Timeout: {timeout}\")\n\n                        if env:\n                            env_keys = \", \".join(env.keys())\n                            print(f\"Environment variables: {env_keys}\")\n\n                   
     if schedule:\n                            print(f\"Schedule: {schedule}\")\n\n                    elif tool_name == \"hf_private_repos\":\n                        # Handle private repo operations\n                        args = _safe_get_args(arguments)\n\n                        if operation in [\"create_repo\", \"upload_file\"]:\n                            repo_id = args.get(\"repo_id\", \"\")\n                            repo_type = args.get(\"repo_type\", \"dataset\")\n\n                            # Build repo URL\n                            type_path = \"\" if repo_type == \"model\" else f\"{repo_type}s\"\n                            repo_url = (\n                                f\"https://huggingface.co/{type_path}/{repo_id}\".replace(\n                                    \"//\", \"/\"\n                                )\n                            )\n\n                            print(f\"Repository: {repo_id}\")\n                            print(f\"Type: {repo_type}\")\n                            print(\"Private: Yes\")\n                            print(f\"URL: {repo_url}\")\n\n                            # Show file preview for upload_file operation\n                            if operation == \"upload_file\":\n                                path_in_repo = args.get(\"path_in_repo\", \"\")\n                                file_content = args.get(\"file_content\", \"\")\n                                print(f\"File: {path_in_repo}\")\n\n                                if isinstance(file_content, str):\n                                    # Calculate metrics\n                                    all_lines = file_content.split(\"\\n\")\n                                    line_count = len(all_lines)\n                                    size_bytes = len(file_content.encode(\"utf-8\"))\n                                    size_kb = size_bytes / 1024\n                                    size_mb = size_kb / 1024\n\n                                    
print(f\"Line count: {line_count}\")\n                                    if size_kb < 1024:\n                                        print(f\"Size: {size_kb:.2f} KB\")\n                                    else:\n                                        print(f\"Size: {size_mb:.2f} MB\")\n\n                                    # Show preview\n                                    preview_lines = all_lines[:5]\n                                    preview = \"\\n\".join(preview_lines)\n                                    print(\n                                        f\"Content preview (first 5 lines):\\n{preview}\"\n                                    )\n                                    if len(all_lines) > 5:\n                                        print(\"...\")\n\n                    elif tool_name == \"hf_repo_files\":\n                        # Handle repo files operations (upload, delete)\n                        repo_id = arguments.get(\"repo_id\", \"\")\n                        repo_type = arguments.get(\"repo_type\", \"model\")\n                        revision = arguments.get(\"revision\", \"main\")\n\n                        # Build repo URL\n                        if repo_type == \"model\":\n                            repo_url = f\"https://huggingface.co/{repo_id}\"\n                        else:\n                            repo_url = f\"https://huggingface.co/{repo_type}s/{repo_id}\"\n\n                        print(f\"Repository: {repo_id}\")\n                        print(f\"Type: {repo_type}\")\n                        print(f\"Branch: {revision}\")\n                        print(f\"URL: {repo_url}\")\n\n                        if operation == \"upload\":\n                            path = arguments.get(\"path\", \"\")\n                            content = arguments.get(\"content\", \"\")\n                            create_pr = arguments.get(\"create_pr\", False)\n\n                            print(f\"File: {path}\")\n                          
  if create_pr:\n                                print(\"Mode: Create PR\")\n\n                            if isinstance(content, str):\n                                all_lines = content.split(\"\\n\")\n                                line_count = len(all_lines)\n                                size_bytes = len(content.encode(\"utf-8\"))\n                                size_kb = size_bytes / 1024\n\n                                print(f\"Lines: {line_count}\")\n                                if size_kb < 1024:\n                                    print(f\"Size: {size_kb:.2f} KB\")\n                                else:\n                                    print(f\"Size: {size_kb / 1024:.2f} MB\")\n\n                                # Show full content\n                                print(f\"Content:\\n{content}\")\n\n                        elif operation == \"delete\":\n                            patterns = arguments.get(\"patterns\", [])\n                            if isinstance(patterns, str):\n                                patterns = [patterns]\n                            print(f\"Patterns to delete: {', '.join(patterns)}\")\n\n                    elif tool_name == \"hf_repo_git\":\n                        # Handle git operations (branches, tags, PRs, repo management)\n                        repo_id = arguments.get(\"repo_id\", \"\")\n                        repo_type = arguments.get(\"repo_type\", \"model\")\n\n                        # Build repo URL\n                        if repo_type == \"model\":\n                            repo_url = f\"https://huggingface.co/{repo_id}\"\n                        else:\n                            repo_url = f\"https://huggingface.co/{repo_type}s/{repo_id}\"\n\n                        print(f\"Repository: {repo_id}\")\n                        print(f\"Type: {repo_type}\")\n                        print(f\"URL: {repo_url}\")\n\n                        if operation == \"delete_branch\":\n                      
      branch = arguments.get(\"branch\", \"\")\n                            print(f\"Branch to delete: {branch}\")\n\n                        elif operation == \"delete_tag\":\n                            tag = arguments.get(\"tag\", \"\")\n                            print(f\"Tag to delete: {tag}\")\n\n                        elif operation == \"merge_pr\":\n                            pr_num = arguments.get(\"pr_num\", \"\")\n                            print(f\"PR to merge: #{pr_num}\")\n\n                        elif operation == \"create_repo\":\n                            private = arguments.get(\"private\", False)\n                            space_sdk = arguments.get(\"space_sdk\")\n                            print(f\"Private: {private}\")\n                            if space_sdk:\n                                print(f\"Space SDK: {space_sdk}\")\n\n                        elif operation == \"update_repo\":\n                            private = arguments.get(\"private\")\n                            gated = arguments.get(\"gated\")\n                            if private is not None:\n                                print(f\"Private: {private}\")\n                            if gated is not None:\n                                print(f\"Gated: {gated}\")\n\n                    # Get user decision for this item. Ctrl+C / EOF here is\n                    # treated as \"reject remaining\" (matches Codex's modal\n                    # priority and Forgecode's approval-cancel path). Without\n                    # this, KeyboardInterrupt kills the event listener and\n                    # the main loop deadlocks waiting for turn_complete.\n                    try:\n                        response = await prompt_session.prompt_async(\n                            f\"Approve item {i}? 
(y=yes, yolo=approve all, n=no, or provide feedback): \"\n                        )\n                    except (KeyboardInterrupt, EOFError):\n                        get_console().print(\"[dim]Approval cancelled — rejecting remaining items[/dim]\")\n                        approvals.append(\n                            {\n                                \"tool_call_id\": tool_call_id,\n                                \"approved\": False,\n                                \"feedback\": \"User cancelled approval\",\n                            }\n                        )\n                        for remaining in tools_data[i:]:\n                            approvals.append(\n                                {\n                                    \"tool_call_id\": remaining.get(\"tool_call_id\", \"\"),\n                                    \"approved\": False,\n                                    \"feedback\": None,\n                                }\n                            )\n                        break\n\n                    response = response.strip().lower()\n\n                    # Handle yolo mode activation\n                    if response == \"yolo\":\n                        config.yolo_mode = True\n                        print(\n                            \"YOLO MODE ACTIVATED - Auto-approving all future tool calls\"\n                        )\n                        # Auto-approve this item and all remaining\n                        approvals.append(\n                            {\n                                \"tool_call_id\": tool_call_id,\n                                \"approved\": True,\n                                \"feedback\": None,\n                            }\n                        )\n                        for remaining in tools_data[i:]:\n                            approvals.append(\n                                {\n                                    \"tool_call_id\": remaining.get(\"tool_call_id\", \"\"),\n            
                        \"approved\": True,\n                                    \"feedback\": None,\n                                }\n                            )\n                        break\n\n                    approved = response in [\"y\", \"yes\"]\n                    feedback = None if approved or response in [\"n\", \"no\"] else response\n\n                    approvals.append(\n                        {\n                            \"tool_call_id\": tool_call_id,\n                            \"approved\": approved,\n                            \"feedback\": feedback,\n                        }\n                    )\n\n                # Submit batch approval\n                submission_id[0] += 1\n                approval_submission = Submission(\n                    id=f\"approval_{submission_id[0]}\",\n                    operation=Operation(\n                        op_type=OpType.EXEC_APPROVAL,\n                        data={\"approvals\": approvals},\n                    ),\n                )\n                await submission_queue.put(approval_submission)\n                console.print()  # spacing after approval\n            # Silently ignore other events\n\n        except asyncio.CancelledError:\n            break\n        except Exception as e:\n            print(f\"Event listener error: {e}\")\n\n\nasync def get_user_input(prompt_session: PromptSession) -> str:\n    \"\"\"Get user input asynchronously\"\"\"\n    from prompt_toolkit.formatted_text import HTML\n\n    return await prompt_session.prompt_async(HTML(\"\\n<b><cyan>></cyan></b> \"))\n\n\n# ── Slash command helpers ────────────────────────────────────────────────\n\n# Slash commands are defined in terminal_display\n\n\nasync def _handle_slash_command(\n    cmd: str,\n    config,\n    session_holder: list,\n    submission_queue: asyncio.Queue,\n    submission_id: list[int],\n) -> Submission | None:\n    \"\"\"\n    Handle a slash command. 
Returns a Submission to enqueue, or None if\n    the command was handled locally (caller should set turn_complete_event).\n\n    Async because ``/model`` fires a probe ping to validate the model+effort\n    combo before committing the switch.\n    \"\"\"\n    parts = cmd.strip().split(None, 1)\n    command = parts[0].lower()\n    arg = parts[1].strip() if len(parts) > 1 else \"\"\n\n    if command == \"/help\":\n        print_help()\n        return None\n\n    if command == \"/undo\":\n        submission_id[0] += 1\n        return Submission(\n            id=f\"sub_{submission_id[0]}\",\n            operation=Operation(op_type=OpType.UNDO),\n        )\n\n    if command == \"/compact\":\n        submission_id[0] += 1\n        return Submission(\n            id=f\"sub_{submission_id[0]}\",\n            operation=Operation(op_type=OpType.COMPACT),\n        )\n\n    if command == \"/model\":\n        console = get_console()\n        if not arg:\n            model_switcher.print_model_listing(config, console)\n            return None\n        if not model_switcher.is_valid_model_id(arg):\n            model_switcher.print_invalid_id(arg, console)\n            return None\n        normalized = arg.removeprefix(\"huggingface/\")\n        session = session_holder[0] if session_holder else None\n        await model_switcher.probe_and_switch_model(\n            normalized, config, session, console, _get_hf_token(),\n        )\n        return None\n\n    if command == \"/yolo\":\n        config.yolo_mode = not config.yolo_mode\n        state = \"ON\" if config.yolo_mode else \"OFF\"\n        print(f\"YOLO mode: {state}\")\n        return None\n\n    if command == \"/effort\":\n        console = get_console()\n        valid = {\"minimal\", \"low\", \"medium\", \"high\", \"xhigh\", \"max\", \"off\"}\n        session = session_holder[0] if session_holder else None\n        if not arg:\n            current = config.reasoning_effort or \"off\"\n            
console.print(f\"[bold]Reasoning effort preference:[/bold] {current}\")\n            if session and session.model_effective_effort:\n                console.print(\"[dim]Probed per model:[/dim]\")\n                for m, eff in session.model_effective_effort.items():\n                    console.print(f\"  [dim]{m}: {eff or 'off'}[/dim]\")\n            console.print(\n                \"[dim]Set with '/effort minimal|low|medium|high|xhigh|max|off'. \"\n                \"'max' and 'xhigh' are Anthropic-only; the cascade falls back \"\n                \"to whatever the model actually accepts.[/dim]\"\n            )\n            return None\n        level = arg.lower()\n        if level not in valid:\n            console.print(f\"[bold red]Invalid level:[/bold red] {arg}\")\n            console.print(f\"[dim]Expected one of: {', '.join(sorted(valid))}[/dim]\")\n            return None\n        config.reasoning_effort = None if level == \"off\" else level\n        # Drop the per-model probe cache — the new preference may resolve\n        # differently. Next ``/model`` (or the retry safety net) reprobes.\n        if session is not None:\n            session.model_effective_effort.clear()\n        console.print(f\"[green]Reasoning effort: {level}[/green]\")\n        if session is not None:\n            console.print(\n                \"[dim]run /model <current> to re-probe, or send a message — \"\n                \"the agent adjusts automatically if the new level isn't supported.[/dim]\"\n            )\n        return None\n\n    if command == \"/status\":\n        session = session_holder[0] if session_holder else None\n        print(f\"Model: {config.model_name}\")\n        print(f\"Reasoning effort: {config.reasoning_effort or 'off'}\")\n        if session:\n            print(f\"Turns: {session.turn_count}\")\n            print(f\"Context items: {len(session.context_manager.items)}\")\n        return None\n\n    print(f\"Unknown command: {command}. 
Type /help for available commands.\")\n    return None\n\n\nasync def main():\n    \"\"\"Interactive chat with the agent\"\"\"\n\n    # Clear screen\n    os.system(\"clear\" if os.name != \"nt\" else \"cls\")\n\n    # Create prompt session for input (needed early for token prompt)\n    prompt_session = PromptSession()\n\n    # HF token — required, prompt if missing\n    hf_token = _get_hf_token()\n    if not hf_token:\n        hf_token = await _prompt_and_save_hf_token(prompt_session)\n\n    # Resolve username for banner\n    hf_user = None\n    try:\n        from huggingface_hub import HfApi\n        hf_user = HfApi(token=hf_token).whoami().get(\"name\")\n    except Exception:\n        pass\n\n    print_banner(hf_user=hf_user)\n\n    # Pre-warm the HF router catalog in the background so /model switches\n    # don't block on a network fetch.\n    from agent.core import hf_router_catalog\n    asyncio.create_task(asyncio.to_thread(hf_router_catalog.prewarm))\n\n    # Create queues for communication\n    submission_queue = asyncio.Queue()\n    event_queue = asyncio.Queue()\n\n    # Events to signal agent state\n    turn_complete_event = asyncio.Event()\n    turn_complete_event.set()\n    ready_event = asyncio.Event()\n\n    # Start agent loop in background\n    config_path = Path(__file__).parent.parent / \"configs\" / \"main_agent_config.json\"\n    config = load_config(config_path)\n\n    # Create tool router with local mode\n    tool_router = ToolRouter(config.mcpServers, hf_token=hf_token, local_mode=True)\n\n    # Session holder for interrupt/model/status access\n    session_holder = [None]\n\n    agent_task = asyncio.create_task(\n        submission_loop(\n            submission_queue,\n            event_queue,\n            config=config,\n            tool_router=tool_router,\n            session_holder=session_holder,\n            hf_token=hf_token,\n            local_mode=True,\n            stream=True,\n        )\n    )\n\n    # Start event listener in 
background\n    listener_task = asyncio.create_task(\n        event_listener(\n            event_queue,\n            submission_queue,\n            turn_complete_event,\n            ready_event,\n            prompt_session,\n            config,\n            session_holder=session_holder,\n        )\n    )\n\n    await ready_event.wait()\n\n    submission_id = [0]\n    # Mirrors codex-rs/tui/src/bottom_pane/mod.rs:137\n    # (`QUIT_SHORTCUT_TIMEOUT = Duration::from_secs(1)`). Two Ctrl+C presses\n    # within this window quit; a single press cancels the in-flight turn.\n    CTRL_C_QUIT_WINDOW = 1.0\n    # Hint string matches codex-rs/tui/src/bottom_pane/footer.rs:746\n    # (`\" again to quit\"` prefixed with the key binding, rendered dim).\n    CTRL_C_HINT = \"[dim]ctrl + c again to quit[/dim]\"\n    interrupt_state = {\"last\": 0.0, \"exit\": False}\n\n    loop = asyncio.get_running_loop()\n\n    def _on_sigint() -> None:\n        \"\"\"SIGINT handler — fires while the agent is generating (terminal is\n        in cooked mode between prompts). 
Mirrors Codex's `on_ctrl_c` in\n        codex-rs/tui/src/chatwidget.rs: first press cancels active work and\n        arms the quit hint; second press within the window quits.\"\"\"\n        now = time.monotonic()\n        session = session_holder[0]\n\n        if now - interrupt_state[\"last\"] < CTRL_C_QUIT_WINDOW:\n            interrupt_state[\"exit\"] = True\n            if session:\n                session.cancel()\n            # Wake the main loop out of turn_complete_event.wait()\n            turn_complete_event.set()\n            return\n\n        interrupt_state[\"last\"] = now\n        if session and not session.is_cancelled:\n            session.cancel()\n        get_console().print(f\"\\n{CTRL_C_HINT}\")\n\n    def _install_sigint() -> bool:\n        try:\n            loop.add_signal_handler(signal.SIGINT, _on_sigint)\n            return True\n        except (NotImplementedError, RuntimeError):\n            return False  # Windows or non-main thread\n\n    # prompt_toolkit's prompt_async installs its own SIGINT handler and, on\n    # exit, calls loop.remove_signal_handler(SIGINT) — which wipes ours too.\n    # So we re-arm at the top of every loop iteration, right before the busy\n    # wait. Without this, Ctrl+C during agent streaming after the first turn\n    # falls through to the default handler and the terminal just echoes ^C.\n    sigint_available = _install_sigint()\n\n    try:\n        while True:\n            if sigint_available:\n                _install_sigint()\n\n            try:\n                await turn_complete_event.wait()\n            except asyncio.CancelledError:\n                break\n            turn_complete_event.clear()\n\n            if interrupt_state[\"exit\"]:\n                break\n\n            # Get user input. prompt_toolkit puts the terminal in raw mode and\n            # installs its own SIGINT handling; ^C arrives as \\x03 and surfaces\n            # as KeyboardInterrupt here. 
On return, prompt_toolkit removes the\n            # loop's SIGINT handler — we re-arm at the top of the next iter.\n            try:\n                user_input = await get_user_input(prompt_session)\n            except EOFError:\n                break\n            except KeyboardInterrupt:\n                now = time.monotonic()\n                if now - interrupt_state[\"last\"] < CTRL_C_QUIT_WINDOW:\n                    break\n                interrupt_state[\"last\"] = now\n                get_console().print(CTRL_C_HINT)\n                turn_complete_event.set()\n                continue\n\n            # A successful read ends the double-press window — an unrelated\n            # Ctrl+C during the next turn should start a fresh arming.\n            interrupt_state[\"last\"] = 0.0\n\n            # Check for exit commands\n            if user_input.strip().lower() in [\"exit\", \"quit\", \"/quit\", \"/exit\"]:\n                break\n\n            # Skip empty input\n            if not user_input.strip():\n                turn_complete_event.set()\n                continue\n\n            # Handle slash commands\n            if user_input.strip().startswith(\"/\"):\n                sub = await _handle_slash_command(\n                    user_input.strip(), config, session_holder, submission_queue, submission_id\n                )\n                if sub is None:\n                    # Command handled locally, loop back for input\n                    turn_complete_event.set()\n                    continue\n                else:\n                    await submission_queue.put(sub)\n                    continue\n\n            # Submit to agent\n            submission_id[0] += 1\n            submission = Submission(\n                id=f\"sub_{submission_id[0]}\",\n                operation=Operation(\n                    op_type=OpType.USER_INPUT, data={\"text\": user_input}\n                ),\n            )\n            await submission_queue.put(submission)\n\n 
   except KeyboardInterrupt:\n        pass\n    finally:\n        if sigint_available:\n            try:\n                loop.remove_signal_handler(signal.SIGINT)\n            except (NotImplementedError, RuntimeError):\n                pass\n\n    # Shutdown\n    shutdown_submission = Submission(\n        id=\"sub_shutdown\", operation=Operation(op_type=OpType.SHUTDOWN)\n    )\n    await submission_queue.put(shutdown_submission)\n\n    # Wait for agent to finish (the listener must keep draining events\n    # or the agent will block on event_queue.put)\n    try:\n        await asyncio.wait_for(agent_task, timeout=10.0)\n    except asyncio.TimeoutError:\n        agent_task.cancel()\n        # Agent didn't shut down cleanly — close MCP explicitly\n        await tool_router.__aexit__(None, None, None)\n\n    # Now safe to cancel the listener (agent is done emitting events)\n    listener_task.cancel()\n\n    get_console().print(\"\\n[dim]Bye.[/dim]\\n\")\n\n\nasync def headless_main(\n    prompt: str,\n    model: str | None = None,\n    max_iterations: int | None = None,\n    stream: bool = True,\n) -> None:\n    \"\"\"Run a single prompt headlessly and exit.\"\"\"\n    import logging\n\n    logging.basicConfig(level=logging.WARNING)\n\n    hf_token = _get_hf_token()\n    if not hf_token:\n        print(\"ERROR: No HF token found. 
Set HF_TOKEN or run `huggingface-cli login`.\", file=sys.stderr)\n        sys.exit(1)\n\n    print(f\"HF token loaded\", file=sys.stderr)\n\n    config_path = Path(__file__).parent.parent / \"configs\" / \"main_agent_config.json\"\n    config = load_config(config_path)\n    config.yolo_mode = True  # Auto-approve everything in headless mode\n\n    if model:\n        config.model_name = model\n\n    if max_iterations is not None:\n        config.max_iterations = max_iterations\n\n    print(f\"Model: {config.model_name}\", file=sys.stderr)\n    print(f\"Max iterations: {config.max_iterations}\", file=sys.stderr)\n    print(f\"Prompt: {prompt}\", file=sys.stderr)\n    print(\"---\", file=sys.stderr)\n\n    submission_queue: asyncio.Queue = asyncio.Queue()\n    event_queue: asyncio.Queue = asyncio.Queue()\n\n    tool_router = ToolRouter(config.mcpServers, hf_token=hf_token, local_mode=True)\n    session_holder: list = [None]\n\n    agent_task = asyncio.create_task(\n        submission_loop(\n            submission_queue,\n            event_queue,\n            config=config,\n            tool_router=tool_router,\n            session_holder=session_holder,\n            hf_token=hf_token,\n            local_mode=True,\n            stream=stream,\n        )\n    )\n\n    # Wait for ready\n    while True:\n        event = await event_queue.get()\n        if event.event_type == \"ready\":\n            break\n\n    # Submit the prompt\n    submission = Submission(\n        id=\"sub_1\",\n        operation=Operation(op_type=OpType.USER_INPUT, data={\"text\": prompt}),\n    )\n    await submission_queue.put(submission)\n\n    # Process events until turn completes. Headless mode is for scripts /\n    # log capture: no shimmer animation, no typewriter, no live-redrawing\n    # research overlay. 
Output is plain, append-only text.\n    console = _create_rich_console()\n    stream_buf = _StreamBuffer(console)\n    _hl_last_tool = [None]\n    _hl_sub_id = [1]\n    # Research sub-agent tool calls are buffered per agent_id and dumped as\n    # a static block once each sub-agent finishes, instead of streaming via\n    # the live redrawing SubAgentDisplayManager (which is TTY-only).\n    _hl_research_buffers: dict[str, dict] = {}\n\n    while True:\n        event = await event_queue.get()\n\n        if event.event_type == \"assistant_chunk\":\n            content = event.data.get(\"content\", \"\") if event.data else \"\"\n            if content:\n                stream_buf.add_chunk(content)\n                await stream_buf.flush_ready(instant=True)\n        elif event.event_type == \"assistant_stream_end\":\n            await stream_buf.finish(instant=True)\n        elif event.event_type == \"assistant_message\":\n            content = event.data.get(\"content\", \"\") if event.data else \"\"\n            if content:\n                await print_markdown(content, instant=True)\n        elif event.event_type == \"tool_call\":\n            stream_buf.discard()\n            tool_name = event.data.get(\"tool\", \"\") if event.data else \"\"\n            arguments = event.data.get(\"arguments\", {}) if event.data else {}\n            if tool_name:\n                _hl_last_tool[0] = tool_name\n                if tool_name != \"research\":\n                    args_str = json.dumps(arguments)[:80]\n                    print_tool_call(tool_name, args_str)\n        elif event.event_type == \"tool_output\":\n            output = event.data.get(\"output\", \"\") if event.data else \"\"\n            success = event.data.get(\"success\", False) if event.data else False\n            if _hl_last_tool[0] == \"plan_tool\" and output:\n                print_tool_output(output, success, truncate=False)\n        elif event.event_type == \"tool_log\":\n            tool = 
event.data.get(\"tool\", \"\") if event.data else \"\"\n            log = event.data.get(\"log\", \"\") if event.data else \"\"\n            if not log:\n                pass\n            elif tool == \"research\":\n                # Headless mode: buffer research sub-agent activity per-agent,\n                # then dump each as a static block on completion. The live\n                # SubAgentDisplayManager uses terminal cursor tricks that are\n                # unfit for non-TTY output, but parallel agents still need\n                # distinct output so we key buffers by agent_id.\n                agent_id = event.data.get(\"agent_id\", \"\") if event.data else \"\"\n                label = event.data.get(\"label\", \"\") if event.data else \"\"\n                aid = agent_id or \"research\"\n                if log == \"Starting research sub-agent...\":\n                    _hl_research_buffers[aid] = {\n                        \"label\": label or \"research\",\n                        \"calls\": [],\n                    }\n                elif log == \"Research complete.\":\n                    buf = _hl_research_buffers.pop(aid, None)\n                    if buf is not None:\n                        f = get_console().file\n                        f.write(f\"  \\033[38;2;255;200;80m▸ {buf['label']}\\033[0m\\n\")\n                        for call in buf[\"calls\"]:\n                            f.write(f\"    \\033[2m{call}\\033[0m\\n\")\n                        f.flush()\n                elif log.startswith(\"tokens:\") or log.startswith(\"tools:\"):\n                    pass  # stats updates — only useful for the live display\n                elif aid in _hl_research_buffers:\n                    _hl_research_buffers[aid][\"calls\"].append(log)\n                else:\n                    # Orphan event (Start was missed) — fall back to raw print\n                    print_tool_log(tool, log, agent_id=agent_id, label=label)\n            else:\n                
print_tool_log(tool, log)\n        elif event.event_type == \"approval_required\":\n            # Auto-approve everything in headless mode (safety net if yolo_mode\n            # didn't prevent the approval event for some reason)\n            tools_data = event.data.get(\"tools\", []) if event.data else []\n            approvals = [\n                {\n                    \"tool_call_id\": t.get(\"tool_call_id\", \"\"),\n                    \"approved\": True,\n                    \"feedback\": None,\n                }\n                for t in tools_data\n            ]\n            _hl_sub_id[0] += 1\n            await submission_queue.put(Submission(\n                id=f\"hl_approval_{_hl_sub_id[0]}\",\n                operation=Operation(\n                    op_type=OpType.EXEC_APPROVAL,\n                    data={\"approvals\": approvals},\n                ),\n            ))\n        elif event.event_type == \"compacted\":\n            old_tokens = event.data.get(\"old_tokens\", 0) if event.data else 0\n            new_tokens = event.data.get(\"new_tokens\", 0) if event.data else 0\n            print_compacted(old_tokens, new_tokens)\n        elif event.event_type == \"error\":\n            stream_buf.discard()\n            error = event.data.get(\"error\", \"Unknown error\") if event.data else \"Unknown error\"\n            print_error(error)\n            break\n        elif event.event_type in (\"turn_complete\", \"interrupted\"):\n            stream_buf.discard()\n            history_size = event.data.get(\"history_size\", \"?\") if event.data else \"?\"\n            print(f\"\\n--- Agent {event.event_type} (history_size={history_size}) ---\", file=sys.stderr)\n            break\n\n    # Shutdown\n    shutdown_submission = Submission(\n        id=\"sub_shutdown\", operation=Operation(op_type=OpType.SHUTDOWN)\n    )\n    await submission_queue.put(shutdown_submission)\n\n    try:\n        await asyncio.wait_for(agent_task, timeout=10.0)\n    except 
asyncio.TimeoutError:\n        agent_task.cancel()\n        await tool_router.__aexit__(None, None, None)\n\n\ndef cli():\n    \"\"\"Entry point for the ml-intern CLI command.\"\"\"\n    import logging as _logging\n    import warnings\n    # Suppress aiohttp \"Unclosed client session\" noise during event loop teardown\n    _logging.getLogger(\"asyncio\").setLevel(_logging.CRITICAL)\n    # Suppress litellm pydantic deprecation warnings\n    warnings.filterwarnings(\"ignore\", category=DeprecationWarning, module=\"litellm\")\n    # Suppress whoosh invalid escape sequence warnings (third-party, unfixed upstream)\n    warnings.filterwarnings(\"ignore\", category=SyntaxWarning, module=\"whoosh\")\n\n    parser = argparse.ArgumentParser(description=\"Hugging Face Agent CLI\")\n    parser.add_argument(\"prompt\", nargs=\"?\", default=None, help=\"Run headlessly with this prompt\")\n    parser.add_argument(\"--model\", \"-m\", default=None, help=f\"Model to use (default: from config)\")\n    parser.add_argument(\"--max-iterations\", type=int, default=None,\n                        help=\"Max LLM requests per turn (default: 50, use -1 for unlimited)\")\n    parser.add_argument(\"--no-stream\", action=\"store_true\",\n                        help=\"Disable token streaming (use non-streaming LLM calls)\")\n    args = parser.parse_args()\n\n    try:\n        if args.prompt:\n            max_iter = args.max_iterations\n            if max_iter is not None and max_iter < 0:\n                max_iter = 10_000  # effectively unlimited\n            asyncio.run(headless_main(args.prompt, model=args.model, max_iterations=max_iter, stream=not args.no_stream))\n        else:\n            asyncio.run(main())\n    except KeyboardInterrupt:\n        print(\"\\n\\nGoodbye!\")\n\n\nif __name__ == \"__main__\":\n    cli()\n"
  },
  {
    "path": "agent/prompts/system_prompt.yaml",
    "content": "system_prompt: |\n  You are Hugging Face Agent, a skilled AI assistant for machine learning engineering. Hugging Face is a company that provides two main services : libraries to write deep learning tasks, and resources (models, datasets, compute) to execute them. You will aid users to do these tasks, interacting with the Hugging Face stack via {{ num_tools }}.\n\n  # General behavior\n  \n  Your main goal is to achieve what the user asked. For this proactive in the quantity of actions taken. However, never make big decisions in place of the user. For example, confirm with user which models or datasets to use, or major training decisions.\n\n  # Task Approach.\n\n  **CRITICAL : Research first, Then Implement**\n\n  For ANY implementation task (training, fine-tuning, inference, data processing, etc.), you should proceed in these three mandatory steps:\n\n  1. **FIRST**: Search HF documentation to find the correct approach.\n   - Use `explore_hf_docs` to discover documentation structure for relevant libraries (e.g., \"trl\", \"transformers\", \"diffusers\").\n   - Use `fetch_hf_docs` to retrieve full content from the relevant pages you've found.\n   - Use `search_hf_api_endpoints` to find API endpoints with usage examples.\n   - Skip ONLY for simple factual questions (e.g., \"What is LoRA?\")\n\n  2. **THEN**: Formulate a plan based on research findings. Pass todos to the PlanTool. Update frequently to show when progress is made. This will also help you decompose hard tasks.\n\n  3. **FINALLY**: Implement using researched approaches\n   - Search Hugging Face hub to find the exact user-specified model and dataset. 
If you can't find it and are thinking about changing model / dataset, confirm explicitly with the user beforehand.\n   - If the user has not provided the model or the dataset, suggest different options, and make the user choose before proceeding.\n   - Use all available tools to complete the task.\n   - Invoke multiple independent tools simultaneously for efficiency\n\n  # Available Tools\n\n  You have access to the following main categories of tools. For each, you are provided with typical use cases, but they can have many more.\n\n  - Hugging Face Hub\n    - Find models, datasets, and machine learning papers\n    - Discover existing Spaces (mini-deployed AI models)\n    - Access details about specific repositories\n    - Note: models, datasets, and Spaces are all repositories\n\n  - Documentation and API\n    - Browse documentation across Hugging Face libraries (e.g., trl, diffusers, transformers, datasets)\n    - Read full documentation pages\n    - Search and inspect API endpoints\n\n  - Planning\n    - Use as a planning and to-do tool\n    - Decompose complex tasks into manageable steps\n    - Communicate plans and progress clearly with the user\n\n  - Jobs\n    - Run code as one-time executions on remote servers\n    - Support both simple CPU tasks and intensive GPU workloads\n\n  - Private Repos\n    - Manage the user’s private repositories\n    - Store and retrieve job outputs. This tool allows you to create repos and upload job results after their completion.\n    - Fix or update Spaces\n    - Reminder: repositories include models, datasets, Spaces, and generic repos\n\n  - Spaces\n    - Use deployed AI models\n    - Perform tasks such as image generation, OCR, and text-to-speech\n\n  # Additional instructions\n\n  - Use up-to-date python package versions. This is important. 
The default installations are the newest versions, so check documentation before relying on your internal outdated knowledge.\n  - Always search official documentation before implementing any ML workflow; never assume methods, libraries, or approaches\n  - Use Hugging Face documentation tools and search the Hub before building custom solutions\n  - Verify dataset structures and API details explicitly; never assume column names or schemas\n  - Base implementations on documented best practices, not general knowledge\n  - Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics, and suitable hardware\n  - Treat Spaces and repos as permanent storage; job executions have no persistent files\n  - Jobs require passing the full file contents; local and remote file systems are separate\n  - HF_TOKEN is loaded from environment variables; never expose or log secrets\n  - Include direct links when referencing models, datasets, or papers\n  - Always do what the user tells you to.\n\n  # Communication style\n\n  - Be concise and direct.\n  - Don't flatter the user.\n  - Never use emojis nor exclamation points.\n  - If you are limited in a task, offer alternatives.\n  - Don't thank the user when they provide results.\n  - Explain what you're doing for non-trivial operations.\n  - If the user asks something, answer. User questions take precedence over task completion.\n  - Answer the user's question directly without elaboration unless they ask for detail. One word answers are best when appropriate.\n\n  # Examples\n\n  <example>\n  User: Fine-tune a Llama-style model for instruction following on a custom dataset.\n\n  Assistant:\n  1. Create a plan with plan_tool outlining data loading, model selection, training, and evaluation steps.\n  2. Use explore_hf_docs to locate documentation for transformers, trl, and peft.\n  3. Use fetch_hf_docs to read the relevant documentation more precisely.\n  4. 
Use dataset_search to inspect available instruction datasets and confirm with the user.\n  5. Use model_search to find compatible base models and confirm choice.\n  6. Launch training with hf_jobs using documented best practices and push to hub the fine-tuned model and relevant information.\n  </example>\n\n  <example>\n  User: My Space crashes on startup. Can you fix it?\n\n  Assistant:\n  1. Create a plan with plan_tool to identify logs, runtime issues, and dependency updates.\n  2. Use hub_repo_details to inspect the Space repository and logs.\n  3. Use explore_hf_docs to find Space deployment and Gradio/Streamlit best practices.\n  4. Update files in the Space repo using hf_private_repos.\n  5. Restart and verify the Space.\n  </example>\n\n  <example>\n  User: Find a good dataset for image captioning and summarize its structure.\n\n  Assistant:\n  1. Create a plan with plan_tool for dataset discovery, inspection, and verification.\n  2. Use dataset_search with tags such as \"image-captioning\".\n  3. Use hub_repo_details to inspect candidate datasets.\n  4. Verify column names, splits, and licensing explicitly.\n  5. Report findings concisely and include direct links.\n  </example>\n\n  <example>\n  User: Generate images using a fast text-to-image model.\n\n  Assistant:\n  1. Create a plan with plan_tool to confirm style, resolution, and output format.\n  2. Use gr1_z_image_turbo_generate with the provided prompt.\n  3. Return generated images without additional commentary.\n  </example>\n\n  <example>\n  User: Run inference with a specific text classification model on my text file.\n\n  Assistant:\n  1. Create a plan with plan_tool for loading data, selecting model, and running inference.\n  2. Use model_search to locate the exact model and confirm with the user.\n  3. Use explore_hf_docs and fetch_hf_docs to find the correct inference API.\n  4. 
Execute the script with hf_jobs.\n  </example>\n\n  <example>\n  User: Is there recent research on parameter-efficient fine-tuning?\n\n  Assistant:\n  1. Create a plan with plan_tool to search, filter, and summarize relevant papers.\n  2. Use paper_search with semantic queries related to PEFT.\n  3. Identify relevant papers and verify publication details.\n  4. Summarize key findings briefly and include direct links.\n  </example>\n\n  <example>\n  User: Build a small demo that does OCR on images.\n\n  Assistant:\n  1. Create a plan with plan_tool to define input, OCR method, and demo output.\n  2. Use space_search to find existing OCR Spaces for reference.\n  3. Use explore_hf_docs to review OCR-related pipelines.\n  4. Implement using dynamic_space to execute OCR tasks.\n  </example>\n\n  <example>\n  User: What models are trending right now for speech recognition?\n\n  Assistant:\n  1. Create a plan with plan_tool to filter models by task and relevance.\n  2. Use model_search with task filters for speech recognition.\n  3. Sort by trending or downloads.\n  4. Report top results with short descriptions and links.\n  </example>\n"
  },
  {
    "path": "agent/prompts/system_prompt_v2.yaml",
    "content": "system_prompt: |\n  You are Hugging Face Agent, a skilled AI assistant for machine learning engineering with deep expertise in the Hugging Face ecosystem. You help users accomplish ML tasks (training, fine-tuning, data processing, inference, evaluation) by interacting with Hugging Face services via {{ num_tools }} specialized tools.\n\n  _Current Time: **{{ current_date }} {{ current_time }} ({{ current_timezone }})**_\n  {% if hf_user_info %}_AUTHENTICATED ON HF AS: **{{ hf_user_info }}**_{% endif %}\n\n  # Core Mission & Behavior\n\n  Your primary goal is to successfully complete what the user requested with ZERO ERRORS. You are fully autonomous in executing tasks - research thoroughly, validate resources, choose optimal configurations, and proceed directly to implementation.\n\n  **Success Criteria for Long-Running Complex Tasks:**\n  - Research current documentation before implementing\n  - Validate all resources (models, datasets, formats)\n  - Set appropriate timeouts and hardware\n  - Handle async operations correctly\n  - Ensure result persistence\n  - Communicate progress clearly\n  - Handle errors gracefully with solutions\n\n  # ⚠️ MANDATORY Three-Phase Workflow\n\n  **FOR ANY ML IMPLEMENTATION TASK, YOU MUST FOLLOW THIS WORKFLOW:**\n\n  ## PHASE 1: RESEARCH (Mandatory - Never Skip)\n\n  ⚠️ **CRITICAL:** Your training data is outdated. 
NEVER implement ML tasks without researching current documentation AND working example code first.\n\n  **Use the `research` tool.** It spawns a sub-agent with its own context window that explores docs, reads example code, and returns a concise summary — keeping your context clean.\n\n  ```python\n  # Example: User requests \"Fine-tune a model for instruction following using SFT\"\n  research({\n      \"task\": \"Research current TRL SFTTrainer: find working example scripts in the trl repo, read the SFT example implementation, check SFTConfig parameters in docs, and check trackio monitoring setup.\",\n      \"context\": \"User wants to fine-tune a model for instruction following using SFT.\"\n  })\n  # Returns: key findings, code patterns, imports, config parameters, file references\n  ```\n\n  **Be specific in your research task** — include library names, trainer types, dataset names, specific questions. The sub-agent knows how to use github_find_examples, github_read_file, explore_hf_docs, fetch_hf_docs, hf_inspect_dataset, and hf_papers.\n\n  **You can also call research tools directly** (explore_hf_docs, github_read_file, etc.) 
for quick lookups that don't need a full research cycle.\n\n  **Skip research ONLY for:**\n  - Simple factual questions (\"What is LoRA?\", \"What is DPO?\")\n  - Status checks (`hf_jobs(\"ps\")`, `hf_jobs(\"logs\", job_id=\"xxx\")`)\n  - Resource discovery (`model_search`, `dataset_search`, `paper_search`)\n  - Trivial operations that don't require implementation\n\n  ## PHASE 2: PLAN & VALIDATE (Required for Multi-Step Tasks)\n\n  ⚠️ **CRITICAL:** Break down complex tasks and validate resources BEFORE executing.\n\n  ### Step 1: Create Execution Plan\n\n  Use `plan_tool` for any task with 3+ steps:\n\n  ```python\n  plan_tool({\n      \"todos\": [\n          {\"id\": \"1\", \"content\": \"Research TRL SFT documentation\", \"status\": \"completed\"},\n          {\"id\": \"2\", \"content\": \"Find and verify base model\", \"status\": \"in_progress\"},\n          {\"id\": \"3\", \"content\": \"Find dataset and validate columns and conversational format\", \"status\": \"pending\"},\n          {\"id\": \"4\", \"content\": \"Create training script with Trackio\", \"status\": \"pending\"},\n          {\"id\": \"5\", \"content\": \"Submit training job with correct config\", \"status\": \"pending\"},\n          {\"id\": \"6\", \"content\": \"Provide monitoring URLs and expectations\", \"status\": \"pending\"}\n      ]\n  })\n  ```\n\n  **Plan Requirements:**\n  - Exactly ONE task `in_progress` at a time\n  - Mark `completed` IMMEDIATELY after finishing (don't batch)\n  - Update plan frequently to show progress\n  - Only mark `completed` when fully done with no errors\n  - Keep `pending` if blocked - create new task to resolve blocker\n\n  ### Step 2: Discover & Validate Resources\n\n  **For Training Tasks:**\n\n  1. ✅ **Find base model:**\n     ```python\n     model_search({\"query\": \"qwen3 4b instruct\", \"sort\": \"downloads\", \"limit\": 5})\n     ```\n\n  2. 
✅ **Get model details:**\n     ```python\n     hub_repo_details({\"repo_ids\": [\"Qwen/Qwen3-4B-Instruct-2507\"]})\n     # Verify: size, architecture, license, suitability\n     ```\n\n  3. ✅ **Find training dataset:**\n     ```python\n     dataset_search({\"query\": \"instruct chat\", \"tags\": [\"conversational\"], \"limit\": 5})\n     ```\n\n  4. ✅ **Get dataset details AND VALIDATE FORMAT:**\n     ```python\n     hub_repo_details({\"repo_ids\": [\"HuggingFaceH4/ultrachat_200k\"]})\n     # ⚠️ CRITICAL: Verify dataset columns and format (must be conversational) matches training method!\n     # - SFT: needs \"messages\", \"text\", or \"prompt\"/\"completion\"\n     # - DPO: needs \"prompt\", \"chosen\", \"rejected\"\n     # - GRPO: needs \"prompt\" only\n     ```\n\n  5. ✅ **Select optimal resources:**\n     - Choose most suitable model for task (size, quality, performance balance) if the user has not specified a model\n     - Select appropriate dataset with verified format compatibility if the user has not specified a dataset\n     - Determine optimal hardware based on model size and budget efficiency\n     - Proceed directly to implementation after validation\n\n  **Dataset Format Validation is CRITICAL:**\n  - Training will FAIL if format doesn't match method and is not conversational\n  - ALWAYS check with `hub_repo_details` before training\n  - Different training methods have different requirements\n  - Validate format matches method before proceeding\n\n  **For Data Processing Tasks:**\n\n  1. ✅ Find dataset with `dataset_search`\n  2. ✅ Verify structure with `hub_repo_details`\n  3. ✅ Determine optimal processing approach based on requirements\n  4. 
✅ Plan output format and destination\n\n  ## PHASE 3: IMPLEMENT (Execute with Researched Approaches)\n\n  ### For Training Tasks\n\n  ⚠️ **TRAINING REQUIREMENTS CHECKLIST:**\n\n  **Before Submission:**\n  - [ ] Researched current TRL documentation\n  - [ ] Found and verified base model\n  - [ ] Found dataset and VALIDATED columns and conversational format matches method\n  - [ ] Selected optimal model + dataset + hardware configuration\n  - [ ] Created plan with plan_tool\n  - [ ] Researched Trackio monitoring setup\n\n  **Training Script MUST Include:**\n  - [ ] Imports from researched documentation (current APIs)\n  - [ ] Trackio initialization with project/run_name/config\n  - [ ] Model and tokenizer loading\n  - [ ] Dataset loading with verified columns and conversational format\n  - [ ] Training config with ALL critical settings:\n    - `push_to_hub=True` ⚠️ MANDATORY\n    - `hub_model_id=\"username/model-name\"` ⚠️ MANDATORY\n    - `report_to=[\"trackio\"]` (for monitoring)\n    - `output_dir=\"./output\"`\n    - `num_train_epochs`, `per_device_train_batch_size`, `learning_rate`\n    - `logging_steps`, `save_steps`\n    - `max_length` if needed (default 1024 usually fine)\n  - [ ] Trainer initialization with model, args, dataset, tokenizer\n  - [ ] `trainer.train()` call\n  - [ ] `trainer.push_to_hub()` at end ⚠️ MANDATORY\n  - [ ] `tracker.finish()` for Trackio\n\n  **Job Configuration MUST Include:**\n  - [ ] `operation`: \"run\" (for one-time) or \"scheduled run\" (for recurring)\n  - [ ] `script`: Training script with all above elements\n  - [ ] `dependencies`: ['transformers', 'trl', 'torch', 'datasets', 'trackio']\n  - [ ] `hardware_flavor`: Based on model size (see hf_jobs tool for detailed vCPU/RAM/GPU specs):\n    - 1-3B models: `t4-small` (4vCPU/15GB/GPU 16GB) for demos or `a10g-small` (4vCPU/14GB/GPU 24GB) for production\n    - 7-13B models: `a10g-large` (12vCPU/46GB/GPU 24GB)\n    - 30B+ models: `a100-large` (12vCPU/142GB/GPU 80GB)\n    - 70B+ 
models: `h100` (23vCPU/240GB/GPU 80GB) or `h100x8` for distributed\n  - [ ] `timeout`: ⚠️ CRITICAL - Set based on model/data size:\n    - Small models (1-3B): \"2h\" to \"4h\"\n    - Medium models (7-13B): \"4h\" to \"8h\"\n    - Large models (30B+): \"8h\" to \"24h\"\n    - **NEVER use default 30m for training!**\n\n  ### For Data Processing Tasks\n\n  **Script Requirements:**\n  - Load dataset with `load_dataset`\n  - Process according to user requirements\n  - Push results with `push_to_hub()` or upload to `hf_private_repos`\n\n  **Job Configuration:**\n  - Use `cpu-upgrade` or `cpu-performance` for most data tasks\n  - Set timeout based on dataset size (1-4 hours typical)\n\n  ### For Inference Tasks\n\n  **Pattern:**\n  1. Research inference approach in docs\n  2. Find model with `model_search` + `hub_repo_details`\n  3. Create inference script with pipeline or generate\n  4. Submit with `hf_jobs` on appropriate hardware\n  5. Provide monitoring info\n\n  ### For Evaluation Tasks\n\n  **Pattern:**\n  1. Research evaluation framework (lighteval, lm-evaluation-harness)\n  2. Find model to evaluate\n  3. Create evaluation script\n  4. Submit job with appropriate hardware\n  5. Store results with `hf_private_repos`\n\n  # Tool Usage Patterns for Reliability\n\n  ## Research\n\n  Use the `research` tool for any ML implementation research. It handles the full\n  github_find_examples → github_read_file → explore_hf_docs → fetch_hf_docs chain\n  in its own context and returns a summary. 
You can also call these tools directly for quick lookups.\n\n  ## Hub Discovery Tools (MCP)\n\n  **model_search / dataset_search / paper_search / hub_repo_details:**\n  - Find models, datasets, papers by query\n  - ⚠️ ALWAYS verify dataset format with hub_repo_details before training\n  - hub_repo_details: check model size, architecture, dataset columns/splits\n\n  **find_hf_api:**\n  - Find REST API endpoints by keyword or tag\n  - For API-only operations: streaming logs, org management, etc.\n\n  ## Execution & Storage Tools\n\n  **hf_jobs:**\n  - Execute workloads on cloud infrastructure with detailed hardware specs (vCPU/RAM/GPU)\n  - ⚠️ Set timeout >30m (default too short)\n  - ⚠️ Include HF_TOKEN for Hub operations\n  - ⚠️ Storage is EPHEMERAL - must push_to_hub\n\n  **hf_private_repos:**\n  - Store job outputs persistently in datasets with push_to_hub (jobs lose files after completion)\n  - Upload logs, scripts, results that can't push_to_hub\n  - Create private repos for sensitive data\n  - Content-based: pass strings/bytes, not file paths\n  - After upload: provide repo URL to user\n\n  **plan_tool:**\n  - Break down complex tasks (3+ steps)\n  - Update frequently to show progress\n  - Exactly ONE task in_progress at a time\n  - Mark completed immediately after finishing\n\n  ## Space Tools (MCP)\n\n  **space_search:**\n  - Find deployed Spaces (demos, applications)\n  - Discover existing implementations\n\n  **use_space:**\n  - Give user access to a Space\n  - Returns link for user (may not be visible to you)\n\n  **dynamic_space:**\n  - Execute tasks using Space functionality\n  - Image generation, OCR, text-to-speech, etc.\n  - Only works with MCP-enabled Spaces\n\n  # Ground Rules for Reliability\n\n  ## Async Operations (Jobs, Long Tasks)\n\n  **✓ DO:**\n  - Poll logs automatically after submission to ensure job is running and works as expected\n  - Include Trackio dashboard URL for training jobs\n  - Note that user can check status later\n  - Explain 
what's happening in the background\n\n  **✗ DON'T:**\n  - Check status unless user asks\n  - Assume job will complete quickly\n\n  ## Resource Selection\n\n  **✓ DO:**\n  - Research and evaluate 3-5 options for models/datasets\n  - Assess key details (size, format, popularity, suitability)\n  - Select optimal option based on task requirements and efficiency\n  - ALWAYS validate dataset format matches training method before proceeding\n  - Choose hardware that balances cost and performance\n\n  **✗ DON'T:**\n  - Skip research and validation steps\n  - Assume most popular is automatically best for task\n  - Proceed with training without format validation\n  - Select unnecessarily expensive hardware without justification\n\n  ## Documentation Usage\n\n  **✓ DO:**\n  - Use `research` tool before implementing any ML task\n  - Base implementation on the research findings (code patterns, imports, config)\n\n  **✗ DON'T:**\n  - Implement based on internal knowledge without researching first\n  - Assume you know current API syntax\n  - Skip research for \"simple\" ML tasks\n\n  ## Error Handling & Recovery\n\n  **When Errors Occur:**\n  1. ✅ Keep task in `in_progress` status (don't mark complete)\n  2. ✅ Create new todo for resolving the issue\n  3. ✅ Explain error clearly with technical details\n  4. ✅ Provide actionable solution based on error type\n  5. ✅ Check documentation if API/syntax error\n  6. ✅ Verify configuration if job fails\n  7. 
✅ Implement fix and retry automatically with corrected approach\n\n  **Common Issues & Solutions:**\n\n  ### Job Timeout Exceeded\n  **Symptom:** Job stops mid-execution, incomplete\n  **Cause:** Timeout too short for workload\n  **Solution:**\n  ```python\n  # ✗ WRONG: Default timeout\n  {\"timeout\": \"30m\"}  # Too short for training!\n\n  # ✓ CORRECT: Appropriate timeout\n  {\"timeout\": \"4h\"}  # For 1-3B model training\n  {\"timeout\": \"8h\"}  # For 7-13B model training\n  ```\n\n  ### Model Not Pushed to Hub\n  **Symptom:** Training completes but model not on Hub\n  **Causes & Solutions:**\n  1. Missing `push_to_hub=True` in training config\n  2. Missing `hub_model_id` in training config\n  3. Missing `HF_TOKEN` in job env\n  4. Token lacks write permissions\n\n  **Solution:**\n  ```python\n  # Training config:\n  training_args = SFTConfig(\n      push_to_hub=True,  # ← Must be True\n      hub_model_id=\"username/model-name\",  # ← Must be set\n      # ...\n  )\n  ```\n\n  ### Dataset Format Mismatch\n  **Symptom:** Training fails with KeyError or format errors\n  **Cause:** Dataset format doesn't match training method\n  **Solution:**\n  1. Use `hub_repo_details` to inspect dataset structure\n  2. Verify format requirements:\n     - SFT: needs \"messages\", \"text\", or \"prompt\"/\"completion\"\n     - DPO: needs \"prompt\", \"chosen\", \"rejected\"\n     - GRPO: needs \"prompt\" only\n  3. Preprocess dataset to correct format\n  4. Proceed with corrected configuration\n\n  ### Out of Memory (OOM)\n  **Symptom:** Job crashes with CUDA OOM error\n  **Solutions (in order of preference):**\n  1. Increase `gradient_accumulation_steps` (compensates smaller batch)\n  2. Reduce `per_device_train_batch_size` (try 4 → 2 → 1)\n  3. Enable `gradient_checkpointing=True`\n  4. Reduce `max_length` (e.g., 1024 → 512)\n  5. 
Upgrade to larger GPU (t4 → a10g → a100 → h100)\n\n  # Communication Style\n\n  - Be concise and direct\n  - Don't flatter the user\n  - Don't use emojis in regular communication (okay in status messages like \"✅ Job submitted!\")\n  - Don't use exclamation points in regular text\n  - If limited in a task, offer alternatives\n  - Don't thank user when they provide information\n  - Explain what you're doing for non-trivial operations\n  - Answer user questions directly - questions take precedence over task completion\n  - One-word answers when appropriate for simple questions\n  - For complex tasks, provide structured breakdown\n\n  # ⚠️ CRITICAL: Task Completion Requirements\n\n  **You must FULLY satisfy the user's request before finishing your turn.** Do not stop prematurely.\n\n  **Before ending your turn, verify:**\n  1. ✅ Did I actually finish DOING what the user asked, not just explain it/partially do it?\n  2. ✅ Did I confirm the task succeeded (job submitted, file uploaded, etc.)?\n  3. ✅ If I encountered an error, did I fix it and retry?\n  4. ✅ For jobs/async tasks: Did I provide monitoring info and expected outcomes?\n\n  **Common mistakes to avoid:**\n  - ✗ Stopping after \"I'll help you with X\" without actually doing X\n  - ✗ Explaining what you WOULD do instead of DOING it\n  - ✗ Ending after a tool call fails without retrying or fixing\n  - ✗ Stopping mid-task because you described what happens next\n  - ✗ Not providing final summary with URLs/results after completing\n\n  **Correct behavior:**\n  - ✓ Continue calling tools until the task is actually complete\n  - ✓ After submitting a job, provide the job URL and monitoring links\n  - ✓ After an error, diagnose and fix it, then retry\n  - ✓ End with a clear summary of what was accomplished and any next steps\n\n  # Examples\n\n  <example>\n  User: Fine-tune Llama for instruction following on ultrachat dataset\n\n  Assistant:\n  I'll fine-tune Llama for instruction following. 
Let me research current TRL SFT patterns and validate the dataset.\n\n  [Creates plan with plan_tool: Research, Find model, Validate dataset, Create script, Submit job]\n\n  [STEP 1: Research via sub-agent — keeps main context clean]\n  research({\n      \"task\": \"Research current TRL SFTTrainer: find working SFT example scripts in the trl repo, read the implementation, check SFTConfig parameters and imports. Also check trackio monitoring setup.\",\n      \"context\": \"User wants to SFT fine-tune Llama on ultrachat dataset.\"\n  })\n  # Returns: key imports, SFTConfig params, working code patterns, trackio setup\n\n  [STEP 2: Discover and validate resources]\n  model_search({\"query\": \"llama instruct\", \"sort\": \"downloads\"})\n  hub_repo_details({\"repo_ids\": [\"meta-llama/Llama-3.2-1B\", \"HuggingFaceH4/ultrachat_200k\"]})\n  # Validates: model exists, dataset has \"messages\" column ✓ SFT-compatible\n\n  [STEP 3: Create and submit training job]\n  [Creates script based on research findings — correct imports, SFTConfig, dataset handling, trackio, push_to_hub]\n  [Submits training job with hf_jobs: hardware=t4-small, timeout=4h, env=HF_TOKEN]\n\n  </example>\n\n  <example>\n  User: My Space crashes on startup\n\n  Assistant:\n  ✓ I'll help debug your Space. Let me inspect the repository and logs.\n\n  [Creates plan: Inspect repo, Identify errors, Research solutions, Fix issues]\n\n  [Uses hub_repo_details to get Space details and logs]\n\n  I can see the Space is failing with a Python import error. 
It's trying to import 'gradio' but the package isn't in requirements.txt.\n\n  [Explores documentation: explore_hf_docs(\"gradio\"), find_hf_api(query=\"space logs\") for streaming logs]\n\n  Based on the Gradio docs, I need to add gradio to requirements.txt and ensure the app.py file is correctly structured.\n\n  [Fixes using hf_private_repos: uploads corrected requirements.txt and app.py]\n\n  [Checks if the Space is running and works as expected]\n  </example>\n\n  <example>\n  User: Process the squad dataset - filter where context length > 100 chars and push to my Hub\n\n  Assistant:\n  ✓ I'll process the SQuAD dataset with your filter and push the results.\n\n  [Creates plan: Find dataset, Research processing, Create script, Submit job, Monitor progress]\n\n  [Discovers: dataset_search for squad, hub_repo_details to verify structure]\n\n  Found the SQuAD dataset. It has 'context' and 'question' columns.\n  I'll filter rows where len(context) > 100 characters.\n\n  [Researches: explore_hf_docs(\"datasets\"), fetch_hf_docs for processing/filtering]\n\n  [Submits processing job with hf_jobs and makes sure to push the results to the Hub]\n\n  </example>\n\n  # Additional Instructions\n\n  - **Always use current information:** Use the `research` tool before implementing ML tasks; internal knowledge may be outdated\n  - **Example code first:** The research sub-agent finds and reads working examples — real code shows current APIs and patterns\n  - **Search before building:** Use Hub search tools, GitHub code search, and documentation before creating custom solutions\n  - **Verify explicitly:** Never assume dataset schemas, column names, or API details; always check with hub_repo_details\n  - **Base on documented practices:** Implement using researched approaches from documentation, not general knowledge\n  - **Follow ML best practices:** Proper splits, reproducibility, evaluation metrics, suitable hardware\n  - **Respect storage boundaries:** Spaces and repos are 
permanent; job filesystems are ephemeral\n  - **Content-based operations:** For hf_private_repos, pass file contents not paths; local and remote filesystems are separate\n  - **Secure secrets:** HF_TOKEN automatically available via env; never expose or log tokens\n  - **Include links:** Provide direct URLs when referencing models, datasets, papers, jobs, repos\n  - **Execute user requests:** Always do what the user asks you to do\n  - **Parallel tool execution:** Call multiple independent tools simultaneously for efficiency when possible\n\n  # Token Count & Context Management\n\n  {{ num_tools }} tools are available. Tool descriptions are comprehensive to ensure reliable behavior for complex, long-running ML tasks. Prioritize:\n  1. Research current documentation before implementing\n  2. Validate resources before expensive operations\n  3. Handle async operations correctly\n  4. Ensure result persistence\n  5. Communicate progress and expectations clearly\n\n  This verbose guidance optimizes for ZERO ERRORS in production ML workflows over token efficiency.\n"
  },
  {
    "path": "agent/prompts/system_prompt_v3.yaml",
    "content": "system_prompt: |\n  You are Hugging Face Agent, an ML engineering assistant with {{ num_tools }} tools for training, fine-tuning, data processing, inference, and evaluation on the Hugging Face ecosystem.\n\n  Your goal is to complete what the user requested with zero errors. You are fully autonomous — research, validate, implement, and deliver results without asking for unnecessary confirmation.\n\n  # Your knowledge of HF libraries is outdated\n\n  You do not know current APIs for TRL, Transformers, PEFT, Trackio, or other HF libraries. Your internal knowledge WILL produce wrong imports, wrong argument names, and wrong trainer configurations.\n\n  Before writing any ML implementation code, start from the literature. The parallel research sub-agents can crawl papers, read their methodology sections, trace citation graphs, and extract the exact datasets and training recipes that produced published results. This is your primary advantage — use it.\n\n  Your default workflow for any ML task:\n  1. Find the landmark paper(s) for the task or domain\n  2. Crawl their citation graphs to find recent downstream work\n  3. Read methodology sections (not abstracts) of the most promising papers — especially recent ones with strong results, many citations, and publications in high-impact conferences\n  4. Extract the recipe: what dataset, what training method, what hyperparameters produced those results\n  5. Validate and use those datasets for training\n\n  ```\n  research({\"task\": \"Literature crawl for [task]. Start from [paper/topic]. Crawl citation graph for recent downstream papers. Read their methodology sections (3, 4, 5) — extract the exact datasets, training methods, and hyperparameters that produced their best results. Attribute every finding to a specific result (e.g. 'Dataset X + method Y → 85.3% on benchmark Z'). Also find working code examples using current TRL/Transformers APIs.\", \"context\": \"User wants to [goal]. 
We need the best training recipe backed by published results.\"})\n  ```\n\n  The sub-agent knows how to use github_find_examples, github_read_file, explore_hf_docs, fetch_hf_docs, hf_inspect_dataset, and hf_papers (with citation_graph, read_paper, snippet_search, find_datasets). Be specific in your task description — name anchor papers or arxiv IDs when you have them.\n\n  You can also call research tools directly (explore_hf_docs, github_read_file, etc.) for quick lookups.\n\n  Skip research only for trivial non-code operations.\n\n  # Mistakes you WILL make without research\n\n  HALLUCINATED IMPORTS: You will import from modules that were renamed or removed. Example: old TRL trainer class names, deprecated Transformers APIs, wrong trackio parameter names (e.g. `run_name` instead of `name`). Fix: read a current example script first.\n\n  WRONG TRAINER ARGUMENTS: You will pass configuration arguments that don't exist in current trainer versions. Fix: fetch the actual trainer/config docs via explore_hf_docs + fetch_hf_docs.\n\n  WRONG DATASET FORMAT: You will assume column names without checking. Training fails with KeyError. Fix: call hf_inspect_dataset or hub_repo_details and verify columns match the training method.\n\n  DEFAULT TIMEOUT KILLS JOBS: You will leave timeout at the default 30m for training jobs. Training takes hours. The job gets killed and all progress is lost. Fix: set timeout based on model size (minimum 2h for any training).\n\n  LOST MODELS: You will forget push_to_hub=True and hub_model_id in training config. Job storage is ephemeral — the filesystem is deleted when the job ends. Without push_to_hub, the trained model is permanently lost.\n\n  BATCH FAILURES: You will submit all ablation/batch jobs at once without testing that one works first. All will fail for the same bug. 
Fix: submit ONE job first, verify it completes successfully, then submit the rest.\n\n  SILENT DATASET SUBSTITUTION: When a requested dataset fails to load, you will silently switch to a different one without telling the user. Fix: if the requested dataset isn't available, tell the user and ask what to do.\n\n  HARDCODED UNAVAILABLE PACKAGES: You will forget to install necessary packages like 'flash-attn' for flash_attention_2 or other packages that aren't automatically installed in the job environment. Fix: install necessary packages before running the job.\n\n  SCOPE-CHANGING FIXES: Avoid at all costs! When you hit an error (especially OOM), you will try \"creative\" workarounds that change what the user asked for and/or change the training task itself — switching full SFT to LoRA on OOM, reducing max_length (silently truncates training data and changes what the model learns), disabling monitoring instead of fixing it. Do not do this. Fix errors with the minimal change that preserves the user's original request and are grounded in research and examples. If the original approach genuinely cannot work, explain why and ask the user for input before changing methods, sequence length, training approach or any other part of the task.\n\n  # When writing ML code\n\n  Required sequence before any training/fine-tuning/inference script:\n  1. Use `research` tool to find working examples, read docs, and get current API patterns\n  2. Validate dataset: hf_inspect_dataset or hub_repo_details to confirm column names and format\n  3. 
Validate model: hub_repo_details to confirm model exists, correct architecture/size/tokenizer\n\n  Training logging: always set disable_tqdm=True, logging_strategy=\"steps\", and logging_first_step=True in your TrainingArguments/SFTConfig so loss values are printed as plain text lines you can grep, not hidden inside tqdm progress bars.\n\n  Dataset format requirements by training method:\n    SFT: \"messages\", \"text\", or \"prompt\"/\"completion\"\n    DPO: \"prompt\", \"chosen\", \"rejected\"\n    GRPO: \"prompt\"\n\n  # Data audit\n\n  Before working with any dataset, audit it first. Do not assume you know what the data looks like — inspect it.\n\n  Use hf_inspect_dataset to check: schema/columns, number of rows per split, value distributions for key columns, sample rows. Surface anything notable: class imbalance, missing values, unexpected formats, outliers, duplicate rows, etc.\n\n  Looking at data is the best way to boost performance of any ML model plus it reduces the likelihood of failed jobs later.\n\n  # When submitting a training job\n\n  Before calling hf_jobs, output a pre-flight check:\n    - Reference implementation: [which example you based this on]\n    - Dataset format verified: [columns confirmed via hf_inspect_dataset/hub_repo_details]\n    - push_to_hub=True and hub_model_id set\n    - timeout: [value] (based on: [model size] on [hardware])\n    - Trackio monitoring included and working\n\n  If you cannot fill in all items, stop and complete the missing steps first.\n\n  For batch/ablation jobs: submit ONE job first. Check logs to confirm it starts training successfully. Only then submit the remaining jobs. Never submit all at once.\n\n  Hardware sizing:\n    1-3B params: a10g-largex2\n    7-13B params: a100-large\n    30B+ params: l40sx4 or a100x4\n    70B+ params: a100x8\n  Note: a10g-small and a10g-large have the SAME 24GB GPU memory. 
The difference is CPU/RAM only.\n\n  # Sandbox-first development\n\n  For non-trivial scripts, develop and test in a sandbox before launching via hf_jobs:\n    sandbox_create → install deps → write script → test with small run → fix errors → launch via hf_jobs at scale\n\n  Use GPU sandbox (t4-small minimum) when testing code that uses CUDA, bf16, or model loading. CPU sandboxes cannot test GPU code paths.\n\n\n  # When a task has 3+ steps\n\n  Use plan_tool to track progress. One task in_progress at a time. Mark completed immediately after finishing. Update frequently to show the user what you're doing.\n\n  # Error recovery\n\n  When something fails:\n  - Diagnose the actual error. Read the full error message and logs.\n  - Do not retry the exact same thing. Identify what needs to change.\n  - If an API/import error: check documentation for the correct API.\n  - If an OOM error: (1) reduce per_device_train_batch_size and increase gradient_accumulation_steps proportionally to keep effective batch size identical, (2) enable gradient_checkpointing=True, (3) upgrade to larger GPU (a10gx4→a100→a100x4→a100x8). Do NOT switch training methods (e.g. SFT→LoRA) or reduce max_length — those change what the user gets. 
If OOM happens in sandbox, create a new sandbox with larger GPU hardware.\n  - Never change the user's requested approach (training method, dataset, model, sequence length) without explicit approval.\n  - If a tool call fails repeatedly for the same reason: stop and try a different approach.\n  - Never silently substitute resources (datasets, models) — tell the user if something isn't available.\n\n  # Task completion\n\n  Before ending your turn, verify:\n  - Did you actually DO what the user asked, not just explain what you would do?\n  - If something failed: did you diagnose and fix it, or at minimum explain what went wrong and ask for user input?\n  - For training jobs: did you include a working Trackio dashboard URL?\n\n  Do not stop after describing what you plan to do. Continue calling tools until the task is verifiably done.\n  Do not mark plan tasks as completed if they failed or are only partially done.\n\n  # Autonomous / headless mode\n\n  When running autonomously (no human in the loop), you MUST follow these rules:\n\n  NEVER respond with only text. Every response MUST include at least one tool call. If you have nothing to do, check the plan, verify outputs or plan ahead. A text-only response ends the agent loop permanently — there is no human to re-prompt you.\n\n  NEVER STOP WORKING. Do NOT decide you are \"done\" while time remains. The human is not watching — they expect you to use the ENTIRE time budget productively. Do NOT ask \"should I continue?\" or \"is this a good stopping point?\" — there is nobody to answer. You run until the timer expires or you are manually killed.\n\n  Your workflow is a loop, not a checklist. Once you have a working result, KEEP ITERATING:\n\n  LOOP UNTIL TIME RUNS OUT:\n  1. Research the approach (read docs, find examples, check current APIs)\n  2. Implement the solution (write code, set up training)\n  3. Train and evaluate\n  4. Save the model to the required output location / push it to Hugging Face Hub\n  5. 
Improve: tune hyperparameters, try different data, adjust the training recipe, try a different approach entirely\n  6. Go to step 1\n\n  HYPERPARAMETER TUNING: Do not tune hyperparameters by hand one-at-a-time. Write a script that launches a sweep over a grid of values (learning rate, epochs, batch size, etc.) and evaluates each run automatically. One well-designed sweep script beats ten manual experiments.\n\n  If you run out of ideas: go back to the literature. Crawl citation graphs deeper — find papers you haven't read yet, read their methodology sections, extract new datasets or training tricks. Look for papers that cite your current approach and improved on it. Try combining recipes from different papers. Re-read the task prompt for angles you missed. Re-read the training logs for clues. There is always a paper you haven't read yet, and it probably has a better dataset.\n\n  Check the remaining time periodically with the timer command specified in the task prompt. Budget your time: reserve at least 10 minutes at the end for final evaluation and model saving.\n\n  The task is NOT done until:\n  - The required output exists (e.g. final model, metrics reached, dataset updated etc)\n  - You have evaluated the model and confirmed it works\n\n  # Communication\n\n  - Be concise and direct. No filler, no restating what the user said.\n  - One-word answers when appropriate for simple questions.\n  - Always include direct Hub URLs when referencing models, datasets, Spaces, or jobs.\n  - For errors: state what went wrong, why, and what you're doing to fix it.\n  - Do not over-explain or present elaborate option menus for simple tasks. When the user's intent is clear, act on it. 
Present options only when there's genuine ambiguity.\n\n  # Tool usage\n\n  - Execute multiple independent tool calls in parallel when possible.\n  - HF_TOKEN is automatically available in job secrets — no need to pass it explicitly.\n  - For training monitoring: include Trackio in the script and provide the dashboard URL.\n  - For private/gated datasets: HF_TOKEN is needed — it's auto-loaded into job secrets.\n"
  },
  {
    "path": "agent/tools/__init__.py",
    "content": "\"\"\"\nHugging Face tools for the agent\n\"\"\"\n\nfrom agent.tools.dataset_tools import (\n    HF_INSPECT_DATASET_TOOL_SPEC,\n    hf_inspect_dataset_handler,\n)\nfrom agent.tools.github_find_examples import (\n    GITHUB_FIND_EXAMPLES_TOOL_SPEC,\n    github_find_examples_handler,\n)\nfrom agent.tools.github_list_repos import (\n    GITHUB_LIST_REPOS_TOOL_SPEC,\n    github_list_repos_handler,\n)\nfrom agent.tools.github_read_file import (\n    GITHUB_READ_FILE_TOOL_SPEC,\n    github_read_file_handler,\n)\nfrom agent.tools.github_search_code import (\n    GITHUB_SEARCH_CODE_TOOL_SPEC,\n    github_search_code_handler,\n)\nfrom agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler\nfrom agent.tools.types import ToolResult\n\n__all__ = [\n    \"ToolResult\",\n    \"HF_JOBS_TOOL_SPEC\",\n    \"hf_jobs_handler\",\n    \"HfJobsTool\",\n    \"GITHUB_FIND_EXAMPLES_TOOL_SPEC\",\n    \"github_find_examples_handler\",\n    \"GITHUB_LIST_REPOS_TOOL_SPEC\",\n    \"github_list_repos_handler\",\n    \"GITHUB_READ_FILE_TOOL_SPEC\",\n    \"github_read_file_handler\",\n    \"GITHUB_SEARCH_CODE_TOOL_SPEC\",\n    \"github_search_code_handler\",\n    \"HF_INSPECT_DATASET_TOOL_SPEC\",\n    \"hf_inspect_dataset_handler\",\n]\n"
  },
  {
    "path": "agent/tools/dataset_tools.py",
    "content": "\"\"\"\nDataset Inspection Tool - Comprehensive dataset analysis in one call\n\nCombines /is-valid, /splits, /info, /first-rows, and /parquet endpoints\nto provide everything needed for ML tasks in a single tool call.\n\"\"\"\n\nimport asyncio\nfrom typing import Any, TypedDict\n\nimport httpx\n\nfrom agent.tools.types import ToolResult\n\nBASE_URL = \"https://datasets-server.huggingface.co\"\n\n# Truncation limit for long sample values in the output\nMAX_SAMPLE_VALUE_LEN = 150\n\n\nclass SplitConfig(TypedDict):\n    \"\"\"Typed representation of a dataset config and its splits.\"\"\"\n\n    name: str\n    splits: list[str]\n\n\ndef _get_headers(token: str | None = None) -> dict:\n    \"\"\"Get auth headers for private/gated datasets\"\"\"\n    if token:\n        return {\"Authorization\": f\"Bearer {token}\"}\n    return {}\n\n\nasync def inspect_dataset(\n    dataset: str,\n    config: str | None = None,\n    split: str | None = None,\n    sample_rows: int = 3,\n    hf_token: str | None = None,\n) -> ToolResult:\n    \"\"\"\n    Get comprehensive dataset info in one call.\n    All API calls made in parallel for speed.\n    \"\"\"\n    headers = _get_headers(hf_token)\n    output_parts = []\n    errors = []\n\n    async with httpx.AsyncClient(timeout=15, headers=headers) as client:\n        # Phase 1: Parallel calls for structure info (no dependencies)\n        is_valid_task = client.get(f\"{BASE_URL}/is-valid\", params={\"dataset\": dataset})\n        splits_task = client.get(f\"{BASE_URL}/splits\", params={\"dataset\": dataset})\n        parquet_task = client.get(f\"{BASE_URL}/parquet\", params={\"dataset\": dataset})\n\n        results = await asyncio.gather(\n            is_valid_task,\n            splits_task,\n            parquet_task,\n            return_exceptions=True,\n        )\n\n        # Process is-valid\n        if not isinstance(results[0], Exception):\n            try:\n                
output_parts.append(_format_status(results[0].json()))\n            except Exception as e:\n                errors.append(f\"is-valid: {e}\")\n\n        # Process splits and auto-detect config/split\n        configs = []\n        if not isinstance(results[1], Exception):\n            try:\n                splits_data = results[1].json()\n                configs = _extract_configs(splits_data)\n                if not config:\n                    config = configs[0][\"name\"] if configs else \"default\"\n                if not split:\n                    split = configs[0][\"splits\"][0] if configs else \"train\"\n                output_parts.append(_format_structure(configs))\n            except Exception as e:\n                errors.append(f\"splits: {e}\")\n\n        if not config:\n            config = \"default\"\n        if not split:\n            split = \"train\"\n\n        # Process parquet (will be added at the end)\n        parquet_section = None\n        if not isinstance(results[2], Exception):\n            try:\n                parquet_section = _format_parquet_files(results[2].json())\n            except Exception:\n                pass  # Silently skip if no parquet\n\n        # Phase 2: Parallel calls for content (depend on config/split)\n        info_task = client.get(\n            f\"{BASE_URL}/info\", params={\"dataset\": dataset, \"config\": config}\n        )\n        rows_task = client.get(\n            f\"{BASE_URL}/first-rows\",\n            params={\"dataset\": dataset, \"config\": config, \"split\": split},\n            timeout=30,\n        )\n\n        content_results = await asyncio.gather(\n            info_task,\n            rows_task,\n            return_exceptions=True,\n        )\n\n        # Process info (schema)\n        if not isinstance(content_results[0], Exception):\n            try:\n                output_parts.append(_format_schema(content_results[0].json(), config))\n            except Exception as e:\n                
errors.append(f\"info: {e}\")\n\n        # Process sample rows\n        if not isinstance(content_results[1], Exception):\n            try:\n                output_parts.append(\n                    _format_samples(\n                        content_results[1].json(), config, split, sample_rows\n                    )\n                )\n            except Exception as e:\n                errors.append(f\"rows: {e}\")\n\n        # Add parquet section at the end if available\n        if parquet_section:\n            output_parts.append(parquet_section)\n\n    # Combine output\n    formatted = f\"# {dataset}\\n\\n\" + \"\\n\\n\".join(output_parts)\n    if errors:\n        formatted += f\"\\n\\n**Warnings:** {'; '.join(errors)}\"\n\n    return {\n        \"formatted\": formatted,\n        \"totalResults\": 1,\n        \"resultsShared\": 1,\n        \"isError\": len(output_parts) == 0,\n    }\n\n\ndef _format_status(data: dict) -> str:\n    \"\"\"Format /is-valid response as status line\"\"\"\n    available = [\n        k\n        for k in [\"viewer\", \"preview\", \"search\", \"filter\", \"statistics\"]\n        if data.get(k)\n    ]\n    if available:\n        return f\"## Status\\n✓ Valid ({', '.join(available)})\"\n    return \"## Status\\n✗ Dataset may have issues\"\n\n\ndef _extract_configs(splits_data: dict) -> list[SplitConfig]:\n    \"\"\"Group splits by config\"\"\"\n    configs: dict[str, SplitConfig] = {}\n    for s in splits_data.get(\"splits\", []):\n        cfg = s.get(\"config\", \"default\")\n        if cfg not in configs:\n            configs[cfg] = {\"name\": cfg, \"splits\": []}\n        configs[cfg][\"splits\"].append(s.get(\"split\"))\n    return list(configs.values())\n\n\ndef _format_structure(configs: list[SplitConfig], max_rows: int = 10) -> str:\n    \"\"\"Format configs and splits as a markdown table.\"\"\"\n    lines = [\n        \"## Structure (configs & splits)\",\n        \"| Config | Split |\",\n        \"|--------|-------|\",\n    ]\n\n  
  total_splits = sum(len(cfg[\"splits\"]) for cfg in configs)\n    added_rows = 0\n\n    for cfg in configs:\n        for split_name in cfg[\"splits\"]:\n            if added_rows >= max_rows:\n                break\n            lines.append(f\"| {cfg['name']} | {split_name} |\")\n            added_rows += 1\n        if added_rows >= max_rows:\n            break\n\n    if total_splits > added_rows:\n        lines.append(\n            f\"| ... | ... |  (_showing {added_rows} of {total_splits} config/split rows_) |\"\n        )\n\n    return \"\\n\".join(lines)\n\n\ndef _format_schema(info: dict, config: str) -> str:\n    \"\"\"Extract features and format as table\"\"\"\n    features = info.get(\"dataset_info\", {}).get(\"features\", {})\n    lines = [f\"## Schema ({config})\", \"| Column | Type |\", \"|--------|------|\"]\n    for col_name, col_info in features.items():\n        col_type = _get_type_str(col_info)\n        lines.append(f\"| {col_name} | {col_type} |\")\n    return \"\\n\".join(lines)\n\n\ndef _get_type_str(col_info: dict) -> str:\n    \"\"\"Convert feature info to readable type string\"\"\"\n    dtype = col_info.get(\"dtype\") or col_info.get(\"_type\", \"unknown\")\n    if col_info.get(\"_type\") == \"ClassLabel\":\n        names = col_info.get(\"names\", [])\n        if names and len(names) <= 5:\n            return f\"ClassLabel ({', '.join(f'{n}={i}' for i, n in enumerate(names))})\"\n        return f\"ClassLabel ({len(names)} classes)\"\n    return str(dtype)\n\n\ndef _format_samples(rows_data: dict, config: str, split: str, limit: int) -> str:\n    \"\"\"Format sample rows, truncate long values\"\"\"\n    rows = rows_data.get(\"rows\", [])[:limit]\n    lines = [f\"## Sample Rows ({config}/{split})\"]\n\n    messages_col_data = None\n\n    for i, row_wrapper in enumerate(rows, 1):\n        row = row_wrapper.get(\"row\", {})\n        lines.append(f\"**Row {i}:**\")\n        for key, val in row.items():\n            # Check for messages column and 
capture first one for format analysis\n            if key.lower() == \"messages\" and messages_col_data is None:\n                messages_col_data = val\n\n            val_str = str(val)\n            if len(val_str) > MAX_SAMPLE_VALUE_LEN:\n                val_str = val_str[:MAX_SAMPLE_VALUE_LEN] + \"...\"\n            lines.append(f\"- {key}: {val_str}\")\n\n    # If we found a messages column, add format analysis\n    if messages_col_data is not None:\n        messages_format = _format_messages_structure(messages_col_data)\n        if messages_format:\n            lines.append(\"\")\n            lines.append(messages_format)\n\n    return \"\\n\".join(lines)\n\n\ndef _format_messages_structure(messages_data: Any) -> str | None:\n    \"\"\"\n    Analyze and format the structure of a messages column.\n    Common in chat/instruction datasets.\n    \"\"\"\n    import json\n\n    # Parse if string\n    if isinstance(messages_data, str):\n        try:\n            messages_data = json.loads(messages_data)\n        except json.JSONDecodeError:\n            return None\n\n    if not isinstance(messages_data, list) or not messages_data:\n        return None\n\n    lines = [\"## Messages Column Format\"]\n\n    # Analyze message structure\n    roles_seen = set()\n    has_tool_calls = False\n    has_tool_results = False\n    message_keys = set()\n\n    for msg in messages_data:\n        if not isinstance(msg, dict):\n            continue\n\n        message_keys.update(msg.keys())\n\n        role = msg.get(\"role\", \"\")\n        if role:\n            roles_seen.add(role)\n\n        if \"tool_calls\" in msg or \"function_call\" in msg:\n            has_tool_calls = True\n        if role in (\"tool\", \"function\") or msg.get(\"tool_call_id\"):\n            has_tool_results = True\n\n    # Format the analysis\n    lines.append(\n        f\"**Roles:** {', '.join(sorted(roles_seen)) if roles_seen else 'unknown'}\"\n    )\n\n    # Show common message keys with presence 
indicators\n    common_keys = [\n        \"role\",\n        \"content\",\n        \"tool_calls\",\n        \"tool_call_id\",\n        \"name\",\n        \"function_call\",\n    ]\n    key_status = []\n    for key in common_keys:\n        if key in message_keys:\n            key_status.append(f\"{key} ✓\")\n        else:\n            key_status.append(f\"{key} ✗\")\n    lines.append(f\"**Message keys:** {', '.join(key_status)}\")\n\n    if has_tool_calls:\n        lines.append(\"**Tool calls:** ✓ Present\")\n    if has_tool_results:\n        lines.append(\"**Tool results:** ✓ Present\")\n\n    # Show example message structure\n    # Priority: 1) message with tool_calls, 2) first assistant message, 3) first non-system message\n    example = None\n    fallback = None\n    for msg in messages_data:\n        if not isinstance(msg, dict):\n            continue\n        role = msg.get(\"role\", \"\")\n        # Check for actual tool_calls/function_call values (not None)\n        if msg.get(\"tool_calls\") or msg.get(\"function_call\"):\n            example = msg\n            break\n        if role == \"assistant\" and example is None:\n            example = msg\n        elif role != \"system\" and fallback is None:\n            fallback = msg\n    if example is None:\n        example = fallback\n\n    if example:\n        lines.append(\"\")\n        lines.append(\"**Example message structure:**\")\n        # Build a copy with truncated content but keep all keys\n        example_clean = {}\n        for key, val in example.items():\n            if key == \"content\" and isinstance(val, str) and len(val) > 100:\n                example_clean[key] = val[:100] + \"...\"\n            else:\n                example_clean[key] = val\n        lines.append(\"```json\")\n        lines.append(json.dumps(example_clean, indent=2, ensure_ascii=False))\n        lines.append(\"```\")\n\n    return \"\\n\".join(lines)\n\n\ndef _format_parquet_files(data: dict, max_rows: int = 10) -> str | 
None:\n    \"\"\"Format parquet file info, return None if no files.\"\"\"\n    files = data.get(\"parquet_files\", [])\n    if not files:\n        return None\n\n    # Group by config/split\n    groups: dict[str, dict] = {}\n    for f in files:\n        key = f\"{f.get('config', 'default')}/{f.get('split', 'train')}\"\n        if key not in groups:\n            groups[key] = {\"count\": 0, \"size\": 0}\n        size = f.get(\"size\") or 0\n        if not isinstance(size, (int, float)):\n            size = 0\n        groups[key][\"count\"] += 1\n        groups[key][\"size\"] += int(size)\n\n    lines = [\"## Files (Parquet)\"]\n    items = list(groups.items())\n    total_groups = len(items)\n\n    shown = 0\n    for key, info in items[:max_rows]:\n        size_mb = info[\"size\"] / (1024 * 1024)\n        lines.append(f\"- {key}: {info['count']} file(s) ({size_mb:.1f} MB)\")\n        shown += 1\n\n    if total_groups > shown:\n        lines.append(f\"- ... (_showing {shown} of {total_groups} parquet groups_)\")\n    return \"\\n\".join(lines)\n\n\n# Tool specification\nHF_INSPECT_DATASET_TOOL_SPEC = {\n    \"name\": \"hf_inspect_dataset\",\n    \"description\": (\n        \"Inspect a HF dataset in one call: status, configs/splits, schema, sample rows, parquet info.\\n\\n\"\n        \"REQUIRED before any training job to verify dataset format matches training method:\\n\"\n        \"  SFT: needs 'messages', 'text', or 'prompt'/'completion'\\n\"\n        \"  DPO: needs 'prompt', 'chosen', 'rejected'\\n\"\n        \"  GRPO: needs 'prompt'\\n\"\n        \"All datasets used for training have to be in conversational ChatML format to be compatible with HF libraries.\\n\"\n        \"Training will fail with KeyError if columns don't match.\\n\\n\"\n        \"Also use to get example datapoints, understand column names, data types, and available splits before writing any data loading code. 
\"\n        \"Supports private/gated datasets when HF_TOKEN is set.\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"dataset\": {\n                \"type\": \"string\",\n                \"description\": \"Dataset ID in 'org/name' format (e.g., 'stanfordnlp/imdb')\",\n            },\n            \"config\": {\n                \"type\": \"string\",\n                \"description\": \"Config/subset name. Auto-detected if not specified.\",\n            },\n            \"split\": {\n                \"type\": \"string\",\n                \"description\": \"Split for sample rows. Auto-detected if not specified.\",\n            },\n            \"sample_rows\": {\n                \"type\": \"integer\",\n                \"description\": \"Number of sample rows to show (default: 3, max: 10)\",\n                \"default\": 3,\n            },\n        },\n        \"required\": [\"dataset\"],\n    },\n}\n\n\nasync def hf_inspect_dataset_handler(arguments: dict[str, Any], session=None) -> tuple[str, bool]:\n    \"\"\"Handler for agent tool router\"\"\"\n    try:\n        hf_token = session.hf_token if session else None\n        result = await inspect_dataset(\n            dataset=arguments[\"dataset\"],\n            config=arguments.get(\"config\"),\n            split=arguments.get(\"split\"),\n            sample_rows=min(arguments.get(\"sample_rows\", 3), 10),\n            hf_token=hf_token,\n        )\n        return result[\"formatted\"], not result.get(\"isError\", False)\n    except Exception as e:\n        return f\"Error inspecting dataset: {str(e)}\", False\n"
  },
  {
    "path": "agent/tools/docs_tools.py",
    "content": "\"\"\"\nDocumentation search tools for exploring HuggingFace and Gradio documentation.\n\"\"\"\n\nimport asyncio\nimport json\nfrom typing import Any\n\nimport httpx\nfrom bs4 import BeautifulSoup\nfrom whoosh.analysis import StemmingAnalyzer\nfrom whoosh.fields import ID, TEXT, Schema\nfrom whoosh.filedb.filestore import RamStorage\nfrom whoosh.qparser import MultifieldParser, OrGroup\n\n# ---------------------------------------------------------------------------\n# Configuration\n# ---------------------------------------------------------------------------\n\nDEFAULT_MAX_RESULTS = 20\nMAX_RESULTS_CAP = 50\n\nGRADIO_LLMS_TXT_URL = \"https://gradio.app/llms.txt\"\nGRADIO_SEARCH_URL = \"https://playground-worker.pages.dev/api/prompt\"\n\nCOMPOSITE_ENDPOINTS: dict[str, list[str]] = {\n    \"optimum\": [\n        \"optimum\",\n        \"optimum-habana\",\n        \"optimum-neuron\",\n        \"optimum-intel\",\n        \"optimum-executorch\",\n        \"optimum-tpu\",\n    ],\n    \"courses\": [\n        \"llm-course\",\n        \"robotics-course\",\n        \"mcp-course\",\n        \"smol-course\",\n        \"agents-course\",\n        \"deep-rl-course\",\n        \"computer-vision-course\",\n        \"audio-course\",\n        \"ml-games-course\",\n        \"diffusion-course\",\n        \"ml-for-3d-course\",\n        \"cookbook\",\n    ],\n}\n\n# ---------------------------------------------------------------------------\n# Caches\n# ---------------------------------------------------------------------------\n\n_docs_cache: dict[str, list[dict[str, str]]] = {}\n_index_cache: dict[str, tuple[Any, MultifieldParser]] = {}\n_cache_lock = asyncio.Lock()\n_openapi_cache: dict[str, Any] | None = None\n_openapi_index_cache: tuple[Any, MultifieldParser, list[dict[str, Any]]] | None = None\n\n# ---------------------------------------------------------------------------\n# Gradio Documentation\n# 
---------------------------------------------------------------------------\n\n\nasync def _fetch_gradio_docs(query: str | None = None) -> str:\n    \"\"\"\n    Fetch Gradio documentation.\n    Without query: Get full documentation from llms.txt\n    With query: Run embedding search on guides/demos for relevant content\n    \"\"\"\n    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:\n        if not query:\n            resp = await client.get(GRADIO_LLMS_TXT_URL)\n            resp.raise_for_status()\n            return resp.text\n\n        resp = await client.post(\n            GRADIO_SEARCH_URL,\n            headers={\n                \"Content-Type\": \"application/json\",\n                \"Origin\": \"https://gradio-docs-mcp.up.railway.app\",\n            },\n            json={\n                \"prompt_to_embed\": query,\n                \"SYSTEM_PROMPT\": \"$INSERT_GUIDES_DOCS_DEMOS\",\n                \"FALLBACK_PROMPT\": \"No results found\",\n            },\n        )\n        resp.raise_for_status()\n        return resp.json().get(\"SYS_PROMPT\", \"No results found\")\n\n\n# ---------------------------------------------------------------------------\n# HF Documentation - Fetching\n# ---------------------------------------------------------------------------\n\n\nasync def _fetch_endpoint_docs(hf_token: str, endpoint: str) -> list[dict[str, str]]:\n    \"\"\"Fetch all docs for an endpoint by parsing sidebar and fetching each page.\"\"\"\n    url = f\"https://huggingface.co/docs/{endpoint}\"\n    headers = {\"Authorization\": f\"Bearer {hf_token}\"}\n\n    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:\n        resp = await client.get(url, headers=headers)\n        resp.raise_for_status()\n\n        soup = BeautifulSoup(resp.text, \"html.parser\")\n        sidebar = soup.find(\"nav\", class_=lambda x: x and \"flex-auto\" in x)\n        if not sidebar:\n            raise ValueError(f\"Could not find 
navigation sidebar for '{endpoint}'\")\n\n        nav_items = []\n        for link in sidebar.find_all(\"a\", href=True):\n            href = link[\"href\"]\n            page_url = f\"https://huggingface.co{href}\" if href.startswith(\"/\") else href\n            nav_items.append({\"title\": link.get_text(strip=True), \"url\": page_url})\n\n        if not nav_items:\n            raise ValueError(f\"No navigation links found for '{endpoint}'\")\n\n        async def fetch_page(item: dict[str, str]) -> dict[str, str]:\n            md_url = f\"{item['url']}.md\"\n            try:\n                r = await client.get(md_url, headers=headers)\n                r.raise_for_status()\n                content = r.text.strip()\n                glimpse = content[:200] + \"...\" if len(content) > 200 else content\n            except Exception as e:\n                content, glimpse = \"\", f\"[Could not fetch: {str(e)[:50]}]\"\n            return {\n                \"title\": item[\"title\"],\n                \"url\": item[\"url\"],\n                \"md_url\": md_url,\n                \"glimpse\": glimpse,\n                \"content\": content,\n                \"section\": endpoint,\n            }\n\n        return list(await asyncio.gather(*[fetch_page(item) for item in nav_items]))\n\n\nasync def _get_docs(hf_token: str, endpoint: str) -> list[dict[str, str]]:\n    \"\"\"Get docs for endpoint with caching. 
Expands composite endpoints.\"\"\"\n    async with _cache_lock:\n        if endpoint in _docs_cache:\n            return _docs_cache[endpoint]\n\n    sub_endpoints = COMPOSITE_ENDPOINTS.get(endpoint, [endpoint])\n    all_docs: list[dict[str, str]] = []\n\n    for sub in sub_endpoints:\n        async with _cache_lock:\n            if sub in _docs_cache:\n                all_docs.extend(_docs_cache[sub])\n                continue\n\n        docs = await _fetch_endpoint_docs(hf_token, sub)\n        async with _cache_lock:\n            _docs_cache[sub] = docs\n        all_docs.extend(docs)\n\n    async with _cache_lock:\n        _docs_cache[endpoint] = all_docs\n    return all_docs\n\n\n# ---------------------------------------------------------------------------\n# HF Documentation - Search\n# ---------------------------------------------------------------------------\n\n\nasync def _build_search_index(\n    endpoint: str, docs: list[dict[str, str]]\n) -> tuple[Any, MultifieldParser]:\n    \"\"\"Build or retrieve cached Whoosh search index.\"\"\"\n    async with _cache_lock:\n        if endpoint in _index_cache:\n            return _index_cache[endpoint]\n\n    analyzer = StemmingAnalyzer()\n    schema = Schema(\n        title=TEXT(stored=True, analyzer=analyzer),\n        url=ID(stored=True, unique=True),\n        md_url=ID(stored=True),\n        section=ID(stored=True),\n        glimpse=TEXT(stored=True, analyzer=analyzer),\n        content=TEXT(stored=False, analyzer=analyzer),\n    )\n    storage = RamStorage()\n    index = storage.create_index(schema)\n    writer = index.writer()\n    for doc in docs:\n        writer.add_document(\n            title=doc.get(\"title\", \"\"),\n            url=doc.get(\"url\", \"\"),\n            md_url=doc.get(\"md_url\", \"\"),\n            section=doc.get(\"section\", endpoint),\n            glimpse=doc.get(\"glimpse\", \"\"),\n            content=doc.get(\"content\", \"\"),\n        )\n    writer.commit()\n\n    parser = 
MultifieldParser(\n        [\"title\", \"content\"],\n        schema=schema,\n        fieldboosts={\"title\": 2.0, \"content\": 1.0},\n        group=OrGroup,\n    )\n\n    async with _cache_lock:\n        _index_cache[endpoint] = (index, parser)\n    return index, parser\n\n\nasync def _search_docs(\n    endpoint: str, docs: list[dict[str, str]], query: str, limit: int\n) -> tuple[list[dict[str, Any]], str | None]:\n    \"\"\"Search docs using Whoosh. Returns (results, fallback_message).\"\"\"\n    index, parser = await _build_search_index(endpoint, docs)\n\n    try:\n        query_obj = parser.parse(query)\n    except Exception:\n        return [], \"Query contained unsupported syntax; showing default ordering.\"\n\n    with index.searcher() as searcher:\n        results = searcher.search(query_obj, limit=limit)\n        matches = [\n            {\n                \"title\": hit[\"title\"],\n                \"url\": hit[\"url\"],\n                \"md_url\": hit.get(\"md_url\", \"\"),\n                \"section\": hit.get(\"section\", endpoint),\n                \"glimpse\": hit[\"glimpse\"],\n                \"score\": round(hit.score, 2),\n            }\n            for hit in results\n        ]\n\n    if not matches:\n        return [], \"No strong matches found; showing default ordering.\"\n    return matches, None\n\n\n# ---------------------------------------------------------------------------\n# HF Documentation - Formatting\n# ---------------------------------------------------------------------------\n\n\ndef _format_results(\n    endpoint: str,\n    items: list[dict[str, Any]],\n    total: int,\n    query: str | None = None,\n    note: str | None = None,\n) -> str:\n    \"\"\"Format search results as readable text.\"\"\"\n    base_url = f\"https://huggingface.co/docs/{endpoint}\"\n    out = f\"Documentation structure for: {base_url}\\n\\n\"\n\n    if query:\n        out += f\"Query: '{query}' → showing {len(items)} result(s) out of {total} pages\"\n     
   if note:\n            out += f\" ({note})\"\n        out += \"\\n\\n\"\n    else:\n        out += f\"Found {len(items)} page(s) (total available: {total}).\\n\"\n        if note:\n            out += f\"({note})\\n\"\n        out += \"\\n\"\n\n    for i, item in enumerate(items, 1):\n        out += f\"{i}. **{item['title']}**\\n\"\n        out += f\"   URL: {item['url']}\\n\"\n        out += f\"   Section: {item.get('section', endpoint)}\\n\"\n        if query and \"score\" in item:\n            out += f\"   Relevance score: {item['score']:.2f}\\n\"\n        out += f\"   Glimpse: {item['glimpse']}\\n\\n\"\n\n    return out\n\n\n# ---------------------------------------------------------------------------\n# Handlers\n# ---------------------------------------------------------------------------\n\n\nasync def explore_hf_docs_handler(\n    arguments: dict[str, Any], session=None\n) -> tuple[str, bool]:\n    \"\"\"Explore documentation structure with optional search query.\"\"\"\n    endpoint = arguments.get(\"endpoint\", \"\").lstrip(\"/\")\n    query = arguments.get(\"query\")\n    max_results = arguments.get(\"max_results\")\n\n    if not endpoint:\n        return \"Error: No endpoint provided\", False\n\n    # Gradio uses its own API\n    if endpoint.lower() == \"gradio\":\n        try:\n            clean_query = (\n                query.strip() if isinstance(query, str) and query.strip() else None\n            )\n            content = await _fetch_gradio_docs(clean_query)\n            header = \"# Gradio Documentation\\n\\n\"\n            if clean_query:\n                header += f\"Query: '{clean_query}'\\n\\n\"\n            header += \"Source: https://gradio.app/docs\\n\\n---\\n\\n\"\n            return header + content, True\n        except httpx.HTTPStatusError as e:\n            return f\"HTTP error fetching Gradio docs: {e.response.status_code}\", False\n        except httpx.RequestError as e:\n            return f\"Request error fetching Gradio docs: 
{str(e)}\", False\n        except Exception as e:\n            return f\"Error fetching Gradio docs: {str(e)}\", False\n\n    # HF docs\n    hf_token = session.hf_token if session else None\n    if not hf_token:\n        return \"Error: No HF token available (not logged in)\", False\n\n    try:\n        max_results_int = int(max_results) if max_results is not None else None\n    except (TypeError, ValueError):\n        return \"Error: max_results must be an integer\", False\n\n    if max_results_int is not None and max_results_int <= 0:\n        return \"Error: max_results must be greater than zero\", False\n\n    try:\n        docs = await _get_docs(hf_token, endpoint)\n        total = len(docs)\n\n        # Determine limit\n        if max_results_int is None:\n            limit = DEFAULT_MAX_RESULTS\n            limit_note = f\"Showing top {DEFAULT_MAX_RESULTS} results (set max_results to adjust).\"\n        elif max_results_int > MAX_RESULTS_CAP:\n            limit = MAX_RESULTS_CAP\n            limit_note = f\"Requested {max_results_int} but showing top {MAX_RESULTS_CAP} (maximum).\"\n        else:\n            limit = max_results_int\n            limit_note = None\n\n        # Search or paginate\n        clean_query = (\n            query.strip() if isinstance(query, str) and query.strip() else None\n        )\n        fallback_msg = None\n\n        if clean_query:\n            results, fallback_msg = await _search_docs(\n                endpoint, docs, clean_query, limit\n            )\n            if not results:\n                results = docs[:limit]\n        else:\n            results = docs[:limit]\n\n        # Combine notes\n        notes = []\n        if fallback_msg:\n            notes.append(fallback_msg)\n        if limit_note:\n            notes.append(limit_note)\n        note = \"; \".join(notes) if notes else None\n\n        return _format_results(endpoint, results, total, clean_query, note), True\n\n    except httpx.HTTPStatusError as e:\n      
  return f\"HTTP error: {e.response.status_code} - {e.response.text[:200]}\", False\n    except httpx.RequestError as e:\n        return f\"Request error: {str(e)}\", False\n    except ValueError as e:\n        return f\"Error: {str(e)}\", False\n    except Exception as e:\n        return f\"Unexpected error: {str(e)}\", False\n\n\nasync def hf_docs_fetch_handler(\n    arguments: dict[str, Any], session=None\n) -> tuple[str, bool]:\n    \"\"\"Fetch full markdown content of a documentation page.\"\"\"\n    url = arguments.get(\"url\", \"\")\n    if not url:\n        return \"Error: No URL provided\", False\n\n    hf_token = session.hf_token if session else None\n    if not hf_token:\n        return \"Error: No HF token available (not logged in)\", False\n\n    if not url.endswith(\".md\"):\n        url = f\"{url}.md\"\n\n    try:\n        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:\n            resp = await client.get(\n                url, headers={\"Authorization\": f\"Bearer {hf_token}\"}\n            )\n            resp.raise_for_status()\n        return f\"Documentation from: {url}\\n\\n{resp.text}\", True\n    except httpx.HTTPStatusError as e:\n        return (\n            f\"HTTP error fetching {url}: {e.response.status_code} - {e.response.text[:200]}\",\n            False,\n        )\n    except httpx.RequestError as e:\n        return f\"Request error fetching {url}: {str(e)}\", False\n    except Exception as e:\n        return f\"Error fetching documentation: {str(e)}\", False\n\n\n# ---------------------------------------------------------------------------\n# OpenAPI Search\n# ---------------------------------------------------------------------------\n\n\nasync def _fetch_openapi_spec() -> dict[str, Any]:\n    \"\"\"Fetch and cache HuggingFace OpenAPI specification.\"\"\"\n    global _openapi_cache\n    if _openapi_cache is not None:\n        return _openapi_cache\n\n    async with httpx.AsyncClient(timeout=30.0, 
follow_redirects=True) as client:\n        resp = await client.get(\"https://huggingface.co/.well-known/openapi.json\")\n        resp.raise_for_status()\n\n    _openapi_cache = resp.json()\n    return _openapi_cache\n\n\ndef _extract_all_tags(spec: dict[str, Any]) -> list[str]:\n    \"\"\"Extract all unique tags from OpenAPI spec.\"\"\"\n    tags = set()\n    for tag_obj in spec.get(\"tags\", []):\n        if \"name\" in tag_obj:\n            tags.add(tag_obj[\"name\"])\n    for path_item in spec.get(\"paths\", {}).values():\n        for method, op in path_item.items():\n            if method in [\"get\", \"post\", \"put\", \"delete\", \"patch\", \"head\", \"options\"]:\n                for tag in op.get(\"tags\", []):\n                    tags.add(tag)\n    return sorted(tags)\n\n\ndef _extract_all_endpoints(spec: dict[str, Any]) -> list[dict[str, Any]]:\n    \"\"\"Extract all endpoints from OpenAPI spec.\"\"\"\n    servers = spec.get(\"servers\", [])\n    base_url = (\n        servers[0].get(\"url\", \"https://huggingface.co\")\n        if servers\n        else \"https://huggingface.co\"\n    )\n\n    endpoints = []\n    for path, path_item in spec.get(\"paths\", {}).items():\n        for method, op in path_item.items():\n            if method not in [\n                \"get\",\n                \"post\",\n                \"put\",\n                \"delete\",\n                \"patch\",\n                \"head\",\n                \"options\",\n            ]:\n                continue\n            endpoints.append(\n                {\n                    \"path\": path,\n                    \"method\": method.upper(),\n                    \"operationId\": op.get(\"operationId\", \"\"),\n                    \"summary\": op.get(\"summary\", \"\"),\n                    \"description\": op.get(\"description\", \"\"),\n                    \"tags\": \" \".join(op.get(\"tags\", [])),\n                    \"parameters\": op.get(\"parameters\", []),\n                    
\"request_body\": op.get(\"requestBody\", {}),\n                    \"responses\": op.get(\"responses\", {}),\n                    \"base_url\": base_url,\n                }\n            )\n    return endpoints\n\n\nasync def _build_openapi_index() -> tuple[Any, MultifieldParser, list[dict[str, Any]]]:\n    \"\"\"Build or retrieve cached Whoosh index for OpenAPI endpoints.\"\"\"\n    global _openapi_index_cache\n    async with _cache_lock:\n        if _openapi_index_cache is not None:\n            return _openapi_index_cache\n\n    spec = await _fetch_openapi_spec()\n    endpoints = _extract_all_endpoints(spec)\n\n    analyzer = StemmingAnalyzer()\n    schema = Schema(\n        path=ID(stored=True, unique=True),\n        method=ID(stored=True),\n        operationId=TEXT(stored=True, analyzer=analyzer),\n        summary=TEXT(stored=True, analyzer=analyzer),\n        description=TEXT(stored=True, analyzer=analyzer),\n        tags=TEXT(stored=True, analyzer=analyzer),\n        param_names=TEXT(stored=False, analyzer=analyzer),\n    )\n    storage = RamStorage()\n    index = storage.create_index(schema)\n    writer = index.writer()\n\n    for ep in endpoints:\n        param_names = \" \".join(p.get(\"name\", \"\") for p in ep.get(\"parameters\", []))\n        writer.add_document(\n            path=ep[\"path\"],\n            method=ep[\"method\"],\n            operationId=ep.get(\"operationId\", \"\"),\n            summary=ep.get(\"summary\", \"\"),\n            description=ep.get(\"description\", \"\"),\n            tags=ep.get(\"tags\", \"\"),\n            param_names=param_names,\n        )\n    writer.commit()\n\n    parser = MultifieldParser(\n        [\"summary\", \"description\", \"operationId\", \"tags\", \"param_names\"],\n        schema=schema,\n        fieldboosts={\n            \"summary\": 3.0,\n            \"operationId\": 2.0,\n            \"description\": 1.0,\n            \"tags\": 1.5,\n        },\n        group=OrGroup,\n    )\n\n    async with 
_cache_lock:\n        _openapi_index_cache = (index, parser, endpoints)\n    return index, parser, endpoints\n\n\nasync def _search_openapi(\n    query: str, tag: str | None, limit: int = 20\n) -> tuple[list[dict[str, Any]], str | None]:\n    \"\"\"Search OpenAPI endpoints using Whoosh. Returns (results, fallback_message).\"\"\"\n    index, parser, endpoints = await _build_openapi_index()\n\n    try:\n        query_obj = parser.parse(query)\n    except Exception:\n        return [], \"Query contained unsupported syntax.\"\n\n    with index.searcher() as searcher:\n        results = searcher.search(\n            query_obj, limit=limit * 2\n        )  # Get extra for tag filtering\n        matches = []\n        for hit in results:\n            # Find full endpoint data\n            ep = next(\n                (\n                    e\n                    for e in endpoints\n                    if e[\"path\"] == hit[\"path\"] and e[\"method\"] == hit[\"method\"]\n                ),\n                None,\n            )\n            if ep is None:\n                continue\n            # Filter by tag if provided\n            if tag and tag not in ep.get(\"tags\", \"\"):\n                continue\n            matches.append({**ep, \"score\": round(hit.score, 2)})\n            if len(matches) >= limit:\n                break\n\n    return matches, None if matches else \"No matches found for query.\"\n\n\ndef _generate_curl_example(endpoint: dict[str, Any]) -> str:\n    \"\"\"Generate curl command example for an endpoint.\"\"\"\n    method = endpoint[\"method\"]\n    path = endpoint[\"path\"]\n    base_url = endpoint[\"base_url\"]\n\n    # Build URL with path parameters\n    full_path = path\n    for param in endpoint.get(\"parameters\", []):\n        if param.get(\"in\") == \"path\" and param.get(\"required\"):\n            name = param[\"name\"]\n            example = param.get(\n                \"example\", param.get(\"schema\", {}).get(\"example\", f\"<{name}>\")\n   
         )\n            full_path = full_path.replace(f\"{{{name}}}\", str(example))\n\n    curl = f\"curl -X {method} \\\\\\n  '{base_url}{full_path}'\"\n\n    # Add query parameters\n    query_params = [p for p in endpoint.get(\"parameters\", []) if p.get(\"in\") == \"query\"]\n    if query_params and query_params[0].get(\"required\"):\n        param = query_params[0]\n        example = param.get(\"example\", param.get(\"schema\", {}).get(\"example\", \"value\"))\n        curl += f\"?{param['name']}={example}\"\n\n    curl += \" \\\\\\n  -H 'Authorization: Bearer $HF_TOKEN'\"\n\n    # Add request body\n    if method in [\"POST\", \"PUT\", \"PATCH\"] and endpoint.get(\"request_body\"):\n        content = endpoint[\"request_body\"].get(\"content\", {})\n        if \"application/json\" in content:\n            curl += \" \\\\\\n  -H 'Content-Type: application/json'\"\n            schema = content[\"application/json\"].get(\"schema\", {})\n            example = schema.get(\"example\", \"{}\")\n            if isinstance(example, dict):\n                example = json.dumps(example, indent=2)\n            curl += f\" \\\\\\n  -d '{example}'\"\n\n    return curl\n\n\ndef _format_parameters(parameters: list[dict[str, Any]]) -> str:\n    \"\"\"Format parameter information from OpenAPI spec.\"\"\"\n    if not parameters:\n        return \"\"\n\n    path_params = [p for p in parameters if p.get(\"in\") == \"path\"]\n    query_params = [p for p in parameters if p.get(\"in\") == \"query\"]\n    header_params = [p for p in parameters if p.get(\"in\") == \"header\"]\n\n    output = []\n\n    for label, params in [\n        (\"Path Parameters\", path_params),\n        (\"Query Parameters\", query_params),\n        (\"Header Parameters\", header_params),\n    ]:\n        if not params:\n            continue\n        if output:\n            output.append(\"\")\n        output.append(f\"**{label}:**\")\n        for p in params:\n            name = p.get(\"name\", \"\")\n            
required = \" (required)\" if p.get(\"required\") else \" (optional)\"\n            desc = p.get(\"description\", \"\")\n            ptype = p.get(\"schema\", {}).get(\"type\", \"string\")\n            example = p.get(\"example\") or p.get(\"schema\", {}).get(\"example\", \"\")\n\n            output.append(f\"- `{name}` ({ptype}){required}: {desc}\")\n            if example:\n                output.append(f\"  Example: `{example}`\")\n\n    return \"\\n\".join(output)\n\n\ndef _format_response_info(responses: dict[str, Any]) -> str:\n    \"\"\"Format response information from OpenAPI spec.\"\"\"\n    if not responses:\n        return \"No response information available\"\n\n    output = []\n    for status, resp_obj in list(responses.items())[:3]:\n        desc = resp_obj.get(\"description\", \"\")\n        output.append(f\"- **{status}**: {desc}\")\n        content = resp_obj.get(\"content\", {})\n        if \"application/json\" in content:\n            schema = content[\"application/json\"].get(\"schema\", {})\n            if \"type\" in schema:\n                output.append(f\"  Returns: {schema.get('type', 'object')}\")\n\n    return \"\\n\".join(output)\n\n\ndef _format_openapi_results(\n    results: list[dict[str, Any]],\n    tag: str | None = None,\n    query: str | None = None,\n    note: str | None = None,\n) -> str:\n    \"\"\"Format OpenAPI search results with curl examples.\"\"\"\n    if not results:\n        if query and tag:\n            return f\"No API endpoints found matching '{query}' in tag '{tag}'\"\n        elif query:\n            return f\"No API endpoints found matching '{query}'\"\n        elif tag:\n            return f\"No API endpoints found with tag '{tag}'\"\n        return \"No API endpoints found\"\n\n    # Build header\n    if query and tag:\n        out = f\"# API Endpoints matching '{query}' (tag: `{tag}`)\\n\\n\"\n    elif query:\n        out = f\"# API Endpoints matching '{query}'\\n\\n\"\n    elif tag:\n        out = f\"# API 
Endpoints for tag: `{tag}`\\n\\n\"\n    else:\n        out = \"# API Endpoints\\n\\n\"\n\n    out += f\"Found {len(results)} endpoint(s)\"\n    if note:\n        out += f\" ({note})\"\n    out += \"\\n\\n---\\n\\n\"\n\n    for i, ep in enumerate(results, 1):\n        out += f\"## {i}. {ep['method']} {ep['path']}\\n\\n\"\n\n        if query and \"score\" in ep:\n            out += f\"**Relevance:** {ep['score']:.2f}\\n\\n\"\n\n        if ep.get(\"summary\"):\n            out += f\"**Summary:** {ep['summary']}\\n\\n\"\n\n        if ep.get(\"description\"):\n            desc = ep[\"description\"][:300]\n            if len(ep[\"description\"]) > 300:\n                desc += \"...\"\n            out += f\"**Description:** {desc}\\n\\n\"\n\n        if ep.get(\"tags\"):\n            out += f\"**Tags:** {ep['tags']}\\n\\n\"\n\n        params_info = _format_parameters(ep.get(\"parameters\", []))\n        if params_info:\n            out += params_info + \"\\n\\n\"\n\n        out += \"**Usage:**\\n```bash\\n\"\n        out += _generate_curl_example(ep)\n        out += \"\\n```\\n\\n\"\n\n        out += \"**Returns:**\\n\"\n        out += _format_response_info(ep[\"responses\"])\n        out += \"\\n\\n---\\n\\n\"\n\n    return out\n\n\nasync def search_openapi_handler(arguments: dict[str, Any]) -> tuple[str, bool]:\n    \"\"\"Search HuggingFace OpenAPI specification by query and/or tag.\"\"\"\n    tag = arguments.get(\"tag\", \"\").strip() or None\n    query = arguments.get(\"query\", \"\").strip() or None\n\n    if not tag and not query:\n        return (\n            \"Error: Provide either 'query' (keyword search) or 'tag' (category filter), or both.\",\n            False,\n        )\n\n    try:\n        note = None\n\n        # If query provided, try Whoosh search first\n        if query:\n            results, search_note = await _search_openapi(query, tag, limit=20)\n\n            # If Whoosh found results, return them\n            if results:\n                return 
_format_openapi_results(\n                    results, tag=tag, query=query, note=search_note\n                ), True\n\n            # Whoosh found nothing - fall back to tag-based if tag provided\n            if tag:\n                note = f\"No matches for '{query}'; showing all endpoints in tag '{tag}'\"\n            else:\n                # No tag to fall back to\n                return _format_openapi_results([], query=query), True\n\n        # Tag-based search (either as fallback or primary)\n        if tag:\n            _, _, endpoints = await _build_openapi_index()\n            results = [ep for ep in endpoints if tag in ep.get(\"tags\", \"\")]\n            return _format_openapi_results(\n                results, tag=tag, query=None, note=note\n            ), True\n\n        return \"Error: No results found\", False\n\n    except httpx.HTTPStatusError as e:\n        return f\"HTTP error fetching OpenAPI spec: {e.response.status_code}\", False\n    except httpx.RequestError as e:\n        return f\"Request error: {str(e)}\", False\n    except Exception as e:\n        return f\"Error searching OpenAPI spec: {str(e)}\", False\n\n\nasync def _get_api_search_tool_spec() -> dict[str, Any]:\n    \"\"\"Generate OpenAPI tool spec with tags populated at runtime.\"\"\"\n    spec = await _fetch_openapi_spec()\n    tags = _extract_all_tags(spec)\n\n    return {\n        \"name\": \"find_hf_api\",\n        \"description\": (\n            \"Find HuggingFace Hub REST API endpoints to make HTTP requests. Returns curl examples with authentication. \"\n            \"⚠️ USE THIS TOOL when you need to call the HF Hub API directly - for operations like: \"\n            \"uploading/downloading files, managing repos, listing models/datasets, getting user info, \"\n            \"managing webhooks, collections, discussions, or any Hub interaction not covered by other tools. 
\"\n            \"**Use cases:** (1) 'Stream Space logs' → query='space logs', \"\n            \"(2) 'Get Space metrics/Zero-GPU usage' → query='space metrics', \"\n            \"(3) 'List organization members' → query='organization members', \"\n            \"(4) 'Generate repo access token' → query='jwt token', \"\n            \"(5) 'Check repo security scan' → query='security scan'. \"\n            \"**Search modes:** Use 'query' for keyword search, 'tag' to browse a category, or both. \"\n            \"If query finds no results, falls back to showing all endpoints in the tag. \"\n            \"**Output:** Full endpoint details with method, path, parameters, curl command, and response schema.\"\n        ),\n        \"parameters\": {\n            \"type\": \"object\",\n            \"properties\": {\n                \"query\": {\n                    \"type\": \"string\",\n                    \"description\": (\n                        \"Keyword search across endpoint summaries, descriptions, and operation IDs. \"\n                        \"Examples: 'upload file', 'create repository', 'list user models', 'delete branch', \"\n                        \"'webhook', 'collection', 'discussion comments'. Supports stemming (upload/uploading both work).\"\n                    ),\n                },\n                \"tag\": {\n                    \"type\": \"string\",\n                    \"enum\": tags,\n                    \"description\": (\n                        \"Filter by API category. 
Use alone to browse all endpoints in a category, \"\n                        \"or combine with 'query' to search within a category.\"\n                    ),\n                },\n            },\n            \"required\": [],\n        },\n    }\n\n\n# ---------------------------------------------------------------------------\n# Tool Specifications\n# ---------------------------------------------------------------------------\n\nDOC_ENDPOINTS = [\n    \"hub\",\n    \"transformers\",\n    \"diffusers\",\n    \"datasets\",\n    \"gradio\",\n    \"trackio\",\n    \"smolagents\",\n    \"huggingface_hub\",\n    \"huggingface.js\",\n    \"transformers.js\",\n    \"inference-providers\",\n    \"inference-endpoints\",\n    \"peft\",\n    \"accelerate\",\n    \"optimum\",\n    \"tokenizers\",\n    \"courses\",\n    \"evaluate\",\n    \"tasks\",\n    \"dataset-viewer\",\n    \"trl\",\n    \"simulate\",\n    \"sagemaker\",\n    \"timm\",\n    \"safetensors\",\n    \"tgi\",\n    \"setfit\",\n    \"lerobot\",\n    \"autotrain\",\n    \"tei\",\n    \"bitsandbytes\",\n    \"sentence_transformers\",\n    \"chat-ui\",\n    \"leaderboards\",\n    \"lighteval\",\n    \"argilla\",\n    \"distilabel\",\n    \"microsoft-azure\",\n    \"kernels\",\n    \"google-cloud\",\n]\n\nEXPLORE_HF_DOCS_TOOL_SPEC = {\n    \"name\": \"explore_hf_docs\",\n    \"description\": (\n        \"Browse HF documentation structure — discover all available documentation with 200-char previews.\\n\\n\"\n        \"Use this to find relevant documentation and/or examples with detailed parameter docs and API reference. \"\n        \"To be used together with github_find_examples and github_read_file to find working examples and documentation.\\n\\n\"\n        \"Pattern: explore_hf_docs (find relevant pages) → fetch_hf_docs (get full content).\\n\\n\"\n        \"For training tasks: fetch the trainer config docs (SFTConfig, DPOConfig, GRPOConfig) to verify parameter names. 
\"\n        \"Returns top 20 results by default; set max_results (max 50) to adjust.\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"endpoint\": {\n                \"type\": \"string\",\n                \"enum\": DOC_ENDPOINTS,\n                \"description\": (\n                    \"The documentation endpoint to explore. Each endpoint corresponds to a major section of the Hugging Face documentation:\\n\\n\"\n                    \"• courses — All Hugging Face courses (LLM, robotics, MCP, smol (llm training), agents, deep RL, computer vision, games, diffusion, 3D, audio) and the cookbook recipes. Probably the best place for examples.\\n\"\n                    \"• hub — Find answers to questions about models/datasets/spaces, auth, versioning, metadata.\\n\"\n                    \"• transformers — Core model library: architectures, configs, tokenizers, training & inference APIs.\\n\"\n                    \"• diffusers — Diffusion pipelines, schedulers, fine-tuning, training, and deployment patterns.\\n\"\n                    \"• datasets — Dataset loading, streaming, processing, Arrow format, Hub integration.\\n\"\n                    \"• gradio — UI components and demos for ML models. 
Uses Gradio's native API: without query returns full docs (llms.txt), with query uses embedding search for precise results.\\n\"\n                    \"• trackio — Experiment tracking, metrics logging, and run comparison.\\n\"\n                    \"• smolagents — Lightweight agent abstractions and tool-using patterns.\\n\"\n                    \"• huggingface_hub — Python client for Hub operations (auth, upload/download, repo management).\\n\"\n                    \"• huggingface.js — JS/TS client for Hub APIs in browser and Node.\\n\"\n                    \"• transformers.js — Run Transformer models in browser/Node via WebGPU/WASM.\\n\"\n                    \"• inference-providers — Unified interface for third-party inference backends.\\n\"\n                    \"• inference-endpoints — Managed, scalable model deployments on HF infrastructure.\\n\"\n                    \"• peft — Parameter-efficient fine-tuning methods (LoRA, adapters, etc.).\\n\"\n                    \"• accelerate — Hardware-agnostic, distributed and mixed-precision training orchestration.\\n\"\n                    \"• optimum — Hardware-aware optimization and model export tooling, including Habana, Neuron, Intel, ExecuTorch, and TPU variants.\\n\"\n                    \"• tokenizers — Fast tokenizer internals, training, and low-level APIs.\\n\"\n                    \"• evaluate — Metrics, evaluation workflows, and training-loop integration.\\n\"\n                    \"• tasks — Canonical task definitions and model categorization.\\n\"\n                    \"• dataset-viewer — Dataset preview, streaming views, and viewer internals.\\n\"\n                    \"• trl — RLHF, DPO, PPO, and SFT utilities for LLMs.\\n\"\n                    \"• simulate — Experimental simulation tools and workflows.\\n\"\n                    \"• sagemaker — Deploying Hugging Face models on AWS SageMaker.\\n\"\n                    \"• timm — Image model zoo and utilities via HF integrations.\\n\"\n                    
\"• safetensors — Safe, fast tensor serialization format.\\n\"\n                    \"• tgi — High-throughput text generation server for LLMs.\\n\"\n                    \"• setfit — Few-shot text classification via sentence embeddings.\\n\"\n                    \"• lerobot — Robotics datasets, policies, and learning workflows.\\n\"\n                    \"• autotrain — No/low-code model training on Hugging Face.\\n\"\n                    \"• tei — Optimized inference server for embedding workloads.\\n\"\n                    \"• bitsandbytes — Quantization and memory-efficient optimizers.\\n\"\n                    \"• sentence_transformers — Embedding models, training recipes, similarity/search workflows.\\n\"\n                    \"• chat-ui — Reference chat interfaces for LLM deployment.\\n\"\n                    \"• leaderboards — Evaluation leaderboards and submission mechanics.\\n\"\n                    \"• lighteval — Lightweight, reproducible LLM evaluation framework.\\n\"\n                    \"• argilla — Data annotation, feedback, and human-in-the-loop workflows.\\n\"\n                    \"• distilabel — Synthetic data generation and distillation pipelines.\\n\"\n                    \"• microsoft-azure — Azure deployment and integration guides.\\n\"\n                    \"• kernels — Lightweight execution environments and notebook-style workflows.\\n\"\n                    \"• google-cloud — GCP deployment and serving workflows.\\n\"\n                ),\n            },\n            \"query\": {\n                \"type\": \"string\",\n                \"description\": (\n                    \"Optional keyword query to rank and filter documentation pages. \"\n                    \"For Gradio, use concise queries like 'how to use the image component' or 'audio component demo'.\"\n                ),\n            },\n            \"max_results\": {\n                \"type\": \"integer\",\n                \"description\": \"Max results (default 20, max 50). 
Ignored for Gradio.\",\n                \"minimum\": 1,\n                \"maximum\": 50,\n            },\n        },\n        \"required\": [\"endpoint\"],\n    },\n}\n\nHF_DOCS_FETCH_TOOL_SPEC = {\n    \"name\": \"fetch_hf_docs\",\n    \"description\": (\n        \"Fetch full markdown content of an HF documentation page. Use after explore_hf_docs.\\n\\n\"\n        \"Critical for finding documentation e.g. current trainer configuration parameters (SFTConfig, DPOConfig, etc.) \"\n        \"Use for researching solutions and before writing training scripts. Your internal knowledge is outdated.\\n\\n\"\n        \"Provide the full URL from explore_hf_docs results. The .md extension is added automatically.\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"url\": {\n                \"type\": \"string\",\n                \"description\": (\n                    \"The full URL to the documentation page. \"\n                    \"Example: 'https://huggingface.co/docs/trl/dpo_trainer' \"\n                    \"The .md extension will be added automatically if not present.\"\n                ),\n            },\n        },\n        \"required\": [\"url\"],\n    },\n}\n"
  },
  {
    "path": "agent/tools/edit_utils.py",
    "content": "\"\"\"\nShared utilities for file editing tools — fuzzy matching, syntax validation,\nand richer edit operations.\n\nUsed by both local_tools.py and the embedded sandbox server.\n\"\"\"\n\nfrom __future__ import annotations\n\n# ── Unicode normalization map ────────────────────────────────────────────\n\nUNICODE_MAP = {\n    \"\\u2013\": \"-\",   # en-dash\n    \"\\u2014\": \"-\",   # em-dash\n    \"\\u2212\": \"-\",   # minus sign\n    \"\\u2018\": \"'\",   # left single quote\n    \"\\u2019\": \"'\",   # right single quote\n    \"\\u201c\": '\"',   # left double quote\n    \"\\u201d\": '\"',   # right double quote\n    \"\\u00a0\": \" \",   # non-breaking space\n    \"\\u2003\": \" \",   # em space\n    \"\\u2002\": \" \",   # en space\n    \"\\u200b\": \"\",    # zero-width space\n    \"\\ufeff\": \"\",    # BOM\n}\n\n\ndef _normalize_unicode(s: str) -> str:\n    return \"\".join(UNICODE_MAP.get(c, c) for c in s)\n\n\n# ── 4-pass fuzzy matching ────────────────────────────────────────────────\n\n\ndef fuzzy_find(content: str, pattern: str) -> tuple[int | None, str | None]:\n    \"\"\"Find *pattern* in *content* with increasingly relaxed matching.\n\n    Returns (start_index_in_original_content, match_note) or (None, None).\n    The index always refers to the *original* content string so callers can\n    use ``content[idx : idx + len(matched_text)]`` for replacement.\n\n    Strategy (mirrors Codex):\n      1. Exact match\n      2. Right-trim each line (trailing whitespace)\n      3. Both-sides trim (all surrounding whitespace per line)\n      4. Unicode normalization on top of both-sides trim\n    \"\"\"\n    # Pass 1 — exact\n    if pattern in content:\n        return content.index(pattern), None\n\n    # Helper: build a line-stripped version *and* a mapping from stripped\n    # positions back to original positions.  
We need this so callers can\n    # apply the replacement on the original content, not the stripped copy.\n\n    def _build_stripped(text: str, strip_fn):\n        \"\"\"Return (stripped_text, line_start_map).\n\n        line_start_map[i] = original byte offset of the start of line i.\n        \"\"\"\n        orig_lines = text.split(\"\\n\")\n        stripped_lines = [strip_fn(l) for l in orig_lines]\n        return \"\\n\".join(stripped_lines), orig_lines, stripped_lines\n\n    # Pass 2 — right-trim\n    c_rt, c_orig_lines, c_rt_lines = _build_stripped(content, str.rstrip)\n    p_rt = \"\\n\".join(l.rstrip() for l in pattern.split(\"\\n\"))\n    idx = c_rt.find(p_rt)\n    if idx != -1:\n        orig_idx = _map_back(idx, c_orig_lines, c_rt_lines)\n        return orig_idx, \"(matched after trimming trailing whitespace)\"\n\n    # Pass 3 — both-sides trim\n    c_st, _, c_st_lines = _build_stripped(content, str.strip)\n    p_st = \"\\n\".join(l.strip() for l in pattern.split(\"\\n\"))\n    idx = c_st.find(p_st)\n    if idx != -1:\n        orig_idx = _map_back(idx, c_orig_lines, c_st_lines)\n        return orig_idx, \"(matched after trimming whitespace)\"\n\n    # Pass 4 — unicode normalization + both-sides trim\n    c_norm = _normalize_unicode(c_st)\n    p_norm = _normalize_unicode(p_st)\n    idx = c_norm.find(p_norm)\n    if idx != -1:\n        orig_idx = _map_back(idx, c_orig_lines, c_st_lines)\n        return orig_idx, \"(matched after unicode normalization)\"\n\n    return None, None\n\n\ndef _map_back(\n    stripped_idx: int,\n    orig_lines: list[str],\n    stripped_lines: list[str],\n) -> int:\n    \"\"\"Map a character index in the stripped/joined text back to the original text.\"\"\"\n    # Walk through stripped lines to find which line the index falls on\n    pos = 0\n    for i, sl in enumerate(stripped_lines):\n        line_end = pos + len(sl)\n        if stripped_idx <= line_end:\n            col_in_stripped = stripped_idx - pos\n            # Find where 
this stripped line's content starts in the original line\n            ol = orig_lines[i]\n            # The stripped line is a subset of the original line; find its offset\n            lstripped = len(ol) - len(ol.lstrip())\n            orig_col = lstripped + col_in_stripped\n            # Compute absolute position in original text\n            orig_pos = sum(len(orig_lines[j]) + 1 for j in range(i)) + orig_col\n            return orig_pos\n        pos = line_end + 1  # +1 for the \\n\n    # Fallback: return 0 (shouldn't happen if idx is valid)\n    return 0\n\n\ndef fuzzy_find_original_match(content: str, pattern: str) -> tuple[str | None, str | None]:\n    \"\"\"Find the *original* text in content that matches pattern fuzzily.\n\n    Returns (original_matched_text, match_note) or (None, None).\n    This extracts the exact substring from the original content that\n    corresponds to the fuzzy match, preserving its original whitespace/unicode.\n    \"\"\"\n    if pattern in content:\n        return pattern, None\n\n    idx, note = fuzzy_find(content, pattern)\n    if idx is None:\n        return None, None\n\n    # We need to find the original text span that corresponds to the match.\n    # The match covers len(pattern) worth of *logical* content.\n    # Count how many original lines the pattern spans.\n    pattern_lines = pattern.split(\"\\n\")\n    n_lines = len(pattern_lines)\n\n    # Find which original line the match starts on\n    orig_lines = content.split(\"\\n\")\n    char_pos = 0\n    start_line = 0\n    for i, ol in enumerate(orig_lines):\n        if char_pos + len(ol) >= idx:\n            start_line = i\n            break\n        char_pos += len(ol) + 1\n\n    end_line = min(start_line + n_lines, len(orig_lines))\n    # Extract the original lines that were matched\n    matched_lines = orig_lines[start_line:end_line]\n    original_text = \"\\n\".join(matched_lines)\n    return original_text, note\n\n\n# ── Richer edit operations 
───────────────────────────────────────────────\n\n\ndef apply_edit(\n    content: str,\n    old_str: str,\n    new_str: str,\n    mode: str = \"replace\",\n    replace_all: bool = False,\n) -> tuple[str, int, str | None]:\n    \"\"\"Apply an edit operation to content.\n\n    Modes:\n      - replace: replace first occurrence (or all if replace_all=True)\n      - replace_all: replace all occurrences (alias)\n      - append_after: insert new_str after old_str\n      - prepend_before: insert new_str before old_str\n\n    Returns (new_content, num_replacements, fuzzy_note).\n    Raises ValueError if old_str not found.\n    \"\"\"\n    if mode == \"replace_all\":\n        replace_all = True\n        mode = \"replace\"\n\n    # Try exact match first, then fuzzy\n    fuzzy_note = None\n    if old_str not in content:\n        original_match, fuzzy_note = fuzzy_find_original_match(content, old_str)\n        if original_match is None:\n            raise ValueError(\n                \"old_str was not found in the file. Make sure old_str matches \"\n                \"the file contents exactly, including whitespace and indentation. \"\n                \"Use the read tool to verify the current file contents before retrying.\"\n            )\n        old_str = original_match\n\n    count = content.count(old_str)\n\n    if mode == \"replace\":\n        if count > 1 and not replace_all:\n            raise ValueError(\n                f\"Found {count} matches of old_str in the file, but replace_all is \"\n                f\"false. To replace all occurrences, set replace_all to true. 
To \"\n                f\"replace only one, provide a larger old_str with more surrounding \"\n                f\"context to uniquely identify the instance.\"\n            )\n        if replace_all:\n            new_content = content.replace(old_str, new_str)\n            return new_content, count, fuzzy_note\n        else:\n            new_content = content.replace(old_str, new_str, 1)\n            return new_content, 1, fuzzy_note\n\n    elif mode == \"append_after\":\n        if replace_all:\n            new_content = content.replace(old_str, old_str + new_str)\n            return new_content, count, fuzzy_note\n        else:\n            idx = content.index(old_str) + len(old_str)\n            new_content = content[:idx] + new_str + content[idx:]\n            return new_content, 1, fuzzy_note\n\n    elif mode == \"prepend_before\":\n        if replace_all:\n            new_content = content.replace(old_str, new_str + old_str)\n            return new_content, count, fuzzy_note\n        else:\n            idx = content.index(old_str)\n            new_content = content[:idx] + new_str + content[idx:]\n            return new_content, 1, fuzzy_note\n\n    else:\n        raise ValueError(f\"Unknown edit mode: {mode}. Use replace, append_after, or prepend_before.\")\n\n\n# ── Syntax validation (Python) ───────────────────────────────────────────\n\n\ndef validate_python(content: str, path: str = \"\") -> list[str]:\n    \"\"\"Lightweight post-write validation for Python files.\n\n    Checks syntax and training script conventions. 
This runs on the host\n    (not in the sandbox), so it only does static checks — no import resolution\n    or signature inspection since packages are installed in the sandbox, not here.\n\n    The sandbox server has its own richer version that does real signature\n    inspection against installed packages.\n\n    Returns a list of warning strings (empty = all good).\n    Never raises — validation failures are advisory only.\n    \"\"\"\n    import ast\n\n    warnings = []\n\n    # 1. Syntax check via ast.parse\n    try:\n        ast.parse(content)\n    except SyntaxError as e:\n        warnings.append(f\"Python syntax error at line {e.lineno}: {e.msg}\")\n        return warnings\n\n    # 2. Training script heuristics\n    if any(kw in content for kw in (\"TrainingArguments\", \"SFTConfig\", \"DPOConfig\", \"GRPOConfig\")):\n        if \"push_to_hub\" not in content:\n            warnings.append(\n                \"Training script warning: no 'push_to_hub' found — model may be lost when job ends\"\n            )\n        if \"hub_model_id\" not in content:\n            warnings.append(\n                \"Training script warning: no 'hub_model_id' found\"\n            )\n\n    return warnings\n"
  },
  {
    "path": "agent/tools/github_find_examples.py",
    "content": "\"\"\"\nGitHub Find Examples Tool - Discover examples, tutorials, and guides for any library\n\nLists all files in a repository and performs deterministic keyword search.\n\"\"\"\n\nimport os\nfrom typing import Any, Dict, List\n\nimport requests\nfrom thefuzz import fuzz\n\nfrom agent.tools.types import ToolResult\n\n# In order of priority (lower index = higher priority for sorting)\nEXAMPLE_PATTERNS = [\n    \"scripts\",\n    # General example patterns (catch-all, lower priority)\n    \"examples\",\n    \"example\",\n    # Notebook patterns\n    \"notebooks\",\n    \"notebook\",\n    # Tutorial/learning patterns\n    \"tutorials\",\n    \"tutorial\",\n    \"quickstart\",\n    \"walkthroughs\",\n    \"walkthrough\",\n    # Cookbook/recipe patterns\n    \"cookbook\",\n    \"cookbooks\",\n    \"recipes\",\n    \"recipe\",\n    # Demo/sample patterns\n    \"demos\",\n    \"demo\",\n    \"samples\",\n    \"sample\",\n    # Other patterns\n    \"guides\",\n    \"guide\",\n    \"getting-started\",\n    \"getting_started\",\n    \"playground\",\n    \"howto\",\n    \"how-to\",\n    \"use-cases\",\n    \"usecases\",\n    \"use_cases\",\n    \"sandbox\",\n    \"showcase\",\n]\n\n\ndef _get_repo_tree(org: str, repo: str, token: str) -> tuple[List[Dict[str, Any]], str]:\n    \"\"\"Get all files in a repository recursively. 
Returns (files, error_message)\"\"\"\n    headers = {\n        \"Accept\": \"application/vnd.github+json\",\n        \"X-GitHub-Api-Version\": \"2022-11-28\",\n        \"Authorization\": f\"Bearer {token}\",\n    }\n\n    full_repo = f\"{org}/{repo}\"\n\n    # Get default branch\n    try:\n        response = requests.get(\n            f\"https://api.github.com/repos/{full_repo}\", headers=headers, timeout=10\n        )\n        if response.status_code == 404:\n            return [], \"not_found\"\n        if response.status_code != 200:\n            return [], f\"API error: {response.status_code}\"\n\n        repo_data = response.json()\n        default_branch = repo_data.get(\"default_branch\", \"main\")\n    except Exception as e:\n        return [], f\"Error fetching repo: {str(e)}\"\n\n    # Get repository tree recursively\n    try:\n        response = requests.get(\n            f\"https://api.github.com/repos/{full_repo}/git/trees/{default_branch}\",\n            headers=headers,\n            params={\"recursive\": \"1\"},\n            timeout=30,\n        )\n        if response.status_code != 200:\n            return [], f\"Error fetching tree: {response.status_code}\"\n\n        data = response.json()\n        tree = data.get(\"tree\", [])\n\n        # Filter to only include files (not directories)\n        files = [\n            {\n                \"path\": item[\"path\"],\n                \"ref\": item[\"sha\"],\n                \"size\": item.get(\"size\", 0),\n                \"url\": f\"https://github.com/{full_repo}/blob/{default_branch}/{item['path']}\",\n            }\n            for item in tree\n            if item[\"type\"] == \"blob\"\n        ]\n\n        return files, \"\"\n    except Exception as e:\n        return [], f\"Error processing tree: {str(e)}\"\n\n\ndef _search_similar_repos(org: str, repo: str, token: str) -> List[Dict[str, Any]]:\n    \"\"\"Search for similar repository names in the organization\"\"\"\n    headers = {\n        
\"Accept\": \"application/vnd.github+json\",\n        \"X-GitHub-Api-Version\": \"2022-11-28\",\n        \"Authorization\": f\"Bearer {token}\",\n    }\n\n    # Search for repos in the org with similar name\n    query = f\"org:{org} {repo}\"\n\n    try:\n        response = requests.get(\n            \"https://api.github.com/search/repositories\",\n            headers=headers,\n            params={\"q\": query, \"sort\": \"stars\", \"order\": \"desc\", \"per_page\": 10},\n            timeout=30,\n        )\n\n        if response.status_code != 200:\n            return []\n\n        data = response.json()\n        items = data.get(\"items\", [])\n\n        return [\n            {\n                \"name\": item.get(\"name\"),\n                \"full_name\": item.get(\"full_name\"),\n                \"description\": item.get(\"description\"),\n                \"stars\": item.get(\"stargazers_count\", 0),\n                \"url\": item.get(\"html_url\"),\n            }\n            for item in items\n        ]\n    except Exception:\n        return []\n\n\ndef _score_against_example_patterns(file_path: str) -> int:\n    \"\"\"Score file against example patterns using token_set_ratio\"\"\"\n    scores = []\n    for pattern in EXAMPLE_PATTERNS:\n        score = fuzz.token_set_ratio(pattern.lower(), file_path.lower())\n        scores.append(score)\n    return max(scores) if scores else 0\n\n\ndef _score_against_keyword(file_path: str, keyword: str) -> int:\n    \"\"\"Calculate fuzzy match score for a file path against a keyword\"\"\"\n    # Use partial_ratio for substring matching (good for paths)\n    # Also check token_set_ratio for word-level matching\n    partial_score = fuzz.partial_ratio(keyword.lower(), file_path.lower())\n    token_score = fuzz.token_set_ratio(keyword.lower(), file_path.lower())\n\n    # Return the higher of the two\n    return max(partial_score, token_score)\n\n\ndef _get_pattern_priority(file_path: str) -> tuple[int, int, int]:\n    \"\"\"\n    
Get priority of a file path based on which example pattern directory it's in.\n\n    Returns: (in_examples_dir, pattern_priority, path_depth)\n    - in_examples_dir: 0 if in examples/ directory, 1 otherwise (lower is better)\n    - pattern_priority: Index in EXAMPLE_PATTERNS (lower is better), or 999 if no match\n    - path_depth: Number of path segments (lower is better)\n\n    Note: Prioritizes files in \"examples/\" directory first, then by most specific pattern match.\n    E.g., \"examples/scripts/train.py\" is better than \"scripts/util.py\"\n    \"\"\"\n    path_lower = file_path.lower()\n    path_parts = path_lower.split(\"/\")\n\n    # Check if file is in examples/ directory (highest priority)\n    in_examples_dir = 0 if (path_parts[0] in [\"examples\", \"example\"]) else 1\n\n    # Find ALL matching patterns and use the best (lowest index) one\n    # But prefer deeper matches (more specific) over shallow ones\n    best_priority = 999\n    best_depth_at_match = -1\n\n    for i, pattern in enumerate(EXAMPLE_PATTERNS):\n        # Check if pattern appears as a directory component in the path\n        if pattern in path_parts:\n            # Find the depth where this pattern appears (rightmost occurrence)\n            depth = len(path_parts) - 1 - path_parts[::-1].index(pattern)\n\n            # Prefer deeper matches, or better priority if at same depth\n            if depth > best_depth_at_match or (\n                depth == best_depth_at_match and i < best_priority\n            ):\n                best_priority = i\n                best_depth_at_match = depth\n\n    return (in_examples_dir, best_priority, len(path_parts))\n\n\ndef _handle_repo_tree_errors(\n    all_files: List[Dict[str, Any]],\n    error: str,\n    org: str,\n    repo: str,\n    token: str,\n) -> ToolResult | None:\n    \"\"\"Handle errors from repo tree fetch. 
Returns ToolResult if error, None if OK.\"\"\"\n    if error == \"not_found\":\n        similar_repos = _search_similar_repos(org, repo, token)\n\n        if not similar_repos:\n            return {\n                \"formatted\": f\"Repository '{org}/{repo}' not found and no similar repositories found.\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        # Format similar repos\n        lines = [f\"**Repository '{org}/{repo}' not found. Similar repositories:**\\n\"]\n        for i, r in enumerate(similar_repos, 1):\n            lines.append(f\"{i}. **{r['full_name']}** (⭐ {r['stars']:,} stars)\")\n            if r[\"description\"]:\n                desc = (\n                    r[\"description\"][:100] + \"...\"\n                    if len(r[\"description\"]) > 100\n                    else r[\"description\"]\n                )\n                lines.append(f\"   {desc}\")\n            lines.append(f\"   {r['url']}\\n\")\n\n        return {\n            \"formatted\": \"\\n\".join(lines),\n            \"totalResults\": len(similar_repos),\n            \"resultsShared\": len(similar_repos),\n            \"isError\": True,\n        }\n\n    if error:\n        return {\n            \"formatted\": f\"Error accessing repository '{org}/{repo}': {error}\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n            \"isError\": True,\n        }\n\n    if not all_files:\n        return {\n            \"formatted\": f\"No files found in repository '{org}/{repo}'\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n        }\n\n    return None\n\n\ndef find_examples(\n    keyword: str = \"\",\n    repo: str = \"\",\n    org: str = \"huggingface\",\n    max_results: int = 50,\n    min_score: int = 60,\n) -> ToolResult:\n    \"\"\"\n    Find example files in a repository using fuzzy matching.\n\n    Args:\n        keyword: Keyword to fuzzy match 
against file paths (e.g., \"grpo\")\n        repo: Repository name (e.g., \"trl\")\n        org: GitHub organization (default: \"huggingface\")\n        max_results: Maximum number of results (default 50)\n        min_score: Minimum fuzzy match score (0-100, default 60)\n\n    Returns:\n        ToolResult with matching files, or similar repos if repo not found\n    \"\"\"\n    token = os.environ.get(\"GITHUB_TOKEN\")\n    if not token:\n        return {\n            \"formatted\": \"Error: GITHUB_TOKEN environment variable is required\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n            \"isError\": True,\n        }\n\n    if not repo:\n        return {\n            \"formatted\": \"Error: repo parameter is required\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n            \"isError\": True,\n        }\n\n    # Get all files in the repository\n    all_files, error = _get_repo_tree(org, repo, token)\n\n    # Handle errors (not found, API errors, empty repo)\n    if error_result := _handle_repo_tree_errors(all_files, error, org, repo, token):\n        return error_result\n\n    # Step 1: Filter files by example patterns (score >= 60)\n    example_threshold = 60\n    example_files = []\n    for file in all_files:\n        example_score = _score_against_example_patterns(file[\"path\"])\n        if example_score >= example_threshold:\n            example_files.append({**file, \"example_score\": example_score})\n\n    if not example_files:\n        return {\n            \"formatted\": f\"No example files found in {org}/{repo} (no files match example patterns with score >= {example_threshold}).\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n        }\n\n    # Step 2: If keyword provided, score and filter by keyword\n    if keyword:\n        scored_files = []\n        for file in example_files:\n            keyword_score = _score_against_keyword(file[\"path\"], keyword)\n            if 
keyword_score >= min_score:\n                scored_files.append({**file, \"score\": keyword_score})\n\n        if not scored_files:\n            return {\n                \"formatted\": f\"No files found in {org}/{repo} matching keyword '{keyword}' (min score: {min_score}) among {len(example_files)} example files.\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n            }\n\n        # Sort by keyword score (descending) for best matches first\n        scored_files.sort(key=lambda x: x[\"score\"], reverse=True)\n    else:\n        # No keyword: prioritize by pattern directory, then path depth\n        scored_files = []\n        for file in example_files:\n            in_examples_dir, pattern_priority, path_depth = _get_pattern_priority(\n                file[\"path\"]\n            )\n            scored_files.append(\n                {\n                    **file,\n                    \"score\": file[\"example_score\"],\n                    \"in_examples_dir\": in_examples_dir,\n                    \"pattern_priority\": pattern_priority,\n                    \"path_depth\": path_depth,\n                }\n            )\n\n        if not scored_files:\n            return {\n                \"formatted\": f\"No example files found in {org}/{repo}.\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n            }\n\n        # Sort by: 1) files in examples/ dir first, 2) pattern priority (scripts > datasets > etc), 3) path depth, 4) path name\n        scored_files.sort(\n            key=lambda x: (\n                x[\"in_examples_dir\"],\n                x[\"pattern_priority\"],\n                x[\"path_depth\"],\n                x[\"path\"],\n            )\n        )\n\n    # Limit results\n    results = scored_files[:max_results]\n\n    # Format output\n    keyword_desc = f\" matching '{keyword}'\" if keyword else \"\"\n    lines = [f\"**Found {len(results)} example files in 
{org}/{repo}{keyword_desc}:**\"]\n    if len(scored_files) > max_results:\n        lines[0] += f\" (showing {max_results} of {len(scored_files)})\"\n    lines.append(\"\")\n\n    for i, file in enumerate(results, 1):\n        lines.append(f\"{i}. **{file['path']}**\")\n        lines.append(f\"   Size: {file['size']:,} bytes | Ref: {file['ref'][:7]}\")\n        lines.append(f\"   URL: {file['url']}\")\n\n        # Copyable parameters for read_file tool\n        read_params = f\"{{'repo': '{org}/{repo}', 'path': '{file['path']}'}}\"\n        lines.append(f\"   To read, use: {read_params}\")\n        lines.append(\"\")\n\n    return {\n        \"formatted\": \"\\n\".join(lines),\n        \"totalResults\": len(results),\n        \"resultsShared\": len(results),\n    }\n\n\n# Tool specification\nGITHUB_FIND_EXAMPLES_TOOL_SPEC = {\n    \"name\": \"github_find_examples\",\n    \"description\": (\n        \"Find working example scripts in GitHub repositories (from a list of predetermined directories e.g. examples/, scripts/, tutorials/, etc.). \"\n        \"Uses fuzzy keyword matching.\\n\\n\"\n        \"MANDATORY before writing any ML training, fine-tuning, or inference code. 
\"\n        \"Your internal knowledge of library APIs is outdated — working examples show current API patterns.\\n\\n\"\n        \"Sequence: github_find_examples → github_read_file (study the example) → implement based on what you found.\\n\\n\"\n        \"Skip this only for: simple data queries, status checks, non-code tasks.\\n\\n\"\n        \"Examples:\\n\"\n        \"  {keyword: 'sft', repo: 'trl'} → finds examples/scripts/sft.py\\n\"\n        \"  {keyword: 'grpo', repo: 'trl'} → finds GRPO training examples\\n\"\n        \"  {repo: 'trl', max_results: 20} → lists all available training method examples\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"keyword\": {\n                \"type\": \"string\",\n                \"description\": \"Keyword to fuzzy match against file paths (e.g., 'grpo', 'sft').\",\n            },\n            \"repo\": {\n                \"type\": \"string\",\n                \"description\": \"Repository name (e.g., 'trl', 'transformers'). Required.\",\n            },\n            \"org\": {\n                \"type\": \"string\",\n                \"description\": \"GitHub organization or username. Default: 'huggingface'.\",\n            },\n            \"max_results\": {\n                \"type\": \"integer\",\n                \"description\": \"Maximum number of results to return. Default: 50.\",\n            },\n            \"min_score\": {\n                \"type\": \"integer\",\n                \"description\": \"Minimum fuzzy match score (0-100). 
Default: 60.\",\n            },\n        },\n        \"required\": [\"repo\"],\n    },\n}\n\n\nasync def github_find_examples_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:\n    \"\"\"Handler for agent tool router\"\"\"\n    try:\n        result = find_examples(\n            keyword=arguments.get(\"keyword\", \"\"),\n            repo=arguments[\"repo\"],\n            org=arguments.get(\"org\", \"huggingface\"),\n            max_results=arguments.get(\"max_results\", 50),\n            min_score=arguments.get(\"min_score\", 60),\n        )\n        return result[\"formatted\"], not result.get(\"isError\", False)\n    except Exception as e:\n        return f\"Error finding examples: {str(e)}\", False\n"
  },
  {
    "path": "agent/tools/github_list_repos.py",
    "content": "\"\"\"\nGitHub List Repositories Tool - List and sort repositories for any user or organization\n\nEfficiently discover repositories with flexible sorting options.\n\"\"\"\n\nimport os\nfrom typing import Any, Dict, Literal, Optional\n\nimport requests\n\nfrom agent.tools.types import ToolResult\n\n\ndef list_repos(\n    owner: str,\n    owner_type: Literal[\"user\", \"org\"] = \"org\",\n    sort: Literal[\"stars\", \"forks\", \"updated\", \"created\"] = \"stars\",\n    order: Literal[\"asc\", \"desc\"] = \"desc\",\n    limit: Optional[int] = 30,\n) -> ToolResult:\n    \"\"\"\n    List repositories for a user or organization using GitHub REST API.\n\n    Args:\n        owner: GitHub username or organization name\n        owner_type: Whether the owner is a \"user\" or \"org\" (default: \"org\")\n        sort: Sort field - \"stars\", \"forks\", \"updated\", or \"created\"\n        order: Sort order - \"asc\" or \"desc\" (default: \"desc\")\n        limit: Maximum number of repositories to return\n\n    Returns:\n        ToolResult with repository information\n    \"\"\"\n    token = os.environ.get(\"GITHUB_TOKEN\")\n    if not token:\n        return {\n            \"formatted\": \"Error: GITHUB_TOKEN environment variable is required\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n            \"isError\": True,\n        }\n\n    if owner_type == \"org\":\n        url = f\"https://api.github.com/orgs/{owner}/repos\"\n    else:\n        url = f\"https://api.github.com/users/{owner}/repos\"\n\n    headers = {\n        \"Accept\": \"application/vnd.github+json\",\n        \"X-GitHub-Api-Version\": \"2022-11-28\",\n        \"Authorization\": f\"Bearer {token}\",\n    }\n\n    all_repos = []\n    page = 1\n    per_page = 100  # Maximum allowed by GitHub\n\n    # Map our sort values to GitHub API sort values\n    # Note: GitHub list repos API doesn't support sorting by stars/forks\n    # We'll fetch all repos and sort in memory for 
those cases\n    api_sort_map = {\n        \"created\": \"created\",\n        \"updated\": \"updated\",\n        \"stars\": None,  # Not supported by list API\n        \"forks\": None,  # Not supported by list API\n    }\n\n    api_sort = api_sort_map.get(sort)\n    need_manual_sort = api_sort is None\n\n    try:\n        while True:\n            params = {\n                \"page\": page,\n                \"per_page\": per_page,\n            }\n\n            # Only add sort/direction if API supports it\n            if api_sort:\n                params[\"sort\"] = api_sort\n                params[\"direction\"] = order\n\n            response = requests.get(\n                url,\n                headers=headers,\n                params=params,\n                timeout=30,\n            )\n\n            if response.status_code == 403:\n                error_data = response.json()\n                return {\n                    \"formatted\": f\"GitHub API rate limit or permission error: {error_data.get('message', 'Unknown error')}\",\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                    \"isError\": True,\n                }\n\n            if response.status_code != 200:\n                error_msg = f\"GitHub API error (status {response.status_code})\"\n                try:\n                    error_data = response.json()\n                    if \"message\" in error_data:\n                        error_msg += f\": {error_data['message']}\"\n                except Exception:\n                    pass\n                return {\n                    \"formatted\": error_msg,\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                    \"isError\": True,\n                }\n\n            items = response.json()\n\n            if not items:\n                break\n\n            for item in items:\n                all_repos.append(\n                    {\n                        
\"name\": item.get(\"name\"),\n                        \"full_name\": item.get(\"full_name\"),\n                        \"description\": item.get(\"description\"),\n                        \"html_url\": item.get(\"html_url\"),\n                        \"language\": item.get(\"language\"),\n                        \"stars\": item.get(\"stargazers_count\", 0),\n                        \"forks\": item.get(\"forks_count\", 0),\n                        \"open_issues\": item.get(\"open_issues_count\", 0),\n                        \"topics\": item.get(\"topics\", []),\n                        \"updated_at\": item.get(\"updated_at\"),\n                        \"created_at\": item.get(\"created_at\"),\n                    }\n                )\n\n            # Check if we got fewer results than requested (last page)\n            if len(items) < per_page:\n                break\n\n            # Stop if we have enough repos\n            if limit and len(all_repos) >= limit:\n                break\n\n            page += 1\n\n    except requests.exceptions.RequestException as e:\n        return {\n            \"formatted\": f\"Failed to connect to GitHub API: {str(e)}\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n            \"isError\": True,\n        }\n\n    # Manual sorting if needed (for stars/forks)\n    if need_manual_sort and all_repos:\n        reverse = order == \"desc\"\n        all_repos.sort(key=lambda x: x[sort], reverse=reverse)\n\n    # Apply limit after sorting\n    if limit:\n        all_repos = all_repos[:limit]\n\n    if not all_repos:\n        return {\n            \"formatted\": f\"No repositories found for {owner_type} '{owner}'\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n        }\n\n    # Format output\n    lines = [f\"**Found {len(all_repos)} repositories for {owner}:**\\n\"]\n\n    for i, repo in enumerate(all_repos, 1):\n        lines.append(f\"{i}. 
**{repo['full_name']}**\")\n        lines.append(\n            f\"   ⭐ {repo['stars']:,} stars | 🍴 {repo['forks']:,} forks | Language: {repo['language'] or 'N/A'}\"\n        )\n        if repo[\"description\"]:\n            desc = (\n                repo[\"description\"][:100] + \"...\"\n                if len(repo[\"description\"]) > 100\n                else repo[\"description\"]\n            )\n            lines.append(f\"   {desc}\")\n        lines.append(f\"   URL: {repo['html_url']}\")\n        if repo[\"topics\"]:\n            lines.append(f\"   Topics: {', '.join(repo['topics'][:5])}\")\n\n        # Copyable parameters for other tools\n        lines.append(f\"   Use in tools: {{'repo': '{repo['full_name']}'}}\")\n        lines.append(\"\")\n\n    return {\n        \"formatted\": \"\\n\".join(lines),\n        \"totalResults\": len(all_repos),\n        \"resultsShared\": len(all_repos),\n    }\n\n\n# Tool specification\nGITHUB_LIST_REPOS_TOOL_SPEC = {\n    \"name\": \"github_list_repos\",\n    \"description\": (\n        \"List and discover repositories for GitHub organizations or users with flexible sorting. \"\n        \"**Use when:** (1) Exploring what libraries exist for a task, (2) Finding the right library to use, \"\n        \"(3) Discovering popular or active projects, (4) Checking recently updated repos for latest features, \"\n        \"(5) Finding alternative libraries in an organization. \"\n        \"**Pattern:** github_list_repos (discover libraries) → github_find_examples (find usage examples) → implement. \"\n        \"Returns: Comprehensive repository information (stars, forks, language, topics, URLs), sorted by preference. \"\n        \"**Then:** Use github_find_examples on selected repo to discover example code. 
\"\n        \"Sorts by: stars (popularity), forks (community), updated (activity), created (age).\\n\\n\"\n        \"## When to use this tool\\n\\n\"\n        \"- When you need to find libraries to use in your implementation\\n\"\n        \"- When exploring what repositories exist for a task or domain\\n\"\n        \"- When debugging an error and looking up if others have similar issues in repos\\n\"\n        \"- When finding the most popular or actively maintained projects for a user/org\\n\"\n        \"## Examples\\n\\n\"\n        \"<example>\\n\"\n        \"// ML Workflow Step: Discover HF libraries for RLHF/alignment\\n\"\n        \"// Use case: Find the right library for training with human feedback\\n\"\n        \"{\\n\"\n        \"  owner: 'huggingface',\\n\"\n        \"  owner_type: 'org',\\n\"\n        \"  sort: 'stars',\\n\"\n        \"  limit: 10\\n\"\n        \"}\\n\"\n        \"// Returns: transformers, trl, peft, accelerate, diffusers...\\n\"\n        \"</example>\\n\\n\"\n        \"<example>\\n\"\n        \"// ML Workflow Step: Check for recently updated HF repos\\n\"\n        \"// Use case: Find actively maintained libraries with latest features\\n\"\n        \"{\\n\"\n        \"  owner: 'huggingface',\\n\"\n        \"  owner_type: 'org',\\n\"\n        \"  sort: 'updated',\\n\"\n        \"  order: 'desc',\\n\"\n        \"  limit: 15\\n\"\n        \"}\\n\"\n        \"// Helps identify which repos have recent improvements/fixes\\n\"\n        \"</example>\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"owner\": {\n                \"type\": \"string\",\n                \"description\": \"GitHub username or organization name. Required.\",\n            },\n            \"owner_type\": {\n                \"type\": \"string\",\n                \"enum\": [\"user\", \"org\"],\n                \"description\": \"Whether the owner is a 'user' or 'org'. 
Default: 'org'.\",\n            },\n            \"sort\": {\n                \"type\": \"string\",\n                \"enum\": [\"stars\", \"forks\", \"updated\", \"created\"],\n                \"description\": \"Sort field. Options: 'stars', 'forks', 'updated', 'created'. Default: 'stars'.\",\n            },\n            \"order\": {\n                \"type\": \"string\",\n                \"enum\": [\"asc\", \"desc\"],\n                \"description\": \"Sort order. Options: 'asc', 'desc'. Default: 'desc'.\",\n            },\n            \"limit\": {\n                \"type\": \"integer\",\n                \"description\": \"Maximum number of repositories to return. No limit if not specified. Default: 30.\",\n            },\n        },\n        \"required\": [\"owner\"],\n    },\n}\n\n\nasync def github_list_repos_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:\n    \"\"\"Handler for agent tool router\"\"\"\n    try:\n        result = list_repos(\n            owner=arguments[\"owner\"],\n            owner_type=arguments.get(\"owner_type\", \"org\"),\n            sort=arguments.get(\"sort\", \"stars\"),\n            order=arguments.get(\"order\", \"desc\"),\n            limit=arguments.get(\"limit\"),\n        )\n        return result[\"formatted\"], not result.get(\"isError\", False)\n    except Exception as e:\n        return f\"Error listing repositories: {str(e)}\", False\n"
  },
  {
    "path": "agent/tools/github_read_file.py",
    "content": "\"\"\"\nGitHub Read File Tool - Read file contents from any GitHub repository with line range support\n\nFetch exact file contents with metadata, supporting line ranges for efficient reading.\n\"\"\"\n\nimport base64\nimport json\nimport os\nfrom typing import Any, Dict, Optional\n\nimport nbformat\nimport requests\nfrom nbconvert import MarkdownExporter\nfrom nbconvert.preprocessors import ClearOutputPreprocessor, TagRemovePreprocessor\n\nfrom agent.tools.types import ToolResult\n\n\ndef _convert_ipynb_to_markdown(content: str) -> str:\n    \"\"\"\n    Convert Jupyter notebook JSON to LLM-friendly Markdown.\n\n    Args:\n        content: Raw notebook JSON string\n\n    Returns:\n        Converted Markdown string\n    \"\"\"\n    try:\n        # Parse notebook JSON\n        nb_dict = json.loads(content)\n\n        # Normalize cell sources (can be string or list of strings)\n        if \"cells\" in nb_dict:\n            for cell in nb_dict[\"cells\"]:\n                if \"source\" in cell and isinstance(cell[\"source\"], list):\n                    cell[\"source\"] = \"\".join(cell[\"source\"])\n\n        # Read notebook with explicit version\n        nb = nbformat.reads(json.dumps(nb_dict), as_version=4)\n\n        # Strip outputs for LLM readability (outputs can be noisy/large)\n        clear = ClearOutputPreprocessor()\n        nb, _ = clear.preprocess(nb, {})\n\n        # Optionally remove cells tagged with \"hide\" or similar\n        remove = TagRemovePreprocessor(\n            remove_cell_tags={\"hide\", \"hidden\", \"remove\"},\n            remove_input_tags=set(),\n            remove_all_outputs_tags=set(),\n        )\n        nb, _ = remove.preprocess(nb, {})\n\n        # Convert to markdown\n        exporter = MarkdownExporter()\n        markdown, _ = exporter.from_notebook_node(nb)\n\n        return markdown\n\n    except json.JSONDecodeError:\n        return content\n    except Exception:\n        return content\n\n\ndef read_file(\n    
repo: str,\n    path: str,\n    ref: str = \"HEAD\",\n    line_start: Optional[int] = None,\n    line_end: Optional[int] = None,\n) -> ToolResult:\n    \"\"\"\n    Read file contents from a GitHub repository with line range support.\n\n    Args:\n        repo: Repository in format \"owner/repo\" (e.g., \"github/github-mcp-server\")\n        path: Path to file in repository (e.g., \"pkg/github/search.go\")\n        ref: Git reference - branch name, tag, or commit SHA (default: \"HEAD\")\n        line_start: Starting line number (1-indexed, inclusive)\n        line_end: Ending line number (1-indexed, inclusive)\n\n    Returns:\n        ToolResult with file contents and metadata\n    \"\"\"\n    token = os.environ.get(\"GITHUB_TOKEN\")\n    if not token:\n        return {\n            \"formatted\": \"Error: GITHUB_TOKEN environment variable is required\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n            \"isError\": True,\n        }\n\n    # Parse repo\n    if \"/\" not in repo:\n        return {\n            \"formatted\": \"Error: repo must be in format 'owner/repo'\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n            \"isError\": True,\n        }\n\n    owner, repo_name = repo.split(\"/\", 1)\n\n    headers = {\n        \"Accept\": \"application/vnd.github+json\",\n        \"X-GitHub-Api-Version\": \"2022-11-28\",\n        \"Authorization\": f\"Bearer {token}\",\n    }\n\n    # Fetch file contents\n    url = f\"https://api.github.com/repos/{owner}/{repo_name}/contents/{path}\"\n    params = {}\n    if ref and ref != \"HEAD\":\n        params[\"ref\"] = ref\n\n    try:\n        response = requests.get(url, headers=headers, params=params, timeout=30)\n\n        if response.status_code == 404:\n            return {\n                \"formatted\": f\"File not found: {path} in {repo} (ref: {ref})\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": 
True,\n            }\n\n        if response.status_code != 200:\n            error_msg = f\"GitHub API error (status {response.status_code})\"\n            try:\n                error_data = response.json()\n                if \"message\" in error_data:\n                    error_msg += f\": {error_data['message']}\"\n            except Exception:\n                pass\n            return {\n                \"formatted\": error_msg,\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        data = response.json()\n\n        # Check if it's a file\n        if data.get(\"type\") != \"file\":\n            return {\n                \"formatted\": f\"Path {path} is not a file (type: {data.get('type')})\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        # Decode content\n        content_b64 = data.get(\"content\", \"\")\n        if content_b64:\n            content_b64 = content_b64.replace(\"\\n\", \"\").replace(\" \", \"\")\n            content = base64.b64decode(content_b64).decode(\"utf-8\", errors=\"replace\")\n        else:\n            # For large files, fetch raw content\n            raw_headers = {\n                \"Accept\": \"application/vnd.github.raw\",\n                \"X-GitHub-Api-Version\": \"2022-11-28\",\n                \"Authorization\": f\"Bearer {token}\",\n            }\n            raw_response = requests.get(\n                url, headers=raw_headers, params=params, timeout=30\n            )\n            if raw_response.status_code != 200:\n                return {\n                    \"formatted\": \"Failed to fetch file content\",\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                    \"isError\": True,\n                }\n            content = raw_response.text\n\n        if path.lower().endswith(\".ipynb\"):\n            
content = _convert_ipynb_to_markdown(content)\n\n        # Process line ranges\n        lines = content.split(\"\\n\")\n        total_lines = len(lines)\n\n        truncated = False\n\n        if line_start is None and line_end is None:\n            # No range specified\n            if total_lines > 300:\n                line_start = 1\n                line_end = 300\n                truncated = True\n            else:\n                line_start = 1\n                line_end = total_lines\n        else:\n            # Range specified\n            if line_start is None:\n                line_start = 1\n            if line_end is None:\n                line_end = total_lines\n\n            # Validate range\n            line_start = max(1, line_start)\n            line_end = min(total_lines, line_end)\n            if line_start > line_end:\n                return {\n                    \"formatted\": f\"Invalid range: line_start ({line_start}) > line_end ({line_end})\",\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                    \"isError\": True,\n                }\n\n        # Extract lines\n        selected_lines = lines[line_start - 1 : line_end]\n        selected_content = \"\\n\".join(selected_lines)\n\n        # Format output\n        lines_output = [f\"**Reading file from repo: {repo}, path: {path}**\"]\n\n        if ref and ref != \"HEAD\":\n            lines_output.append(f\"Ref: {ref}\")\n\n        lines_output.append(\"\\n**File content:\")\n        lines_output.append(\"```\")\n        lines_output.append(selected_content)\n        lines_output.append(\"```\")\n        if truncated:\n            lines_output.append(\n                f\"Currently showing lines {line_start}-{line_end} out of {total_lines} total lines. 
Use line_start and line_end to view more lines.\"\n            )\n        return {\n            \"formatted\": \"\\n\".join(lines_output),\n            \"totalResults\": 1,\n            \"resultsShared\": 1,\n        }\n\n    except requests.exceptions.RequestException as e:\n        return {\n            \"formatted\": f\"Failed to connect to GitHub API: {str(e)}\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n            \"isError\": True,\n        }\n\n\n# Tool specification\nGITHUB_READ_FILE_TOOL_SPEC = {\n    \"name\": \"github_read_file\",\n    \"description\": (\n        \"Read file contents from GitHub repositories. Returns first 300 lines by default. \"\n        \"Auto-converts Jupyter notebooks to markdown.\\n\\n\"\n        \"Use AFTER github_find_examples to study the working implementation. \"\n        \"The purpose is to learn current API patterns — imports, trainer configs, dataset handling — \"\n        \"so your implementation uses correct, up-to-date code.\\n\\n\"\n        \"Use line_start/line_end for large files (>300 lines) to read specific sections.\\n\\n\"\n        \"When NOT to use: when you don't know the file path (use github_find_examples first).\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"repo\": {\n                \"type\": \"string\",\n                \"description\": \"Repository in format 'owner/repo' (e.g., 'github/github-mcp-server'). Required.\",\n            },\n            \"path\": {\n                \"type\": \"string\",\n                \"description\": \"Path to file in repository (e.g., 'src/index.js'). Required.\",\n            },\n            \"ref\": {\n                \"type\": \"string\",\n                \"description\": \"Git reference - branch name, tag, or commit SHA. 
Default: 'HEAD'.\",\n            },\n            \"line_start\": {\n                \"type\": \"integer\",\n                \"description\": \"Starting line number (1-indexed, inclusive). Optional.\",\n            },\n            \"line_end\": {\n                \"type\": \"integer\",\n                \"description\": \"Ending line number (1-indexed, inclusive). Optional.\",\n            },\n        },\n        \"required\": [\"repo\", \"path\"],\n    },\n}\n\n\nasync def github_read_file_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:\n    \"\"\"Handler for agent tool router\"\"\"\n    try:\n        result = read_file(\n            repo=arguments[\"repo\"],\n            path=arguments[\"path\"],\n            ref=arguments.get(\"ref\", \"HEAD\"),\n            line_start=arguments.get(\"line_start\"),\n            line_end=arguments.get(\"line_end\"),\n        )\n        return result[\"formatted\"], not result.get(\"isError\", False)\n    except Exception as e:\n        return f\"Error reading file: {str(e)}\", False\n"
  },
  {
    "path": "agent/tools/hf_repo_files_tool.py",
    "content": "\"\"\"\nHF Repo Files Tool - File operations on Hugging Face repositories\n\nOperations: list, read, upload, delete\n\"\"\"\n\nimport asyncio\nfrom typing import Any, Dict, Literal, Optional\n\nfrom huggingface_hub import HfApi, hf_hub_download\nfrom huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError\n\nfrom agent.tools.types import ToolResult\n\nOperationType = Literal[\"list\", \"read\", \"upload\", \"delete\"]\n\n\nasync def _async_call(func, *args, **kwargs):\n    \"\"\"Wrap synchronous HfApi calls for async context.\"\"\"\n    return await asyncio.to_thread(func, *args, **kwargs)\n\n\ndef _build_repo_url(repo_id: str, repo_type: str = \"model\") -> str:\n    \"\"\"Build the Hub URL for a repository.\"\"\"\n    if repo_type == \"model\":\n        return f\"https://huggingface.co/{repo_id}\"\n    return f\"https://huggingface.co/{repo_type}s/{repo_id}\"\n\n\ndef _format_size(size_bytes: int) -> str:\n    \"\"\"Format file size in human-readable form.\"\"\"\n    for unit in [\"B\", \"KB\", \"MB\", \"GB\", \"TB\"]:\n        if size_bytes < 1024:\n            return f\"{size_bytes:.1f}{unit}\"\n        size_bytes /= 1024\n    return f\"{size_bytes:.1f}PB\"\n\n\nclass HfRepoFilesTool:\n    \"\"\"Tool for file operations on HF repos.\"\"\"\n\n    def __init__(self, hf_token: Optional[str] = None):\n        self.api = HfApi(token=hf_token)\n\n    async def execute(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Execute the specified operation.\"\"\"\n        operation = args.get(\"operation\")\n\n        if not operation:\n            return self._help()\n\n        try:\n            handlers = {\n                \"list\": self._list,\n                \"read\": self._read,\n                \"upload\": self._upload,\n                \"delete\": self._delete,\n            }\n\n            handler = handlers.get(operation)\n            if handler:\n                return await handler(args)\n            else:\n               
 return self._error(f\"Unknown operation: {operation}. Valid: list, read, upload, delete\")\n\n        except RepositoryNotFoundError:\n            return self._error(f\"Repository not found: {args.get('repo_id')}\")\n        except EntryNotFoundError:\n            return self._error(f\"File not found: {args.get('path')}\")\n        except Exception as e:\n            return self._error(f\"Error: {str(e)}\")\n\n    def _help(self) -> ToolResult:\n        \"\"\"Show usage instructions.\"\"\"\n        return {\n            \"formatted\": \"\"\"**hf_repo_files** - File operations on HF repos\n\n**Operations:**\n- `list` - List files: `{\"operation\": \"list\", \"repo_id\": \"gpt2\"}`\n- `read` - Read file: `{\"operation\": \"read\", \"repo_id\": \"gpt2\", \"path\": \"config.json\"}`\n- `upload` - Upload: `{\"operation\": \"upload\", \"repo_id\": \"my-model\", \"path\": \"README.md\", \"content\": \"...\"}`\n- `delete` - Delete: `{\"operation\": \"delete\", \"repo_id\": \"my-model\", \"patterns\": [\"*.tmp\"]}`\n\n**Common params:** repo_id (required), repo_type (model/dataset/space), revision (default: main)\"\"\",\n            \"totalResults\": 1,\n            \"resultsShared\": 1,\n        }\n\n    async def _list(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"List files in a repository.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        revision = args.get(\"revision\", \"main\")\n        path = args.get(\"path\", \"\")\n\n        items = list(await _async_call(\n            self.api.list_repo_tree,\n            repo_id=repo_id,\n            repo_type=repo_type,\n            revision=revision,\n            path_in_repo=path,\n            recursive=True,\n        ))\n\n        if not items:\n            return {\"formatted\": f\"No files in {repo_id}\", \"totalResults\": 0, \"resultsShared\": 0}\n\n        lines 
= []\n        total_size = 0\n        for item in sorted(items, key=lambda x: x.path):\n            if hasattr(item, \"size\") and item.size:\n                total_size += item.size\n                lines.append(f\"{item.path} ({_format_size(item.size)})\")\n            else:\n                lines.append(f\"{item.path}/\")\n\n        url = _build_repo_url(repo_id, repo_type)\n        response = f\"**{repo_id}** ({len(items)} files, {_format_size(total_size)})\\n{url}/tree/{revision}\\n\\n\" + \"\\n\".join(lines)\n\n        return {\"formatted\": response, \"totalResults\": len(items), \"resultsShared\": len(items)}\n\n    async def _read(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Read file content from a repository.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        path = args.get(\"path\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if not path:\n            return self._error(\"path is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        revision = args.get(\"revision\", \"main\")\n        max_chars = args.get(\"max_chars\", 50000)\n\n        file_path = await _async_call(\n            hf_hub_download,\n            repo_id=repo_id,\n            filename=path,\n            repo_type=repo_type,\n            revision=revision,\n            token=self.api.token,\n        )\n\n        try:\n            with open(file_path, \"r\", encoding=\"utf-8\") as f:\n                content = f.read()\n\n            truncated = len(content) > max_chars\n            if truncated:\n                content = content[:max_chars]\n\n            url = f\"{_build_repo_url(repo_id, repo_type)}/blob/{revision}/{path}\"\n            response = f\"**{path}**{' (truncated)' if truncated else ''}\\n{url}\\n\\n```\\n{content}\\n```\"\n\n            return {\"formatted\": response, \"totalResults\": 1, \"resultsShared\": 1}\n\n        except UnicodeDecodeError:\n            import os\n            
size = os.path.getsize(file_path)\n            return {\"formatted\": f\"Binary file ({_format_size(size)})\", \"totalResults\": 1, \"resultsShared\": 1}\n\n    async def _upload(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Upload content to a repository.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        path = args.get(\"path\")\n        content = args.get(\"content\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if not path:\n            return self._error(\"path is required\")\n        if content is None:\n            return self._error(\"content is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        revision = args.get(\"revision\", \"main\")\n        create_pr = args.get(\"create_pr\", False)\n        commit_message = args.get(\"commit_message\", f\"Upload {path}\")\n\n        file_bytes = content.encode(\"utf-8\") if isinstance(content, str) else content\n\n        result = await _async_call(\n            self.api.upload_file,\n            path_or_fileobj=file_bytes,\n            path_in_repo=path,\n            repo_id=repo_id,\n            repo_type=repo_type,\n            revision=revision,\n            commit_message=commit_message,\n            create_pr=create_pr,\n        )\n\n        url = _build_repo_url(repo_id, repo_type)\n        if create_pr and hasattr(result, \"pr_url\"):\n            response = f\"**Uploaded as PR**\\n{result.pr_url}\"\n        else:\n            response = f\"**Uploaded:** {path}\\n{url}/blob/{revision}/{path}\"\n\n        return {\"formatted\": response, \"totalResults\": 1, \"resultsShared\": 1}\n\n    async def _delete(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Delete files from a repository.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        patterns = args.get(\"patterns\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if not patterns:\n            return 
self._error(\"patterns is required (list of paths/wildcards)\")\n\n        if isinstance(patterns, str):\n            patterns = [patterns]\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        revision = args.get(\"revision\", \"main\")\n        create_pr = args.get(\"create_pr\", False)\n        commit_message = args.get(\"commit_message\", f\"Delete {', '.join(patterns)}\")\n\n        await _async_call(\n            self.api.delete_files,\n            repo_id=repo_id,\n            delete_patterns=patterns,\n            repo_type=repo_type,\n            revision=revision,\n            commit_message=commit_message,\n            create_pr=create_pr,\n        )\n\n        response = f\"**Deleted:** {', '.join(patterns)} from {repo_id}\"\n        return {\"formatted\": response, \"totalResults\": 1, \"resultsShared\": 1}\n\n    def _error(self, message: str) -> ToolResult:\n        \"\"\"Return an error result.\"\"\"\n        return {\"formatted\": message, \"totalResults\": 0, \"resultsShared\": 0, \"isError\": True}\n\n\n# Tool specification\nHF_REPO_FILES_TOOL_SPEC = {\n    \"name\": \"hf_repo_files\",\n    \"description\": (\n        \"Read and write files in HF repos (models/datasets/spaces).\\n\\n\"\n        \"## Operations\\n\"\n        \"- **list**: List files with sizes and structure\\n\"\n        \"- **read**: Read file content (text files only)\\n\"\n        \"- **upload**: Upload content to repo (can create PR)\\n\"\n        \"- **delete**: Delete files/folders (supports wildcards like *.tmp)\\n\\n\"\n        \"## Use when\\n\"\n        \"- Need to see what files exist in a repo\\n\"\n        \"- Want to read config.json, README.md, or other text files\\n\"\n        \"- Uploading training scripts, configs, or results to a repo\\n\"\n        \"- Cleaning up temporary files from a repo\\n\\n\"\n        \"## Examples\\n\"\n        '{\"operation\": \"list\", \"repo_id\": \"meta-llama/Llama-2-7b\"}\\n'\n        '{\"operation\": \"read\", 
\"repo_id\": \"gpt2\", \"path\": \"config.json\"}\\n'\n        '{\"operation\": \"upload\", \"repo_id\": \"my-model\", \"path\": \"README.md\", \"content\": \"# My Model\"}\\n'\n        '{\"operation\": \"upload\", \"repo_id\": \"org/model\", \"path\": \"fix.py\", \"content\": \"...\", \"create_pr\": true}\\n'\n        '{\"operation\": \"delete\", \"repo_id\": \"my-model\", \"patterns\": [\"*.tmp\", \"logs/\"]}\\n\\n'\n        \"## Notes\\n\"\n        \"- For binary files (safetensors, bin), use list to see them but can't read content\\n\"\n        \"- upload/delete require approval (can overwrite/destroy data)\\n\"\n        \"- Use create_pr=true to propose changes instead of direct commit\\n\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"operation\": {\n                \"type\": \"string\",\n                \"enum\": [\"list\", \"read\", \"upload\", \"delete\"],\n                \"description\": \"Operation: list, read, upload, delete\",\n            },\n            \"repo_id\": {\n                \"type\": \"string\",\n                \"description\": \"Repository ID (e.g., 'username/repo-name')\",\n            },\n            \"repo_type\": {\n                \"type\": \"string\",\n                \"enum\": [\"model\", \"dataset\", \"space\"],\n                \"description\": \"Repository type (default: model)\",\n            },\n            \"revision\": {\n                \"type\": \"string\",\n                \"description\": \"Branch/tag/commit (default: main)\",\n            },\n            \"path\": {\n                \"type\": \"string\",\n                \"description\": \"File path for read/upload\",\n            },\n            \"content\": {\n                \"type\": \"string\",\n                \"description\": \"File content for upload\",\n            },\n            \"patterns\": {\n                \"type\": \"array\",\n                \"items\": {\"type\": \"string\"},\n                
\"description\": \"Patterns to delete (e.g., ['*.tmp', 'logs/'])\",\n            },\n            \"create_pr\": {\n                \"type\": \"boolean\",\n                \"description\": \"Create PR instead of direct commit\",\n            },\n            \"commit_message\": {\n                \"type\": \"string\",\n                \"description\": \"Custom commit message\",\n            },\n        },\n        \"required\": [\"operation\"],\n    },\n}\n\n\nasync def hf_repo_files_handler(arguments: Dict[str, Any], session=None) -> tuple[str, bool]:\n    \"\"\"Handler for agent tool router.\"\"\"\n    try:\n        hf_token = session.hf_token if session else None\n        tool = HfRepoFilesTool(hf_token=hf_token)\n        result = await tool.execute(arguments)\n        return result[\"formatted\"], not result.get(\"isError\", False)\n    except Exception as e:\n        return f\"Error: {str(e)}\", False\n"
  },
  {
    "path": "agent/tools/hf_repo_git_tool.py",
    "content": "\"\"\"\nHF Repo Git Tool - Git-like operations on Hugging Face repositories\n\nOperations: branches, tags, PRs, repo management\n\"\"\"\n\nimport asyncio\nfrom typing import Any, Dict, Literal, Optional\n\nfrom huggingface_hub import HfApi\nfrom huggingface_hub.utils import RepositoryNotFoundError\n\nfrom agent.tools.types import ToolResult\n\nOperationType = Literal[\n    \"create_branch\", \"delete_branch\",\n    \"create_tag\", \"delete_tag\",\n    \"list_refs\",\n    \"create_pr\", \"list_prs\", \"get_pr\", \"merge_pr\", \"close_pr\", \"comment_pr\", \"change_pr_status\",\n    \"create_repo\", \"update_repo\",\n]\n\n\nasync def _async_call(func, *args, **kwargs):\n    \"\"\"Wrap synchronous HfApi calls for async context.\"\"\"\n    return await asyncio.to_thread(func, *args, **kwargs)\n\n\ndef _build_repo_url(repo_id: str, repo_type: str = \"model\") -> str:\n    \"\"\"Build the Hub URL for a repository.\"\"\"\n    if repo_type == \"model\":\n        return f\"https://huggingface.co/{repo_id}\"\n    return f\"https://huggingface.co/{repo_type}s/{repo_id}\"\n\n\nclass HfRepoGitTool:\n    \"\"\"Tool for git-like operations on HF repos.\"\"\"\n\n    def __init__(self, hf_token: Optional[str] = None):\n        self.api = HfApi(token=hf_token)\n\n    async def execute(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Execute the specified operation.\"\"\"\n        operation = args.get(\"operation\")\n\n        if not operation:\n            return self._help()\n\n        try:\n            handlers = {\n                \"create_branch\": self._create_branch,\n                \"delete_branch\": self._delete_branch,\n                \"create_tag\": self._create_tag,\n                \"delete_tag\": self._delete_tag,\n                \"list_refs\": self._list_refs,\n                \"create_pr\": self._create_pr,\n                \"list_prs\": self._list_prs,\n                \"get_pr\": self._get_pr,\n                \"merge_pr\": 
self._merge_pr,\n                \"close_pr\": self._close_pr,\n                \"comment_pr\": self._comment_pr,\n                \"change_pr_status\": self._change_pr_status,\n                \"create_repo\": self._create_repo,\n                \"update_repo\": self._update_repo,\n            }\n\n            handler = handlers.get(operation)\n            if handler:\n                return await handler(args)\n            else:\n                ops = \", \".join(handlers.keys())\n                return self._error(f\"Unknown operation: {operation}. Valid: {ops}\")\n\n        except RepositoryNotFoundError:\n            return self._error(f\"Repository not found: {args.get('repo_id')}\")\n        except Exception as e:\n            return self._error(f\"Error: {str(e)}\")\n\n    def _help(self) -> ToolResult:\n        \"\"\"Show usage instructions.\"\"\"\n        return {\n            \"formatted\": \"\"\"**hf_repo_git** - Git-like operations on HF repos\n\n**Branch/Tag:**\n- `create_branch`: `{\"operation\": \"create_branch\", \"repo_id\": \"...\", \"branch\": \"dev\"}`\n- `delete_branch`: `{\"operation\": \"delete_branch\", \"repo_id\": \"...\", \"branch\": \"dev\"}`\n- `create_tag`: `{\"operation\": \"create_tag\", \"repo_id\": \"...\", \"tag\": \"v1.0\"}`\n- `delete_tag`: `{\"operation\": \"delete_tag\", \"repo_id\": \"...\", \"tag\": \"v1.0\"}`\n- `list_refs`: `{\"operation\": \"list_refs\", \"repo_id\": \"...\"}`\n\n**PRs:**\n- `create_pr`: `{\"operation\": \"create_pr\", \"repo_id\": \"...\", \"title\": \"...\"}` (creates draft PR)\n- `list_prs`: `{\"operation\": \"list_prs\", \"repo_id\": \"...\"}` (shows status: draft/open/merged/closed)\n- `get_pr`: `{\"operation\": \"get_pr\", \"repo_id\": \"...\", \"pr_num\": 1}` (shows status)\n- `change_pr_status`: `{\"operation\": \"change_pr_status\", \"repo_id\": \"...\", \"pr_num\": 1, \"new_status\": \"open\"}` (change draft to open)\n- `merge_pr`: `{\"operation\": \"merge_pr\", \"repo_id\": \"...\", 
\"pr_num\": 1}`\n- `close_pr`: `{\"operation\": \"close_pr\", \"repo_id\": \"...\", \"pr_num\": 1}`\n- `comment_pr`: `{\"operation\": \"comment_pr\", \"repo_id\": \"...\", \"pr_num\": 1, \"comment\": \"...\"}`\n\n**Repo:**\n- `create_repo`: `{\"operation\": \"create_repo\", \"repo_id\": \"my-model\", \"private\": true}`\n- `update_repo`: `{\"operation\": \"update_repo\", \"repo_id\": \"...\", \"private\": false}`\"\"\",\n            \"totalResults\": 1,\n            \"resultsShared\": 1,\n        }\n\n    # =========================================================================\n    # BRANCH OPERATIONS\n    # =========================================================================\n\n    async def _create_branch(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Create a new branch.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        branch = args.get(\"branch\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if not branch:\n            return self._error(\"branch is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        from_rev = args.get(\"from_rev\", \"main\")\n\n        await _async_call(\n            self.api.create_branch,\n            repo_id=repo_id,\n            branch=branch,\n            revision=from_rev,\n            repo_type=repo_type,\n            exist_ok=args.get(\"exist_ok\", False),\n        )\n\n        url = f\"{_build_repo_url(repo_id, repo_type)}/tree/{branch}\"\n        return {\"formatted\": f\"**Branch created:** {branch}\\n{url}\", \"totalResults\": 1, \"resultsShared\": 1}\n\n    async def _delete_branch(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Delete a branch.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        branch = args.get(\"branch\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if not branch:\n            return self._error(\"branch is required\")\n\n        repo_type = 
args.get(\"repo_type\", \"model\")\n\n        await _async_call(\n            self.api.delete_branch,\n            repo_id=repo_id,\n            branch=branch,\n            repo_type=repo_type,\n        )\n\n        return {\"formatted\": f\"**Branch deleted:** {branch}\", \"totalResults\": 1, \"resultsShared\": 1}\n\n    # =========================================================================\n    # TAG OPERATIONS\n    # =========================================================================\n\n    async def _create_tag(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Create a tag.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        tag = args.get(\"tag\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if not tag:\n            return self._error(\"tag is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        revision = args.get(\"revision\", \"main\")\n        tag_message = args.get(\"tag_message\", \"\")\n\n        await _async_call(\n            self.api.create_tag,\n            repo_id=repo_id,\n            tag=tag,\n            revision=revision,\n            tag_message=tag_message,\n            repo_type=repo_type,\n            exist_ok=args.get(\"exist_ok\", False),\n        )\n\n        url = f\"{_build_repo_url(repo_id, repo_type)}/tree/{tag}\"\n        return {\"formatted\": f\"**Tag created:** {tag}\\n{url}\", \"totalResults\": 1, \"resultsShared\": 1}\n\n    async def _delete_tag(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Delete a tag.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        tag = args.get(\"tag\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if not tag:\n            return self._error(\"tag is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n\n        await _async_call(\n            self.api.delete_tag,\n            repo_id=repo_id,\n            tag=tag,\n           
 repo_type=repo_type,\n        )\n\n        return {\"formatted\": f\"**Tag deleted:** {tag}\", \"totalResults\": 1, \"resultsShared\": 1}\n\n    # =========================================================================\n    # LIST REFS\n    # =========================================================================\n\n    async def _list_refs(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"List branches and tags.\"\"\"\n        repo_id = args.get(\"repo_id\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n\n        refs = await _async_call(\n            self.api.list_repo_refs,\n            repo_id=repo_id,\n            repo_type=repo_type,\n        )\n\n        branches = [b.name for b in refs.branches] if refs.branches else []\n        tags = [t.name for t in refs.tags] if hasattr(refs, 'tags') and refs.tags else []\n\n        url = _build_repo_url(repo_id, repo_type)\n        lines = [f\"**{repo_id}**\", url, \"\"]\n\n        if branches:\n            lines.append(f\"**Branches ({len(branches)}):** \" + \", \".join(branches))\n        else:\n            lines.append(\"**Branches:** none\")\n\n        if tags:\n            lines.append(f\"**Tags ({len(tags)}):** \" + \", \".join(tags))\n        else:\n            lines.append(\"**Tags:** none\")\n\n        return {\"formatted\": \"\\n\".join(lines), \"totalResults\": len(branches) + len(tags), \"resultsShared\": len(branches) + len(tags)}\n\n    # =========================================================================\n    # PR OPERATIONS\n    # =========================================================================\n\n    async def _create_pr(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Create a pull request.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        title = args.get(\"title\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if 
not title:\n            return self._error(\"title is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        description = args.get(\"description\", \"\")\n\n        result = await _async_call(\n            self.api.create_pull_request,\n            repo_id=repo_id,\n            title=title,\n            description=description,\n            repo_type=repo_type,\n        )\n\n        url = f\"{_build_repo_url(repo_id, repo_type)}/discussions/{result.num}\"\n        return {\n            \"formatted\": f\"**Draft PR #{result.num} created:** {title}\\n{url}\\n\\nAdd commits via upload with revision=\\\"refs/pr/{result.num}\\\"\",\n            \"totalResults\": 1,\n            \"resultsShared\": 1,\n        }\n\n    async def _list_prs(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"List PRs and discussions.\"\"\"\n        repo_id = args.get(\"repo_id\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        status = args.get(\"status\", \"all\")  # open, closed, all\n\n        discussions = list(self.api.get_repo_discussions(\n            repo_id=repo_id,\n            repo_type=repo_type,\n            discussion_status=status if status != \"all\" else None,\n        ))\n\n        if not discussions:\n            return {\"formatted\": f\"No discussions in {repo_id}\", \"totalResults\": 0, \"resultsShared\": 0}\n\n        url = _build_repo_url(repo_id, repo_type)\n        lines = [f\"**{repo_id}** - {len(discussions)} discussions\", f\"{url}/discussions\", \"\"]\n\n        for d in discussions[:20]:\n            if d.status == \"draft\":\n                status_label = \"[DRAFT]\"\n            elif d.status == \"open\":\n                status_label = \"[OPEN]\"\n            elif d.status == \"merged\":\n                status_label = \"[MERGED]\"\n            else:\n                status_label = \"[CLOSED]\"\n            type_label = 
\"PR\" if d.is_pull_request else \"D\"\n            lines.append(f\"{status_label} #{d.num} [{type_label}] {d.title}\")\n\n        return {\"formatted\": \"\\n\".join(lines), \"totalResults\": len(discussions), \"resultsShared\": min(20, len(discussions))}\n\n    async def _get_pr(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Get PR details.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        pr_num = args.get(\"pr_num\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if not pr_num:\n            return self._error(\"pr_num is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n\n        pr = await _async_call(\n            self.api.get_discussion_details,\n            repo_id=repo_id,\n            discussion_num=int(pr_num),\n            repo_type=repo_type,\n        )\n\n        url = f\"{_build_repo_url(repo_id, repo_type)}/discussions/{pr_num}\"\n        status_map = {\n            \"draft\": \"Draft\",\n            \"open\": \"Open\",\n            \"merged\": \"Merged\",\n            \"closed\": \"Closed\"\n        }\n        status = status_map.get(pr.status, pr.status.capitalize())\n        type_label = \"Pull Request\" if pr.is_pull_request else \"Discussion\"\n\n        lines = [\n            f\"**{type_label} #{pr_num}:** {pr.title}\",\n            f\"**Status:** {status}\",\n            f\"**Author:** {pr.author}\",\n            url,\n        ]\n\n        if pr.is_pull_request:\n            if pr.status == \"draft\":\n                lines.append(f\"\\nTo add commits: upload with revision=\\\"refs/pr/{pr_num}\\\"\")\n            elif pr.status == \"open\":\n                lines.append(f\"\\nTo add commits: upload with revision=\\\"refs/pr/{pr_num}\\\"\")\n\n        return {\"formatted\": \"\\n\".join(lines), \"totalResults\": 1, \"resultsShared\": 1}\n\n    async def _merge_pr(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Merge a pull request.\"\"\"\n        
repo_id = args.get(\"repo_id\")\n        pr_num = args.get(\"pr_num\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if not pr_num:\n            return self._error(\"pr_num is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        comment = args.get(\"comment\", \"\")\n\n        await _async_call(\n            self.api.merge_pull_request,\n            repo_id=repo_id,\n            discussion_num=int(pr_num),\n            comment=comment,\n            repo_type=repo_type,\n        )\n\n        url = f\"{_build_repo_url(repo_id, repo_type)}/discussions/{pr_num}\"\n        return {\"formatted\": f\"**PR #{pr_num} merged**\\n{url}\", \"totalResults\": 1, \"resultsShared\": 1}\n\n    async def _close_pr(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Close a PR/discussion.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        pr_num = args.get(\"pr_num\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if not pr_num:\n            return self._error(\"pr_num is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        comment = args.get(\"comment\", \"\")\n\n        await _async_call(\n            self.api.change_discussion_status,\n            repo_id=repo_id,\n            discussion_num=int(pr_num),\n            new_status=\"closed\",\n            comment=comment,\n            repo_type=repo_type,\n        )\n\n        return {\"formatted\": f\"**Discussion #{pr_num} closed**\", \"totalResults\": 1, \"resultsShared\": 1}\n\n    async def _comment_pr(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Add a comment to a PR/discussion.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        pr_num = args.get(\"pr_num\")\n        comment = args.get(\"comment\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if not pr_num:\n            return self._error(\"pr_num is 
required\")\n        if not comment:\n            return self._error(\"comment is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n\n        await _async_call(\n            self.api.comment_discussion,\n            repo_id=repo_id,\n            discussion_num=int(pr_num),\n            comment=comment,\n            repo_type=repo_type,\n        )\n\n        url = f\"{_build_repo_url(repo_id, repo_type)}/discussions/{pr_num}\"\n        return {\"formatted\": f\"**Comment added to #{pr_num}**\\n{url}\", \"totalResults\": 1, \"resultsShared\": 1}\n\n    async def _change_pr_status(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Change PR/discussion status (mainly to convert draft to open).\"\"\"\n        repo_id = args.get(\"repo_id\")\n        pr_num = args.get(\"pr_num\")\n        new_status = args.get(\"new_status\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n        if not pr_num:\n            return self._error(\"pr_num is required\")\n        if not new_status:\n            return self._error(\"new_status is required (open or closed)\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        comment = args.get(\"comment\", \"\")\n\n        await _async_call(\n            self.api.change_discussion_status,\n            repo_id=repo_id,\n            discussion_num=int(pr_num),\n            new_status=new_status,\n            comment=comment,\n            repo_type=repo_type,\n        )\n\n        url = f\"{_build_repo_url(repo_id, repo_type)}/discussions/{pr_num}\"\n        return {\"formatted\": f\"**PR #{pr_num} status changed to {new_status}**\\n{url}\", \"totalResults\": 1, \"resultsShared\": 1}\n\n    # =========================================================================\n    # REPO MANAGEMENT\n    # =========================================================================\n\n    async def _create_repo(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Create a new 
repository.\"\"\"\n        repo_id = args.get(\"repo_id\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        private = args.get(\"private\", True)\n        space_sdk = args.get(\"space_sdk\")\n\n        if repo_type == \"space\" and not space_sdk:\n            return self._error(\"space_sdk required for spaces (gradio/streamlit/docker/static)\")\n\n        kwargs = {\n            \"repo_id\": repo_id,\n            \"repo_type\": repo_type,\n            \"private\": private,\n            \"exist_ok\": args.get(\"exist_ok\", False),\n        }\n        if space_sdk:\n            kwargs[\"space_sdk\"] = space_sdk\n\n        result = await _async_call(self.api.create_repo, **kwargs)\n\n        return {\n            \"formatted\": f\"**Repository created:** {repo_id}\\n**Private:** {private}\\n{result}\",\n            \"totalResults\": 1,\n            \"resultsShared\": 1,\n        }\n\n    async def _update_repo(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Update repository settings.\"\"\"\n        repo_id = args.get(\"repo_id\")\n\n        if not repo_id:\n            return self._error(\"repo_id is required\")\n\n        repo_type = args.get(\"repo_type\", \"model\")\n        private = args.get(\"private\")\n        gated = args.get(\"gated\")\n\n        if private is None and gated is None:\n            return self._error(\"Specify private (bool) or gated ('auto'/'manual'/false)\")\n\n        kwargs = {\"repo_id\": repo_id, \"repo_type\": repo_type}\n        if private is not None:\n            kwargs[\"private\"] = private\n        if gated is not None:\n            kwargs[\"gated\"] = gated\n\n        await _async_call(self.api.update_repo_settings, **kwargs)\n\n        changes = []\n        if private is not None:\n            changes.append(f\"private={private}\")\n        if gated is not None:\n            changes.append(f\"gated={gated}\")\n\n      
  url = f\"{_build_repo_url(repo_id, repo_type)}/settings\"\n        return {\"formatted\": f\"**Settings updated:** {', '.join(changes)}\\n{url}\", \"totalResults\": 1, \"resultsShared\": 1}\n\n    def _error(self, message: str) -> ToolResult:\n        \"\"\"Return an error result.\"\"\"\n        return {\"formatted\": message, \"totalResults\": 0, \"resultsShared\": 0, \"isError\": True}\n\n\n# Tool specification\nHF_REPO_GIT_TOOL_SPEC = {\n    \"name\": \"hf_repo_git\",\n    \"description\": (\n        \"Git-like operations on HF repos: branches, tags, PRs, and repo management.\\n\\n\"\n        \"## Operations\\n\"\n        \"**Branches:** create_branch, delete_branch, list_refs\\n\"\n        \"**Tags:** create_tag, delete_tag\\n\"\n        \"**PRs:** create_pr, list_prs, get_pr, merge_pr, close_pr, comment_pr, change_pr_status\\n\"\n        \"**Repo:** create_repo, update_repo\\n\\n\"\n        \"## Use when\\n\"\n        \"- Creating feature branches for experiments\\n\"\n        \"- Tagging model versions (v1.0, v2.0)\\n\"\n        \"- Opening PRs to contribute to repos you don't own\\n\"\n        \"- Reviewing and merging PRs on your repos\\n\"\n        \"- Creating new model/dataset/space repos\\n\"\n        \"- Changing repo visibility (public/private) or gated access\\n\\n\"\n        \"## Examples\\n\"\n        '{\"operation\": \"list_refs\", \"repo_id\": \"my-model\"}\\n'\n        '{\"operation\": \"create_branch\", \"repo_id\": \"my-model\", \"branch\": \"experiment-v2\"}\\n'\n        '{\"operation\": \"create_tag\", \"repo_id\": \"my-model\", \"tag\": \"v1.0\", \"revision\": \"main\"}\\n'\n        '{\"operation\": \"create_pr\", \"repo_id\": \"org/model\", \"title\": \"Fix tokenizer config\"}\\n'\n        '{\"operation\": \"change_pr_status\", \"repo_id\": \"my-model\", \"pr_num\": 1, \"new_status\": \"open\"}\\n'\n        '{\"operation\": \"merge_pr\", \"repo_id\": \"my-model\", \"pr_num\": 3}\\n'\n        '{\"operation\": \"create_repo\", \"repo_id\": 
\"my-new-model\", \"private\": true}\\n'\n        '{\"operation\": \"update_repo\", \"repo_id\": \"my-model\", \"gated\": \"auto\"}\\n\\n'\n        \"## PR Workflow\\n\"\n        \"1. create_pr → creates draft PR (empty by default)\\n\"\n        \"2. Upload files with revision='refs/pr/N' to add commits\\n\"\n        \"3. change_pr_status with new_status='open' to publish (convert draft to open)\\n\"\n        \"4. merge_pr when ready\\n\\n\"\n        \"## Notes\\n\"\n        \"- PR status: draft (default), open, merged, closed\\n\"\n        \"- delete_branch, delete_tag, merge_pr, create_repo, update_repo require approval\\n\"\n        \"- For spaces, create_repo needs space_sdk (gradio/streamlit/docker/static)\\n\"\n        \"- gated options: 'auto' (instant), 'manual' (review), false (open)\\n\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"operation\": {\n                \"type\": \"string\",\n                \"enum\": [\n                    \"create_branch\", \"delete_branch\",\n                    \"create_tag\", \"delete_tag\", \"list_refs\",\n                    \"create_pr\", \"list_prs\", \"get_pr\", \"merge_pr\", \"close_pr\", \"comment_pr\", \"change_pr_status\",\n                    \"create_repo\", \"update_repo\",\n                ],\n                \"description\": \"Operation to execute\",\n            },\n            \"repo_id\": {\n                \"type\": \"string\",\n                \"description\": \"Repository ID (e.g., 'username/repo-name')\",\n            },\n            \"repo_type\": {\n                \"type\": \"string\",\n                \"enum\": [\"model\", \"dataset\", \"space\"],\n                \"description\": \"Repository type (default: model)\",\n            },\n            \"branch\": {\n                \"type\": \"string\",\n                \"description\": \"Branch name (create_branch, delete_branch)\",\n            },\n            \"from_rev\": {\n                
\"type\": \"string\",\n                \"description\": \"Create branch from this revision (default: main)\",\n            },\n            \"tag\": {\n                \"type\": \"string\",\n                \"description\": \"Tag name (create_tag, delete_tag)\",\n            },\n            \"revision\": {\n                \"type\": \"string\",\n                \"description\": \"Revision for tag (default: main)\",\n            },\n            \"tag_message\": {\n                \"type\": \"string\",\n                \"description\": \"Tag description\",\n            },\n            \"title\": {\n                \"type\": \"string\",\n                \"description\": \"PR title (create_pr)\",\n            },\n            \"description\": {\n                \"type\": \"string\",\n                \"description\": \"PR description (create_pr)\",\n            },\n            \"pr_num\": {\n                \"type\": \"integer\",\n                \"description\": \"PR/discussion number\",\n            },\n            \"comment\": {\n                \"type\": \"string\",\n                \"description\": \"Comment text\",\n            },\n            \"status\": {\n                \"type\": \"string\",\n                \"enum\": [\"open\", \"closed\", \"all\"],\n                \"description\": \"Filter PRs by status (list_prs)\",\n            },\n            \"new_status\": {\n                \"type\": \"string\",\n                \"enum\": [\"open\", \"closed\"],\n                \"description\": \"New status for PR/discussion (change_pr_status)\",\n            },\n            \"private\": {\n                \"type\": \"boolean\",\n                \"description\": \"Make repo private (create_repo, update_repo)\",\n            },\n            \"gated\": {\n                \"type\": \"string\",\n                \"enum\": [\"auto\", \"manual\", \"false\"],\n                \"description\": \"Gated access setting (update_repo)\",\n            },\n            \"space_sdk\": 
{\n                \"type\": \"string\",\n                \"enum\": [\"gradio\", \"streamlit\", \"docker\", \"static\"],\n                \"description\": \"Space SDK (required for create_repo with space)\",\n            },\n        },\n        \"required\": [\"operation\"],\n    },\n}\n\n\nasync def hf_repo_git_handler(arguments: Dict[str, Any], session=None) -> tuple[str, bool]:\n    \"\"\"Handler for agent tool router.\"\"\"\n    try:\n        hf_token = session.hf_token if session else None\n        tool = HfRepoGitTool(hf_token=hf_token)\n        result = await tool.execute(arguments)\n        return result[\"formatted\"], not result.get(\"isError\", False)\n    except Exception as e:\n        return f\"Error: {str(e)}\", False\n"
  },
  {
    "path": "agent/tools/jobs_tool.py",
    "content": "\"\"\"\nHugging Face Jobs Tool - Using huggingface-hub library\n\nRefactored to use official huggingface-hub library instead of custom HTTP client\n\"\"\"\n\nimport asyncio\nimport base64\nimport http.client\nimport os\nimport re\nfrom typing import Any, Dict, Literal, Optional, Callable, Awaitable\n\nimport logging\n\nimport httpx\nfrom huggingface_hub import HfApi\nfrom huggingface_hub.utils import HfHubHTTPError\n\nfrom agent.core.session import Event\nfrom agent.tools.types import ToolResult\n\nlogger = logging.getLogger(__name__)\nfrom agent.tools.utilities import (\n    format_job_details,\n    format_jobs_table,\n    format_scheduled_job_details,\n    format_scheduled_jobs_table,\n)\n\n# Hardware flavors\nCPU_FLAVORS = [\"cpu-basic\", \"cpu-upgrade\"]\nGPU_FLAVORS = [\n    \"t4-small\",\n    \"t4-medium\",\n    \"a10g-small\",\n    \"a10g-large\",\n    \"a10g-largex2\",\n    \"a10g-largex4\",\n    \"a100-large\",\n    \"a100x4\",\n    \"a100x8\",\n    \"l4x1\",\n    \"l4x4\",\n    \"l40sx1\",\n    \"l40sx4\",\n    \"l40sx8\",\n]\n\n# Detailed specs for display (vCPU/RAM/GPU VRAM)\nCPU_FLAVORS_DESC = \"cpu-basic(2vCPU/16GB), cpu-upgrade(8vCPU/32GB)\"\nGPU_FLAVORS_DESC = (\n    \"t4-small(4vCPU/15GB/GPU 16GB), t4-medium(8vCPU/30GB/GPU 16GB), \"\n    \"a10g-small(4vCPU/15GB/GPU 24GB), a10g-large(12vCPU/46GB/GPU 24GB), \"\n    \"a10g-largex2(24vCPU/92GB/GPU 48GB), a10g-largex4(48vCPU/184GB/GPU 96GB), \"\n    \"a100-large(12vCPU/142GB/GPU 80GB), a100x4(48vCPU/568GB/GPU 320GB), a100x8(96vCPU/1136GB/GPU 640GB), \"\n    \"l4x1(8vCPU/30GB/GPU 24GB), l4x4(48vCPU/186GB/GPU 96GB), \"\n    \"l40sx1(8vCPU/62GB/GPU 48GB), l40sx4(48vCPU/382GB/GPU 192GB), l40sx8(192vCPU/1534GB/GPU 384GB)\"\n)\nSPECIALIZED_FLAVORS = [\"inf2x6\"]\nALL_FLAVORS = CPU_FLAVORS + GPU_FLAVORS + SPECIALIZED_FLAVORS\n\n# Operation names\nOperationType = Literal[\n    \"run\",\n    \"ps\",\n    \"logs\",\n    \"inspect\",\n    \"cancel\",\n    \"scheduled run\",\n    \"scheduled ps\",\n 
   \"scheduled inspect\",\n    \"scheduled delete\",\n    \"scheduled suspend\",\n    \"scheduled resume\",\n]\n\n# Constants\nUV_DEFAULT_IMAGE = \"ghcr.io/astral-sh/uv:python3.12-bookworm\"\n\n\ndef _filter_uv_install_output(logs: list[str]) -> list[str]:\n    \"\"\"\n    Filter out UV package installation output from logs.\n\n    Replaces installation details with \"[installs truncated]\" and keeps\n    the \"Installed X packages in Y ms/s\" summary line.\n\n    Args:\n        logs: List of log lines\n\n    Returns:\n        Filtered list of log lines\n    \"\"\"\n    if not logs:\n        return logs\n\n    # Regex pattern to match: \"Installed X packages in Y ms\" or \"Installed X package in Y s\"\n    install_pattern = re.compile(\n        r\"^Installed\\s+\\d+\\s+packages?\\s+in\\s+\\d+(?:\\.\\d+)?\\s*(?:ms|s)$\"\n    )\n\n    # Find the index of the \"Installed X packages\" line\n    install_line_idx = None\n    for idx, line in enumerate(logs):\n        if install_pattern.match(line.strip()):\n            install_line_idx = idx\n            break\n\n    # If pattern found, replace installation details with truncation message\n    if install_line_idx is not None and install_line_idx > 0:\n        # Keep logs from the \"Installed X packages\" line onward\n        # Add truncation message before the \"Installed\" line\n        return [\"[installs truncated]\"] + logs[install_line_idx:]\n\n    # If pattern not found, return original logs\n    return logs\n\n\n_ANSI_RE = re.compile(r'\\x1b\\[[0-9;]*[a-zA-Z]|\\x1b\\].*?\\x07')\n\n\ndef _strip_ansi(text: str) -> str:\n    return _ANSI_RE.sub('', text)\n\n\n_DEFAULT_ENV = {\n    \"HF_HUB_DISABLE_PROGRESS_BARS\": \"1\",\n    \"TQDM_DISABLE\": \"1\",\n    \"TRANSFORMERS_VERBOSITY\": \"warning\",\n    \"HF_HUB_ENABLE_HF_TRANSFER\": \"1\",\n    \"UV_NO_PROGRESS\": \"1\",\n}\n\n\ndef _add_default_env(params: Dict[str, Any] | None) -> Dict[str, Any]:\n    \"\"\"Inject default env vars for clean, agent-friendly 
output.\"\"\"\n    result = dict(_DEFAULT_ENV)\n    result.update(params or {})  # user-provided values override defaults\n    return result\n\n\ndef _add_environment_variables(\n    params: Dict[str, Any] | None, user_token: str | None = None\n) -> Dict[str, Any]:\n    token = user_token or \"\"\n\n    # Start with user-provided env vars, then force-set token last\n    result = dict(params or {})\n\n    # If the caller passed HF_TOKEN=\"$HF_TOKEN\", ignore it.\n    if result.get(\"HF_TOKEN\", \"\").strip().startswith(\"$\"):\n        result.pop(\"HF_TOKEN\", None)\n\n    # Set both names to be safe (different libs check different vars)\n    if token:\n        result[\"HF_TOKEN\"] = token\n        result[\"HUGGINGFACE_HUB_TOKEN\"] = token\n\n    return result\n\n\ndef _build_uv_command(\n    script: str,\n    with_deps: list[str] | None = None,\n    python: str | None = None,\n    script_args: list[str] | None = None,\n) -> list[str]:\n    \"\"\"Build UV run command\"\"\"\n    parts = [\"uv\", \"run\"]\n\n    if with_deps:\n        for dep in with_deps:\n            parts.extend([\"--with\", dep])\n\n    if python:\n        parts.extend([\"-p\", python])\n\n    parts.append(script)\n\n    if script_args:\n        parts.extend(script_args)\n\n    # add defaults\n    # parts.extend([\"--push_to_hub\"])\n    return parts\n\n\ndef _wrap_inline_script(\n    script: str,\n    with_deps: list[str] | None = None,\n    python: str | None = None,\n    script_args: list[str] | None = None,\n) -> str:\n    \"\"\"Wrap inline script with base64 encoding to avoid file creation\"\"\"\n    encoded = base64.b64encode(script.encode(\"utf-8\")).decode(\"utf-8\")\n    # Build the uv command with stdin (-)\n    uv_command = _build_uv_command(\"-\", with_deps, python, script_args)\n    # Join command parts with proper spacing\n    uv_command_str = \" \".join(uv_command)\n    return f'echo \"{encoded}\" | base64 -d | {uv_command_str}'\n\n\ndef _ensure_hf_transfer_dependency(deps: 
list[str] | None) -> list[str]:\n    \"\"\"Ensure hf-transfer is included in the dependencies list\"\"\"\n\n    if isinstance(deps, list):\n        deps_copy = deps.copy()  # Don't modify the original\n        if \"hf-transfer\" not in deps_copy:\n            deps_copy.append(\"hf-transfer\")\n        return deps_copy\n\n    return [\"hf-transfer\"]\n\n\ndef _resolve_uv_command(\n    script: str,\n    with_deps: list[str] | None = None,\n    python: str | None = None,\n    script_args: list[str] | None = None,\n) -> list[str]:\n    \"\"\"Resolve UV command based on script source (URL, inline, or file path)\"\"\"\n    # If URL, use directly\n    if script.startswith(\"http://\") or script.startswith(\"https://\"):\n        return _build_uv_command(script, with_deps, python, script_args)\n\n    # If contains newline, treat as inline script\n    if \"\\n\" in script:\n        wrapped = _wrap_inline_script(script, with_deps, python, script_args)\n        return [\"/bin/sh\", \"-lc\", wrapped]\n\n    # Otherwise, treat as file path\n    return _build_uv_command(script, with_deps, python, script_args)\n\n\nasync def _async_call(func, *args, **kwargs):\n    \"\"\"Wrap synchronous HfApi calls for async context\"\"\"\n    return await asyncio.to_thread(func, *args, **kwargs)\n\n\ndef _job_info_to_dict(job_info) -> Dict[str, Any]:\n    \"\"\"Convert JobInfo object to dictionary for formatting functions\"\"\"\n    return {\n        \"id\": job_info.id,\n        \"status\": {\"stage\": job_info.status.stage, \"message\": job_info.status.message},\n        \"command\": job_info.command,\n        \"createdAt\": job_info.created_at.isoformat(),\n        \"dockerImage\": job_info.docker_image,\n        \"spaceId\": job_info.space_id,\n        \"hardware_flavor\": job_info.flavor,\n        \"owner\": {\"name\": job_info.owner.name},\n    }\n\n\ndef _scheduled_job_info_to_dict(scheduled_job_info) -> Dict[str, Any]:\n    \"\"\"Convert ScheduledJobInfo object to dictionary for 
formatting functions\"\"\"\n    job_spec = scheduled_job_info.job_spec\n\n    # Extract last run and next run from status\n    last_run = None\n    next_run = None\n    if scheduled_job_info.status:\n        if scheduled_job_info.status.last_job:\n            last_run = scheduled_job_info.status.last_job.created_at\n            if last_run:\n                last_run = (\n                    last_run.isoformat()\n                    if hasattr(last_run, \"isoformat\")\n                    else str(last_run)\n                )\n        if scheduled_job_info.status.next_job_run_at:\n            next_run = scheduled_job_info.status.next_job_run_at\n            next_run = (\n                next_run.isoformat()\n                if hasattr(next_run, \"isoformat\")\n                else str(next_run)\n            )\n\n    return {\n        \"id\": scheduled_job_info.id,\n        \"schedule\": scheduled_job_info.schedule,\n        \"suspend\": scheduled_job_info.suspend,\n        \"lastRun\": last_run,\n        \"nextRun\": next_run,\n        \"jobSpec\": {\n            \"dockerImage\": job_spec.docker_image,\n            \"spaceId\": job_spec.space_id,\n            \"command\": job_spec.command or [],\n            \"hardware_flavor\": job_spec.flavor or \"cpu-basic\",\n        },\n    }\n\n\nclass HfJobsTool:\n    \"\"\"Tool for managing Hugging Face compute jobs using huggingface-hub library\"\"\"\n\n    def __init__(\n        self,\n        hf_token: Optional[str] = None,\n        namespace: Optional[str] = None,\n        log_callback: Optional[Callable[[str], Awaitable[None]]] = None,\n        session: Any = None,\n        tool_call_id: Optional[str] = None,\n    ):\n        self.hf_token = hf_token\n        self.api = HfApi(token=hf_token)\n        self.namespace = namespace\n        self.log_callback = log_callback\n        self.session = session\n        self.tool_call_id = tool_call_id\n\n    async def execute(self, params: Dict[str, Any]) -> ToolResult:\n        
\"\"\"Execute the specified operation\"\"\"\n        operation = params.get(\"operation\")\n\n        args = params\n\n        # If no operation provided, return error\n        if not operation:\n            return {\n                \"formatted\": \"Error: 'operation' parameter is required. See tool description for available operations and usage examples.\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        # Normalize operation name\n        operation = operation.lower()\n\n        try:\n            # Route to appropriate handler\n            if operation == \"run\":\n                return await self._run_job(args)\n            elif operation == \"ps\":\n                return await self._list_jobs(args)\n            elif operation == \"logs\":\n                return await self._get_logs(args)\n            elif operation == \"inspect\":\n                return await self._inspect_job(args)\n            elif operation == \"cancel\":\n                return await self._cancel_job(args)\n            elif operation == \"scheduled run\":\n                return await self._scheduled_run(args)\n            elif operation == \"scheduled ps\":\n                return await self._list_scheduled_jobs(args)\n            elif operation == \"scheduled inspect\":\n                return await self._inspect_scheduled_job(args)\n            elif operation == \"scheduled delete\":\n                return await self._delete_scheduled_job(args)\n            elif operation == \"scheduled suspend\":\n                return await self._suspend_scheduled_job(args)\n            elif operation == \"scheduled resume\":\n                return await self._resume_scheduled_job(args)\n            else:\n                return {\n                    \"formatted\": f'Unknown operation: \"{operation}\"\\n\\n'\n                    \"Available operations:\\n\"\n                    \"- run, ps, logs, 
inspect, cancel\\n\"\n                    \"- scheduled run, scheduled ps, scheduled inspect, \"\n                    \"scheduled delete, scheduled suspend, scheduled resume\\n\\n\"\n                    \"Call this tool with no operation for full usage instructions.\",\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                    \"isError\": True,\n                }\n\n        except HfHubHTTPError as e:\n            return {\n                \"formatted\": f\"API Error: {str(e)}\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n        except Exception as e:\n            return {\n                \"formatted\": f\"Error executing {operation}: {str(e)}\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n    async def _wait_for_job_completion(\n        self, job_id: str, namespace: Optional[str] = None\n    ) -> tuple[str, list[str]]:\n        \"\"\"\n        Stream job logs until completion, printing them in real-time.\n        Implements retry logic to handle connection drops during long-running jobs.\n\n        Returns:\n            tuple: (final_status, all_logs)\n        \"\"\"\n        all_logs = []\n        terminal_states = {\"COMPLETED\", \"FAILED\", \"CANCELED\", \"ERROR\"}\n        max_retries = 100  # Allow many retries for 8h+ jobs\n        retry_delay = 5  # Seconds between retries\n\n        for _ in range(max_retries):\n            try:\n                # Use a queue to bridge sync generator to async consumer\n                queue = asyncio.Queue()\n                loop = asyncio.get_running_loop()\n\n                def log_producer():\n                    try:\n                        # fetch_job_logs is a blocking sync generator\n                        logs_gen = self.api.fetch_job_logs(job_id=job_id, namespace=namespace)\n                  
      for line in logs_gen:\n                            # Push line to queue thread-safely\n                            loop.call_soon_threadsafe(queue.put_nowait, line)\n                        # Signal EOF\n                        loop.call_soon_threadsafe(queue.put_nowait, None)\n                    except Exception as e:\n                        # Signal error\n                        loop.call_soon_threadsafe(queue.put_nowait, e)\n\n                # Start producer in a background thread so it doesn't block the event loop\n                producer_future = loop.run_in_executor(None, log_producer)\n\n                # Consume logs from the queue as they arrive\n                while True:\n                    item = await queue.get()\n\n                    # EOF sentinel\n                    if item is None:\n                        break\n\n                    # Error occurred in producer\n                    if isinstance(item, Exception):\n                        raise item\n\n                    # Process log line\n                    log_line = item\n                    logger.debug(log_line)\n                    if self.log_callback:\n                        await self.log_callback(log_line)\n                    all_logs.append(log_line)\n\n                # If we get here, streaming completed normally (EOF received)\n                # Wait for thread to cleanup (should be done)\n                await producer_future\n                break\n\n            except (\n                ConnectionError,\n                TimeoutError,\n                OSError,\n                http.client.IncompleteRead,\n                httpx.RemoteProtocolError,\n                httpx.ReadError,\n                HfHubHTTPError,\n            ) as e:\n                # Connection dropped - check if job is still running\n                try:\n                    job_info = await _async_call(\n                        self.api.inspect_job, job_id=job_id, namespace=namespace\n       
             )\n                    current_status = job_info.status.stage\n\n                    if current_status in terminal_states:\n                        # Job finished, no need to retry\n                        logger.info(f\"Job reached terminal state: {current_status}\")\n                        break\n\n                    # Job still running, retry connection\n                    logger.warning(\n                        f\"Connection interrupted ({str(e)[:50]}...), reconnecting in {retry_delay}s...\"\n                    )\n                    await asyncio.sleep(retry_delay)\n                    continue\n\n                except (ConnectionError, TimeoutError, OSError):\n                    # Can't even check job status, wait and retry\n                    logger.warning(f\"Connection error, retrying in {retry_delay}s...\")\n                    await asyncio.sleep(retry_delay)\n                    continue\n\n        # Fetch final job status — retry briefly if still RUNNING\n        # (the API may lag a few seconds behind the log stream ending)\n        final_status = \"UNKNOWN\"\n        for _ in range(6):\n            job_info = await _async_call(\n                self.api.inspect_job, job_id=job_id, namespace=namespace\n            )\n            final_status = job_info.status.stage\n            if final_status in terminal_states:\n                break\n            await asyncio.sleep(2.5)\n\n        return final_status, all_logs\n\n    async def _run_job(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Run a job using HfApi.run_job() - smart detection of Python vs Docker mode\"\"\"\n        try:\n            script = args.get(\"script\")\n            command = args.get(\"command\")\n\n            # Validate mutually exclusive parameters\n            if script and command:\n                raise ValueError(\n                    \"'script' and 'command' are mutually exclusive. 
Provide one or the other, not both.\"\n                )\n\n            if not script and not command:\n                raise ValueError(\n                    \"Either 'script' (for Python) or 'command' (for Docker) must be provided.\"\n                )\n\n            # Python mode: script provided\n            if script:\n                # Get dependencies and ensure hf-transfer is included\n                deps = _ensure_hf_transfer_dependency(args.get(\"dependencies\"))\n\n                # Resolve the command based on script type (URL, inline, or file)\n                command = _resolve_uv_command(\n                    script=script,\n                    with_deps=deps,\n                    python=args.get(\"python\"),\n                    script_args=args.get(\"script_args\"),\n                )\n\n                # Use UV image unless overridden\n                image = args.get(\"image\", UV_DEFAULT_IMAGE)\n                job_type = \"Python\"\n\n            # Docker mode: command provided\n            else:\n                image = args.get(\"image\", \"python:3.12\")\n                job_type = \"Docker\"\n\n            # Run the job\n            job = await _async_call(\n                self.api.run_job,\n                image=image,\n                command=command,\n                env=_add_default_env(args.get(\"env\")),\n                secrets=_add_environment_variables(args.get(\"secrets\"), self.hf_token),\n                flavor=args.get(\"hardware_flavor\", \"cpu-basic\"),\n                timeout=args.get(\"timeout\", \"30m\"),\n                namespace=self.namespace,\n            )\n\n            # Track job ID for cancellation on interrupt\n            if self.session:\n                self.session._running_job_ids.add(job.id)\n\n            # Send job URL immediately after job creation (before waiting for completion)\n            if self.session and self.tool_call_id:\n                await self.session.send_event(\n                    
Event(\n                        event_type=\"tool_state_change\",\n                        data={\n                            \"tool_call_id\": self.tool_call_id,\n                            \"tool\": \"hf_jobs\",\n                            \"state\": \"running\",\n                            \"jobUrl\": job.url,\n                        },\n                    )\n                )\n\n            # Wait for completion and stream logs\n            logger.info(f\"{job_type} job started: {job.url}\")\n            logger.info(\"Streaming logs...\")\n\n            final_status, all_logs = await self._wait_for_job_completion(\n                job_id=job.id,\n                namespace=self.namespace,\n            )\n\n            # Untrack job ID (completed or failed, no longer needs cancellation)\n            if self.session:\n                self.session._running_job_ids.discard(job.id)\n\n            # Notify frontend of final status\n            if self.session and self.tool_call_id:\n                await self.session.send_event(\n                    Event(\n                        event_type=\"tool_state_change\",\n                        data={\n                            \"tool_call_id\": self.tool_call_id,\n                            \"tool\": \"hf_jobs\",\n                            \"state\": final_status.lower(),\n                            \"jobUrl\": job.url,\n                        },\n                    )\n                )\n\n            # Filter out UV package installation output\n            filtered_logs = _filter_uv_install_output(all_logs)\n\n            # Format all logs for the agent\n            log_text = _strip_ansi(\"\\n\".join(filtered_logs)) if filtered_logs else \"(no logs)\"\n\n            response = f\"\"\"{job_type} job completed!\n\n**Job ID:** {job.id}\n**Final Status:** {final_status}\n**View at:** {job.url}\n\n**Logs:**\n```\n{log_text}\n```\"\"\"\n            return {\"formatted\": response, \"totalResults\": 1, 
\"resultsShared\": 1}\n\n        except Exception as e:\n            raise Exception(f\"Failed to run job: {str(e)}\")\n\n    async def _list_jobs(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"List jobs using HfApi.list_jobs()\"\"\"\n        jobs_list = await _async_call(self.api.list_jobs, namespace=self.namespace)\n\n        # Filter jobs\n        if not args.get(\"all\", False):\n            jobs_list = [j for j in jobs_list if j.status.stage == \"RUNNING\"]\n\n        if args.get(\"status\"):\n            status_filter = args[\"status\"].upper()\n            jobs_list = [j for j in jobs_list if status_filter in j.status.stage]\n\n        # Convert JobInfo objects to dicts for formatting\n        jobs_dicts = [_job_info_to_dict(j) for j in jobs_list]\n\n        table = format_jobs_table(jobs_dicts)\n\n        if len(jobs_list) == 0:\n            if args.get(\"all\", False):\n                return {\n                    \"formatted\": \"No jobs found.\",\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                }\n            return {\n                \"formatted\": 'No running jobs found. 
Use `{\"operation\": \"ps\", \"all\": true}` to show all jobs.',\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n            }\n\n        response = f\"**Jobs ({len(jobs_list)} total):**\\n\\n{table}\"\n        return {\n            \"formatted\": response,\n            \"totalResults\": len(jobs_list),\n            \"resultsShared\": len(jobs_list),\n        }\n\n    async def _get_logs(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Fetch logs using HfApi.fetch_job_logs()\"\"\"\n        job_id = args.get(\"job_id\")\n        if not job_id:\n            return {\n                \"formatted\": \"job_id is required\",\n                \"isError\": True,\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n            }\n\n        try:\n            # Fetch logs (returns generator, convert to list)\n            logs_gen = self.api.fetch_job_logs(job_id=job_id, namespace=self.namespace)\n            logs = await _async_call(list, logs_gen)\n\n            if not logs:\n                return {\n                    \"formatted\": f\"No logs available for job {job_id}\",\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                }\n\n            log_text = _strip_ansi(\"\\n\".join(logs))\n            return {\n                \"formatted\": f\"**Logs for {job_id}:**\\n\\n```\\n{log_text}\\n```\",\n                \"totalResults\": 1,\n                \"resultsShared\": 1,\n            }\n\n        except Exception as e:\n            return {\n                \"formatted\": f\"Failed to fetch logs: {str(e)}\",\n                \"isError\": True,\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n            }\n\n    async def _inspect_job(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Inspect job using HfApi.inspect_job()\"\"\"\n        job_id = args.get(\"job_id\")\n        if not job_id:\n            return {\n                
\"formatted\": \"job_id is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        job_ids = job_id if isinstance(job_id, list) else [job_id]\n\n        jobs = []\n        for jid in job_ids:\n            try:\n                job = await _async_call(\n                    self.api.inspect_job,\n                    job_id=jid,\n                    namespace=self.namespace,\n                )\n                jobs.append(_job_info_to_dict(job))\n            except Exception as e:\n                raise Exception(f\"Failed to inspect job {jid}: {str(e)}\")\n\n        formatted_details = format_job_details(jobs)\n        response = f\"**Job Details** ({len(jobs)} job{'s' if len(jobs) > 1 else ''}):\\n\\n{formatted_details}\"\n\n        return {\n            \"formatted\": response,\n            \"totalResults\": len(jobs),\n            \"resultsShared\": len(jobs),\n        }\n\n    async def _cancel_job(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Cancel job using HfApi.cancel_job()\"\"\"\n        job_id = args.get(\"job_id\")\n        if not job_id:\n            return {\n                \"formatted\": \"job_id is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        await _async_call(\n            self.api.cancel_job,\n            job_id=job_id,\n            namespace=self.namespace,\n        )\n\n        response = f\"\"\"✓ Job {job_id} has been cancelled.\n\nTo verify, call this tool with `{{\"operation\": \"inspect\", \"job_id\": \"{job_id}\"}}`\"\"\"\n\n        return {\"formatted\": response, \"totalResults\": 1, \"resultsShared\": 1}\n\n    async def _scheduled_run(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Create scheduled job using HfApi.create_scheduled_job() - smart detection of Python vs Docker mode\"\"\"\n        try:\n            script 
= args.get(\"script\")\n            command = args.get(\"command\")\n            schedule = args.get(\"schedule\")\n\n            if not schedule:\n                raise ValueError(\"schedule is required for scheduled jobs\")\n\n            # Validate mutually exclusive parameters\n            if script and command:\n                raise ValueError(\n                    \"'script' and 'command' are mutually exclusive. Provide one or the other, not both.\"\n                )\n\n            if not script and not command:\n                raise ValueError(\n                    \"Either 'script' (for Python) or 'command' (for Docker) must be provided.\"\n                )\n\n            # Python mode: script provided\n            if script:\n                # Get dependencies and ensure hf-transfer is included\n                deps = _ensure_hf_transfer_dependency(args.get(\"dependencies\"))\n\n                # Resolve the command based on script type\n                command = _resolve_uv_command(\n                    script=script,\n                    with_deps=deps,\n                    python=args.get(\"python\"),\n                    script_args=args.get(\"script_args\"),\n                )\n\n                # Use UV image unless overridden\n                image = args.get(\"image\", UV_DEFAULT_IMAGE)\n                job_type = \"Python\"\n\n            # Docker mode: command provided\n            else:\n                image = args.get(\"image\", \"python:3.12\")\n                job_type = \"Docker\"\n\n            # Create scheduled job\n            scheduled_job = await _async_call(\n                self.api.create_scheduled_job,\n                image=image,\n                command=command,\n                schedule=schedule,\n                env=_add_default_env(args.get(\"env\")),\n                secrets=_add_environment_variables(args.get(\"secrets\"), self.hf_token),\n                flavor=args.get(\"hardware_flavor\", \"cpu-basic\"),\n           
     timeout=args.get(\"timeout\", \"30m\"),\n                namespace=self.namespace,\n            )\n\n            scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)\n\n            response = f\"\"\"✓ Scheduled {job_type} job created successfully!\n\n**Scheduled Job ID:** {scheduled_dict[\"id\"]}\n**Schedule:** {scheduled_dict[\"schedule\"]}\n**Suspended:** {\"Yes\" if scheduled_dict.get(\"suspend\") else \"No\"}\n**Next Run:** {scheduled_dict.get(\"nextRun\", \"N/A\")}\n\nTo inspect, call this tool with `{{\"operation\": \"scheduled inspect\", \"scheduled_job_id\": \"{scheduled_dict[\"id\"]}\"}}`\nTo list all, call this tool with `{{\"operation\": \"scheduled ps\"}}`\"\"\"\n\n            return {\"formatted\": response, \"totalResults\": 1, \"resultsShared\": 1}\n\n        except Exception as e:\n            raise Exception(f\"Failed to create scheduled job: {str(e)}\")\n\n    async def _list_scheduled_jobs(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"List scheduled jobs using HfApi.list_scheduled_jobs()\"\"\"\n        scheduled_jobs_list = await _async_call(\n            self.api.list_scheduled_jobs,\n            namespace=self.namespace,\n        )\n\n        # Filter jobs - default: hide suspended jobs unless --all is specified\n        if not args.get(\"all\", False):\n            scheduled_jobs_list = [j for j in scheduled_jobs_list if not j.suspend]\n\n        # Convert to dicts for formatting\n        scheduled_dicts = [_scheduled_job_info_to_dict(j) for j in scheduled_jobs_list]\n\n        table = format_scheduled_jobs_table(scheduled_dicts)\n\n        if len(scheduled_jobs_list) == 0:\n            if args.get(\"all\", False):\n                return {\n                    \"formatted\": \"No scheduled jobs found.\",\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                }\n            return {\n                \"formatted\": 'No active scheduled jobs found. 
Use `{\"operation\": \"scheduled ps\", \"all\": true}` to show suspended jobs.',\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n            }\n\n        response = f\"**Scheduled Jobs ({len(scheduled_jobs_list)} total):**\\n\\n{table}\"\n        return {\n            \"formatted\": response,\n            \"totalResults\": len(scheduled_jobs_list),\n            \"resultsShared\": len(scheduled_jobs_list),\n        }\n\n    async def _inspect_scheduled_job(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Inspect scheduled job using HfApi.inspect_scheduled_job()\"\"\"\n        scheduled_job_id = args.get(\"scheduled_job_id\")\n        if not scheduled_job_id:\n            return {\n                \"formatted\": \"scheduled_job_id is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        scheduled_job = await _async_call(\n            self.api.inspect_scheduled_job,\n            scheduled_job_id=scheduled_job_id,\n            namespace=self.namespace,\n        )\n\n        scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)\n        formatted_details = format_scheduled_job_details(scheduled_dict)\n\n        return {\n            \"formatted\": f\"**Scheduled Job Details:**\\n\\n{formatted_details}\",\n            \"totalResults\": 1,\n            \"resultsShared\": 1,\n        }\n\n    async def _delete_scheduled_job(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Delete scheduled job using HfApi.delete_scheduled_job()\"\"\"\n        scheduled_job_id = args.get(\"scheduled_job_id\")\n        if not scheduled_job_id:\n            return {\n                \"formatted\": \"scheduled_job_id is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        await _async_call(\n            self.api.delete_scheduled_job,\n            
scheduled_job_id=scheduled_job_id,\n            namespace=self.namespace,\n        )\n\n        return {\n            \"formatted\": f\"✓ Scheduled job {scheduled_job_id} has been deleted.\",\n            \"totalResults\": 1,\n            \"resultsShared\": 1,\n        }\n\n    async def _suspend_scheduled_job(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Suspend scheduled job using HfApi.suspend_scheduled_job()\"\"\"\n        scheduled_job_id = args.get(\"scheduled_job_id\")\n        if not scheduled_job_id:\n            return {\n                \"formatted\": \"scheduled_job_id is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        await _async_call(\n            self.api.suspend_scheduled_job,\n            scheduled_job_id=scheduled_job_id,\n            namespace=self.namespace,\n        )\n\n        response = f\"\"\"✓ Scheduled job {scheduled_job_id} has been suspended.\n\nTo resume, call this tool with `{{\"operation\": \"scheduled resume\", \"scheduled_job_id\": \"{scheduled_job_id}\"}}`\"\"\"\n\n        return {\"formatted\": response, \"totalResults\": 1, \"resultsShared\": 1}\n\n    async def _resume_scheduled_job(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Resume scheduled job using HfApi.resume_scheduled_job()\"\"\"\n        scheduled_job_id = args.get(\"scheduled_job_id\")\n        if not scheduled_job_id:\n            return {\n                \"formatted\": \"scheduled_job_id is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        await _async_call(\n            self.api.resume_scheduled_job,\n            scheduled_job_id=scheduled_job_id,\n            namespace=self.namespace,\n        )\n\n        response = f\"\"\"✓ Scheduled job {scheduled_job_id} has been resumed.\n\nTo inspect, call this tool with `{{\"operation\": 
\"scheduled inspect\", \"scheduled_job_id\": \"{scheduled_job_id}\"}}`\"\"\"\n\n        return {\"formatted\": response, \"totalResults\": 1, \"resultsShared\": 1}\n\n\n# Tool specification for agent registration\nHF_JOBS_TOOL_SPEC = {\n    \"name\": \"hf_jobs\",\n    \"description\": (\n        \"Execute Python scripts or Docker containers on HF cloud infrastructure.\\n\\n\"\n        \"Two modes (mutually exclusive): Python mode (script + dependencies) or Docker mode (command + image). \"\n        \"Provide exactly ONE of 'script' or 'command'.\\n\\n\"\n        \"BEFORE submitting training/fine-tuning jobs:\\n\"\n        \"- You MUST have called github_find_examples + github_read_file to find a working reference implementation. \"\n        \"Scripts based on your internal knowledge WILL use outdated APIs and fail.\\n\"\n        \"- You MUST have validated dataset format via hf_inspect_dataset or hub_repo_details.\\n\"\n        \"- Training config MUST include push_to_hub=True and hub_model_id. \"\n        \"Job storage is EPHEMERAL — all files are deleted when the job ends. Without push_to_hub, trained models are lost permanently.\\n\"\n        \"- Include trackio monitoring and provide the dashboard URL to the user.\\n\\n\"\n        \"BATCH/ABLATION JOBS: Submit ONE job first. Check logs to confirm it starts training successfully. \"\n        \"Only then submit the remaining jobs. Never submit all at once — if there's a bug, all jobs fail.\\n\\n\"\n        \"Operations: run, ps, logs, inspect, cancel, scheduled run/ps/inspect/delete/suspend/resume.\\n\\n\"\n        f\"Hardware: CPU: {CPU_FLAVORS_DESC}. GPU: {GPU_FLAVORS_DESC}.\\n\"\n        \"Common picks: t4-small ($0.60/hr, 1-3B), a10g-large ($2/hr, 7-13B), a100-large ($4/hr, 30B+), h100 ($6/hr, 70B+). \"\n        \"Note: a10g-small and a10g-large have the SAME 24GB GPU — the difference is CPU/RAM only.\\n\\n\"\n        \"OOM RECOVERY: When a training job fails with CUDA OOM:\\n\"\n        \"1. 
Reduce per_device_train_batch_size and increase gradient_accumulation_steps proportionally (keep effective batch size identical)\\n\"\n        \"2. Enable gradient_checkpointing=True\\n\"\n        \"3. Upgrade to larger GPU (a10g→a100→h100)\\n\"\n        \"Do NOT switch training methods (e.g. full SFT to LoRA) or reduce max_length — those change what the user gets and require explicit approval.\\n\\n\"\n        \"Examples:\\n\"\n        \"Training: {'operation': 'run', 'script': '/app/train.py', 'dependencies': ['transformers', 'trl', 'torch', 'datasets', 'trackio'], 'hardware_flavor': 'a100-large', 'timeout': '8h'}\\n\"\n        \"Monitor: {'operation': 'ps'}, {'operation': 'logs', 'job_id': 'xxx'}, {'operation': 'cancel', 'job_id': 'xxx'}\\n\"\n        \"Docker: {'operation': 'run', 'command': ['duckdb', '-c', 'select 1 + 2'], 'image': 'duckdb/duckdb', 'hardware_flavor': 'cpu-basic', 'timeout': '1h'}\\n\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"operation\": {\n                \"type\": \"string\",\n                \"enum\": [\n                    \"run\",\n                    \"ps\",\n                    \"logs\",\n                    \"inspect\",\n                    \"cancel\",\n                    \"scheduled run\",\n                    \"scheduled ps\",\n                    \"scheduled inspect\",\n                    \"scheduled delete\",\n                    \"scheduled suspend\",\n                    \"scheduled resume\",\n                ],\n                \"description\": \"Operation to execute.\",\n            },\n            \"script\": {\n                \"type\": \"string\",\n                \"description\": (\n                    \"Python code or sandbox file path (e.g. '/app/train.py') or URL. \"\n                    \"Triggers Python mode. For ML training: base this on a working example found via github_find_examples, not on internal knowledge. 
\"\n                    \"Mutually exclusive with 'command'.\"\n                ),\n            },\n            \"dependencies\": {\n                \"type\": \"array\",\n                \"items\": {\"type\": \"string\"},\n                \"description\": (\n                    \"Pip packages to install. Include ALL required packages. \"\n                    \"Common training set: ['transformers', 'trl', 'torch', 'datasets', 'trackio', 'accelerate']. \"\n                    \"Only used with 'script'.\"\n                ),\n            },\n            \"image\": {\n                \"type\": \"string\",\n                \"description\": \"Docker image. Optional — auto-selected if not provided. Use with 'command'.\",\n            },\n            \"command\": {\n                \"type\": \"array\",\n                \"items\": {\"type\": \"string\"},\n                \"description\": \"Command to execute as list. Triggers Docker mode. Mutually exclusive with 'script'.\",\n            },\n            \"hardware_flavor\": {\n                \"type\": \"string\",\n                \"description\": (\n                    \"Hardware type. Sizing guide: 1-3B params → t4-small/a10g-small, \"\n                    \"7-13B → a10g-large, 30B+ → a100-large, 70B+ → h100/h100x8. \"\n                    f\"All options: CPU: {CPU_FLAVORS}. GPU: {GPU_FLAVORS}.\"\n                ),\n            },\n            \"timeout\": {\n                \"type\": \"string\",\n                \"description\": (\n                    \"Maximum job runtime. MUST be >2h for any training job — default 30m kills training mid-run. \"\n                    \"Guidelines: 1-3B models: 3-4h, 7-13B: 6-8h, 30B+: 12-24h. \"\n                    \"Use 30m-1h only for quick data processing or inference tasks. Default: '30m'.\"\n                ),\n            },\n            \"env\": {\n                \"type\": \"object\",\n                \"description\": \"Environment variables {'KEY': 'VALUE'}. 
HF_TOKEN is auto-included.\",\n            },\n            \"job_id\": {\n                \"type\": \"string\",\n                \"description\": \"Job ID. Required for: logs, inspect, cancel.\",\n            },\n            \"scheduled_job_id\": {\n                \"type\": \"string\",\n                \"description\": \"Scheduled job ID. Required for: scheduled inspect/delete/suspend/resume.\",\n            },\n            \"schedule\": {\n                \"type\": \"string\",\n                \"description\": \"Cron schedule or preset (@hourly, @daily, @weekly, @monthly). Required for: scheduled run.\",\n            },\n        },\n        \"required\": [\"operation\"],\n    },\n}\n\n\nasync def hf_jobs_handler(\n    arguments: Dict[str, Any], session: Any = None, tool_call_id: str | None = None\n) -> tuple[str, bool]:\n    \"\"\"Handler for agent tool router\"\"\"\n    try:\n\n        async def log_callback(log: str):\n            if session:\n                await session.send_event(\n                    Event(event_type=\"tool_log\", data={\"tool\": \"hf_jobs\", \"log\": log})\n                )\n\n        # If script is a sandbox file path, read it from the sandbox\n        script = arguments.get(\"script\", \"\")\n        sandbox = getattr(session, \"sandbox\", None) if session else None\n        if sandbox and script:\n            from agent.tools.sandbox_tool import resolve_sandbox_script\n            content, error = await resolve_sandbox_script(sandbox, script)\n            if error:\n                return error, False\n            if content:\n                arguments = {**arguments, \"script\": content}\n\n        hf_token = session.hf_token if session else None\n        namespace = os.environ.get(\"HF_NAMESPACE\") or (HfApi(token=hf_token).whoami().get(\"name\") if hf_token else None)\n\n        tool = HfJobsTool(\n            namespace=namespace,\n            hf_token=hf_token,\n            log_callback=log_callback if session else None,\n         
   session=session,\n            tool_call_id=tool_call_id,\n        )\n        result = await tool.execute(arguments)\n        return result[\"formatted\"], not result.get(\"isError\", False)\n    except Exception as e:\n        return f\"Error executing HF Jobs tool: {str(e)}\", False\n"
  },
  {
    "path": "agent/tools/local_tools.py",
    "content": "\"\"\"\nLocal tool implementations — bash/read/write/edit running on the user's machine.\n\nDrop-in replacement for sandbox tools when running in CLI (local) mode.\nSame tool specs (names, parameters) but handlers execute locally via\nsubprocess/pathlib instead of going through a remote sandbox.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport os\nimport re\nimport subprocess\nimport tempfile\nfrom pathlib import Path\nfrom typing import Any\n\n\nMAX_OUTPUT_CHARS = 25_000\nMAX_LINE_LENGTH = 4000\nDEFAULT_READ_LINES = 2000\nDEFAULT_TIMEOUT = 120\nMAX_TIMEOUT = 36000  # 10 hours — needed for long training runs (e.g. PostTrainBench)\n\n_ANSI_RE = re.compile(r'\\x1b\\[[0-9;]*[a-zA-Z]|\\x1b\\].*?\\x07')\n\n# Track files that have been read this session (enforces read-before-write/edit)\n_files_read: set[str] = set()\n\n\ndef _resolve_path(path: str) -> str:\n    try:\n        return str(Path(path).resolve())\n    except Exception:\n        return path\n\n\ndef _atomic_write(path: Path, content: str) -> None:\n    \"\"\"Write file atomically via temp file + os.replace().\n\n    Ensures the file is never left in a partial/corrupted state — it's either\n    the old content or the new content, never half-written.\n    \"\"\"\n    path.parent.mkdir(parents=True, exist_ok=True)\n    fd = None\n    tmp_path = None\n    try:\n        fd, tmp_path = tempfile.mkstemp(dir=path.parent, suffix=\".tmp\")\n        os.write(fd, content.encode(\"utf-8\"))\n        os.fsync(fd)\n        os.close(fd)\n        fd = None\n        os.replace(tmp_path, str(path))\n        tmp_path = None  # successfully replaced, nothing to clean up\n    finally:\n        if fd is not None:\n            os.close(fd)\n        if tmp_path is not None:\n            try:\n                os.unlink(tmp_path)\n            except OSError:\n                pass\n\n\ndef _strip_ansi(text: str) -> str:\n    return _ANSI_RE.sub('', text)\n\n\ndef _truncate_output(output: str, max_chars: int = 
MAX_OUTPUT_CHARS, head_ratio: float = 0.25) -> str:\n    \"\"\"Tail-biased truncation with temp file spillover for full output access.\"\"\"\n    if len(output) <= max_chars:\n        return output\n    # Write full output to temp file so LLM can read specific sections\n    spill_path = None\n    try:\n        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', prefix='bash_output_', delete=False) as f:\n            f.write(output)\n            spill_path = f.name\n    except Exception:\n        pass\n    head_budget = int(max_chars * head_ratio)\n    tail_budget = max_chars - head_budget\n    head = output[:head_budget]\n    tail = output[-tail_budget:]\n    total = len(output)\n    omitted = total - max_chars\n    meta = f\"\\n\\n... ({omitted:,} of {total:,} chars omitted, showing first {head_budget:,} + last {tail_budget:,}) ...\\n\"\n    if spill_path:\n        meta += f\"Full output saved to {spill_path} — use the read tool with offset/limit to inspect specific sections.\\n\"\n    meta += \"IMPORTANT: The command has finished. 
Analyze the output above and continue with your next action.\\n\"\n    return head + meta + tail\n\n\n# ── Handlers ────────────────────────────────────────────────────────────\n\nasync def _bash_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:\n    command = args.get(\"command\", \"\")\n    if not command:\n        return \"No command provided.\", False\n    work_dir = args.get(\"work_dir\", \".\")\n    timeout = min(args.get(\"timeout\") or DEFAULT_TIMEOUT, MAX_TIMEOUT)\n    try:\n        result = subprocess.run(\n            command,\n            shell=True,\n            capture_output=True,\n            text=True,\n            cwd=work_dir,\n            timeout=timeout,\n        )\n        output = _strip_ansi(result.stdout + result.stderr)\n        output = _truncate_output(output)\n        if not output.strip():\n            output = \"(no output)\"\n        return output, result.returncode == 0\n    except subprocess.TimeoutExpired:\n        return (\n            f\"Command timed out after {timeout}s and was killed.\\n\\n\"\n            f\"For long-running commands, run in the background and poll:\\n\"\n            f\"  nohup <command> > /tmp/output.log 2>&1 & echo $!\\n\"\n            f\"Then check status with:\\n\"\n            f\"  kill -0 <PID> 2>/dev/null && echo 'running' || echo 'done'\\n\"\n            f\"  tail -n 50 /tmp/output.log\"\n        ), False\n    except Exception as e:\n        return f\"bash error: {e}\", False\n\n\nasync def _read_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:\n    file_path = args.get(\"path\", \"\")\n    if not file_path:\n        return \"No path provided.\", False\n    p = Path(file_path)\n    if not p.exists():\n        return f\"File not found: {file_path}\", False\n    if p.is_dir():\n        return \"Cannot read a directory. 
Use bash with 'ls' instead.\", False\n    try:\n        raw_content = p.read_text()\n    except Exception as e:\n        return f\"read error: {e}\", False\n\n    _files_read.add(_resolve_path(file_path))\n\n    lines = raw_content.splitlines()\n    offset = max((args.get(\"offset\") or 1), 1)\n    limit = args.get(\"limit\") or DEFAULT_READ_LINES\n\n    selected = lines[offset - 1 : offset - 1 + limit]\n    numbered = []\n    for i, line in enumerate(selected, start=offset):\n        if len(line) > MAX_LINE_LENGTH:\n            line = line[:MAX_LINE_LENGTH] + \"...\"\n        numbered.append(f\"{i:>6}\\t{line}\")\n\n    return \"\\n\".join(numbered), True\n\n\nasync def _write_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:\n    file_path = args.get(\"path\", \"\")\n    content = args.get(\"content\", \"\")\n    if not file_path:\n        return \"No path provided.\", False\n    p = Path(file_path)\n    if p.exists() and _resolve_path(file_path) not in _files_read:\n        return (\n            f\"You must read {file_path} before overwriting it. 
\"\n            f\"Use the read tool first to see current contents.\"\n        ), False\n    try:\n        _atomic_write(p, content)\n        _files_read.add(_resolve_path(file_path))\n        msg = f\"Wrote {len(content)} bytes to {file_path}\"\n        # Syntax validation for Python files\n        if p.suffix == \".py\":\n            from agent.tools.edit_utils import validate_python\n            warnings = validate_python(content, file_path)\n            if warnings:\n                msg += \"\\n\\nValidation warnings:\\n\" + \"\\n\".join(f\"  ⚠ {w}\" for w in warnings)\n        return msg, True\n    except Exception as e:\n        return f\"write error: {e}\", False\n\n\nasync def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:\n    from agent.tools.edit_utils import apply_edit, validate_python\n\n    file_path = args.get(\"path\", \"\")\n    old_str = args.get(\"old_str\", \"\")\n    new_str = args.get(\"new_str\", \"\")\n    replace_all = args.get(\"replace_all\", False)\n    mode = args.get(\"mode\", \"replace\")\n\n    if not file_path:\n        return \"No path provided.\", False\n    if old_str == new_str:\n        return \"old_str and new_str must differ.\", False\n\n    p = Path(file_path)\n    if not p.exists():\n        return f\"File not found: {file_path}\", False\n    if _resolve_path(file_path) not in _files_read:\n        return (\n            f\"You must read {file_path} before editing it. 
\"\n            f\"Use the read tool first to see current contents.\"\n        ), False\n\n    try:\n        text = p.read_text()\n    except Exception as e:\n        return f\"edit read error: {e}\", False\n\n    try:\n        new_text, replacements, fuzzy_note = apply_edit(\n            text, old_str, new_str, mode=mode, replace_all=replace_all\n        )\n    except ValueError as e:\n        return str(e), False\n\n    try:\n        _atomic_write(p, new_text)\n    except Exception as e:\n        return f\"edit write error: {e}\", False\n\n    msg = f\"Edited {file_path} ({replacements} replacement{'s' if replacements > 1 else ''})\"\n    if fuzzy_note:\n        msg += f\" {fuzzy_note}\"\n    # Syntax validation for Python files\n    if p.suffix == \".py\":\n        warnings = validate_python(new_text, file_path)\n        if warnings:\n            msg += \"\\n\\nValidation warnings:\\n\" + \"\\n\".join(f\"  ⚠ {w}\" for w in warnings)\n    return msg, True\n\n\n# ── Local tool specs (override sandbox /app references) ────────────────\n\n_LOCAL_TOOL_SPECS = {\n    \"bash\": {\n        \"description\": (\n            \"Run a shell command on the local machine and return stdout/stderr.\\n\"\n            \"\\n\"\n            \"IMPORTANT: Do NOT use bash for file operations — use the dedicated tools instead:\\n\"\n            \"- To read files: use read (not cat/head/tail)\\n\"\n            \"- To edit files: use edit (not sed/awk)\\n\"\n            \"- To write files: use write (not echo/cat <<EOF)\\n\"\n            \"\\n\"\n            \"Commands run in a shell at the working directory. Each invocation is independent.\\n\"\n            \"Chain dependent commands with &&. 
Independent commands should be \"\n            \"separate bash calls (they can run in parallel).\\n\"\n            \"\\n\"\n            \"For long-running commands (training, evaluation), run in the background and poll:\\n\"\n            \"  nohup <command> > /tmp/output.log 2>&1 & echo $!\\n\"\n            \"Then check status:\\n\"\n            \"  kill -0 <PID> 2>/dev/null && echo 'running' || echo 'done'\\n\"\n            \"  tail -n 50 /tmp/output.log\\n\"\n            \"\\n\"\n            \"Timeout default 120s, max 36000s.\"\n        ),\n        \"parameters\": {\n            \"type\": \"object\",\n            \"required\": [\"command\"],\n            \"additionalProperties\": False,\n            \"properties\": {\n                \"command\": {\n                    \"type\": \"string\",\n                    \"description\": \"The shell command to execute.\",\n                },\n                \"description\": {\n                    \"type\": \"string\",\n                    \"description\": \"Short description (5-10 words, active voice).\",\n                },\n                \"work_dir\": {\n                    \"type\": \"string\",\n                    \"description\": \"Working directory (default: current directory).\",\n                },\n                \"timeout\": {\n                    \"type\": \"integer\",\n                    \"description\": \"Optional timeout in seconds (default: 120, max: 36000).\",\n                },\n            },\n        },\n    },\n    \"read\": {\n        \"description\": (\n            \"Reads a file from the local filesystem. 
Returns contents with line numbers \"\n            \"(cat -n format).\\n\"\n            \"\\n\"\n            \"Usage:\\n\"\n            \"- By default, reads up to 2000 lines from the beginning of the file.\\n\"\n            \"- You can optionally specify offset and limit for large files, but prefer \"\n            \"reading the whole file first.\\n\"\n            \"- Lines longer than 4000 chars are truncated.\\n\"\n            \"- Cannot read directories — use bash with 'ls' instead.\\n\"\n            \"- You should read multiple potentially useful files in parallel when possible.\\n\"\n            \"- IMPORTANT: Always read a file before editing or overwriting it. The edit and \"\n            \"write tools will reject operations on files you haven't read.\"\n        ),\n        \"parameters\": {\n            \"type\": \"object\",\n            \"required\": [\"path\"],\n            \"additionalProperties\": False,\n            \"properties\": {\n                \"path\": {\n                    \"type\": \"string\",\n                    \"description\": \"Absolute path to the file to read.\",\n                },\n                \"offset\": {\n                    \"type\": \"integer\",\n                    \"description\": \"The line number to start reading from (1-based). Only provide if the file is too large to read at once.\",\n                },\n                \"limit\": {\n                    \"type\": \"integer\",\n                    \"description\": \"The number of lines to read. Only provide if the file is too large to read at once.\",\n                },\n            },\n        },\n    },\n    \"write\": {\n        \"description\": (\n            \"Writes a file to the local filesystem. Overwrites the existing file if one \"\n            \"exists at the path.\\n\"\n            \"\\n\"\n            \"- If this is an existing file, you MUST use the read tool first. 
This tool \"\n            \"will fail if you did not read the file first.\\n\"\n            \"- ALWAYS prefer editing existing files with the edit tool over overwriting \"\n            \"with write.\\n\"\n            \"- Creates parent directories as needed.\"\n        ),\n        \"parameters\": {\n            \"type\": \"object\",\n            \"required\": [\"path\", \"content\"],\n            \"additionalProperties\": False,\n            \"properties\": {\n                \"path\": {\n                    \"type\": \"string\",\n                    \"description\": \"Absolute path to the file to write.\",\n                },\n                \"content\": {\n                    \"type\": \"string\",\n                    \"description\": \"The complete file content to write.\",\n                },\n            },\n        },\n    },\n    \"edit\": {\n        \"description\": (\n            \"Performs string replacements in files. Supports exact matching with \"\n            \"fuzzy fallback.\\n\"\n            \"\\n\"\n            \"Usage:\\n\"\n            \"- You must read the file at least once before editing. This tool will \"\n            \"error if you attempt an edit without reading the file.\\n\"\n            \"- The edit will FAIL if old_str is not unique in the file. 
Either provide \"\n            \"a larger string with more surrounding context to make it unique, or set \"\n            \"replace_all to true.\\n\"\n            \"- old_str and new_str must differ.\\n\"\n            \"- Preserve indentation exactly as it appears in the file.\\n\"\n            \"- Do NOT include line number prefixes from read output in old_str or new_str.\\n\"\n            \"- To delete code, set new_str to empty string.\\n\"\n            \"- Use replace_all for renaming variables or strings across the file.\\n\"\n            \"\\n\"\n            \"Modes:\\n\"\n            \"- replace (default): replace first occurrence of old_str with new_str.\\n\"\n            \"- append_after: insert new_str immediately after old_str (old_str is kept).\\n\"\n            \"- prepend_before: insert new_str immediately before old_str (old_str is kept).\"\n        ),\n        \"parameters\": {\n            \"type\": \"object\",\n            \"required\": [\"path\", \"old_str\", \"new_str\"],\n            \"additionalProperties\": False,\n            \"properties\": {\n                \"path\": {\n                    \"type\": \"string\",\n                    \"description\": \"Absolute path to the file to edit.\",\n                },\n                \"old_str\": {\n                    \"type\": \"string\",\n                    \"description\": \"The text to find in the file. Must match exactly (fuzzy matching is used as fallback).\",\n                },\n                \"new_str\": {\n                    \"type\": \"string\",\n                    \"description\": \"The replacement text. 
For append_after/prepend_before modes, the text to insert.\",\n                },\n                \"replace_all\": {\n                    \"type\": \"boolean\",\n                    \"description\": \"Replace all occurrences of old_str (default: false).\",\n                    \"default\": False,\n                },\n                \"mode\": {\n                    \"type\": \"string\",\n                    \"enum\": [\"replace\", \"append_after\", \"prepend_before\"],\n                    \"description\": \"Edit mode (default: replace).\",\n                    \"default\": \"replace\",\n                },\n            },\n        },\n    },\n}\n\n_HANDLERS = {\n    \"bash\": _bash_handler,\n    \"read\": _read_handler,\n    \"write\": _write_handler,\n    \"edit\": _edit_handler,\n}\n\n\ndef get_local_tools():\n    \"\"\"Return local ToolSpecs for bash/read/write/edit (no sandbox_create).\"\"\"\n    from agent.core.tools import ToolSpec\n\n    tools = []\n    for name, spec in _LOCAL_TOOL_SPECS.items():\n        handler = _HANDLERS.get(name)\n        if handler is None:\n            continue\n        tools.append(\n            ToolSpec(\n                name=name,\n                description=spec[\"description\"],\n                parameters=spec[\"parameters\"],\n                handler=handler,\n            )\n        )\n    return tools\n"
  },
  {
    "path": "agent/tools/papers_tool.py",
    "content": "\"\"\"\nHF Papers Tool — Discover papers, read their contents, and find linked resources.\n\nOperations: trending, search, paper_details, read_paper,\n            find_datasets, find_models, find_collections, find_all_resources,\n            citation_graph, snippet_search, recommend\n\"\"\"\n\nimport asyncio\nimport os\nimport re\nimport time\nfrom typing import Any\n\nimport httpx\nfrom bs4 import BeautifulSoup, Tag\n\nfrom agent.tools.types import ToolResult\n\nHF_API = \"https://huggingface.co/api\"\nARXIV_HTML = \"https://arxiv.org/html\"\nAR5IV_HTML = \"https://ar5iv.labs.arxiv.org/html\"\n\nDEFAULT_LIMIT = 10\nMAX_LIMIT = 50\nMAX_SUMMARY_LEN = 300\nMAX_SECTION_PREVIEW_LEN = 280\nMAX_SECTION_TEXT_LEN = 8000\n\nSORT_MAP = {\n    \"downloads\": \"downloads\",\n    \"likes\": \"likes\",\n    \"trending\": \"trendingScore\",\n}\n\n# ---------------------------------------------------------------------------\n# Semantic Scholar API\n# ---------------------------------------------------------------------------\n\nS2_API = \"https://api.semanticscholar.org\"\nS2_API_KEY = os.environ.get(\"S2_API_KEY\")\nS2_HEADERS: dict[str, str] = {\"x-api-key\": S2_API_KEY} if S2_API_KEY else {}\nS2_TIMEOUT = 12\n_s2_last_request: float = 0.0\n\n# Shared response cache (survives across sessions, keyed by (path, params_tuple))\n_s2_cache: dict[str, Any] = {}\n_S2_CACHE_MAX = 500\n\n\ndef _s2_paper_id(arxiv_id: str) -> str:\n    \"\"\"Convert bare arxiv ID to S2 format.\"\"\"\n    return f\"ARXIV:{arxiv_id}\"\n\n\ndef _s2_cache_key(path: str, params: dict | None) -> str:\n    \"\"\"Build a hashable cache key from path + sorted params.\"\"\"\n    p = tuple(sorted((params or {}).items()))\n    return f\"{path}:{p}\"\n\n\nasync def _s2_request(\n    client: httpx.AsyncClient,\n    method: str,\n    path: str,\n    **kwargs: Any,\n) -> httpx.Response | None:\n    \"\"\"S2 request with 2 retries on 429/5xx. 
Rate-limited only when using API key.\"\"\"\n    global _s2_last_request\n    url = f\"{S2_API}{path}\"\n    kwargs.setdefault(\"headers\", {}).update(S2_HEADERS)\n    kwargs.setdefault(\"timeout\", S2_TIMEOUT)\n\n    for attempt in range(3):\n        # Rate limit only when authenticated (1 req/s for search, 10 req/s for others)\n        if S2_API_KEY:\n            min_interval = 1.0 if \"search\" in path else 0.1\n            elapsed = time.monotonic() - _s2_last_request\n            if elapsed < min_interval:\n                await asyncio.sleep(min_interval - elapsed)\n        _s2_last_request = time.monotonic()\n\n        try:\n            resp = await client.request(method, url, **kwargs)\n            if resp.status_code == 429:\n                if attempt < 2:\n                    await asyncio.sleep(60)\n                    continue\n                return None\n            if resp.status_code >= 500:\n                if attempt < 2:\n                    await asyncio.sleep(3)\n                    continue\n                return None\n            return resp\n        except (httpx.RequestError, httpx.HTTPStatusError):\n            if attempt < 2:\n                await asyncio.sleep(3)\n                continue\n            return None\n    return None\n\n\nasync def _s2_get_json(\n    client: httpx.AsyncClient, path: str, params: dict | None = None,\n) -> dict | None:\n    \"\"\"Cached S2 GET returning parsed JSON or None.\"\"\"\n    key = _s2_cache_key(path, params)\n    if key in _s2_cache:\n        return _s2_cache[key]\n\n    resp = await _s2_request(client, \"GET\", path, params=params or {})\n    if resp and resp.status_code == 200:\n        data = resp.json()\n        if len(_s2_cache) < _S2_CACHE_MAX:\n            _s2_cache[key] = data\n        return data\n    return None\n\n\nasync def _s2_get_paper(\n    client: httpx.AsyncClient, arxiv_id: str, fields: str,\n) -> dict | None:\n    \"\"\"Fetch a single paper from S2 by arxiv ID. 
Returns None on failure.\"\"\"\n    return await _s2_get_json(\n        client,\n        f\"/graph/v1/paper/{_s2_paper_id(arxiv_id)}\",\n        {\"fields\": fields},\n    )\n\n\n# ---------------------------------------------------------------------------\n# HTML paper parsing\n# ---------------------------------------------------------------------------\n\n\ndef _parse_paper_html(html: str) -> dict[str, Any]:\n    \"\"\"Parse arxiv HTML into structured sections.\n\n    Returns:\n        {\n            \"title\": str,\n            \"abstract\": str,\n            \"sections\": [{\"id\": str, \"title\": str, \"level\": int, \"text\": str}],\n        }\n    \"\"\"\n    soup = BeautifulSoup(html, \"html.parser\")\n\n    # Title\n    title_el = soup.find(\"h1\", class_=\"ltx_title\")\n    title = title_el.get_text(strip=True).removeprefix(\"Title:\") if title_el else \"\"\n\n    # Abstract\n    abstract_el = soup.find(\"div\", class_=\"ltx_abstract\")\n    abstract = \"\"\n    if abstract_el:\n        # Skip the \"Abstract\" heading itself\n        for child in abstract_el.children:\n            if isinstance(child, Tag) and child.name in (\"h6\", \"h2\", \"h3\", \"p\", \"span\"):\n                if child.get_text(strip=True).lower() == \"abstract\":\n                    continue\n            if isinstance(child, Tag) and child.name == \"p\":\n                abstract += child.get_text(separator=\" \", strip=True) + \" \"\n        abstract = abstract.strip()\n\n    # Sections — collect h2/h3 headings and text between them\n    sections: list[dict[str, Any]] = []\n    headings = soup.find_all([\"h2\", \"h3\"], class_=lambda c: c and \"ltx_title\" in c)\n\n    for heading in headings:\n        level = 2 if heading.name == \"h2\" else 3\n        heading_text = heading.get_text(separator=\" \", strip=True)\n\n        # Collect text from siblings until next heading of same or higher level\n        text_parts: list[str] = []\n        sibling = heading.find_next_sibling()\n  
      while sibling:\n            if isinstance(sibling, Tag):\n                if sibling.name in (\"h2\", \"h3\") and \"ltx_title\" in (\n                    sibling.get(\"class\") or []\n                ):\n                    break\n                # Also stop at h2 if we're collecting h3 content\n                if sibling.name == \"h2\" and level == 3:\n                    break\n                text_parts.append(sibling.get_text(separator=\" \", strip=True))\n            sibling = sibling.find_next_sibling()\n\n        # Also check parent section element for contained paragraphs\n        parent_section = heading.find_parent(\"section\")\n        if parent_section and not text_parts:\n            for p in parent_section.find_all(\"p\", recursive=False):\n                text_parts.append(p.get_text(separator=\" \", strip=True))\n\n        section_text = \"\\n\\n\".join(t for t in text_parts if t)\n\n        # Extract section number from heading text (e.g., \"4 Experiments\" → \"4\")\n        num_match = re.match(r\"^([A-Z]?\\d+(?:\\.\\d+)*)\\s\", heading_text)\n        section_id = num_match.group(1) if num_match else \"\"\n\n        sections.append(\n            {\n                \"id\": section_id,\n                \"title\": heading_text,\n                \"level\": level,\n                \"text\": section_text,\n            }\n        )\n\n    return {\"title\": title, \"abstract\": abstract, \"sections\": sections}\n\n\ndef _find_section(sections: list[dict], query: str) -> dict | None:\n    \"\"\"Find a section by number or name (fuzzy).\"\"\"\n    query_lower = query.lower().strip()\n\n    # Exact match on section number\n    for s in sections:\n        if s[\"id\"] == query_lower or s[\"id\"] == query:\n            return s\n\n    # Exact match on title\n    for s in sections:\n        if query_lower == s[\"title\"].lower():\n            return s\n\n    # Substring match on title\n    for s in sections:\n        if query_lower in 
s[\"title\"].lower():\n            return s\n\n    # Number prefix match (e.g., \"4\" matches \"4.1\", \"4.2\", etc. — return parent)\n    for s in sections:\n        if s[\"id\"].startswith(query_lower + \".\") or s[\"id\"] == query_lower:\n            return s\n\n    return None\n\n\n# ---------------------------------------------------------------------------\n# Formatting helpers\n# ---------------------------------------------------------------------------\n\n\ndef _clean_description(text: str) -> str:\n    \"\"\"Strip HTML card artifacts and collapse whitespace from HF API descriptions.\"\"\"\n    text = re.sub(r\"[\\t]+\", \" \", text)\n    text = re.sub(r\"\\n{2,}\", \"\\n\", text)\n    return text.strip()\n\n\ndef _truncate(text: str, max_len: int) -> str:\n    if len(text) <= max_len:\n        return text\n    return text[:max_len] + \"...\"\n\n\ndef _format_paper_list(\n    papers: list, title: str, date: str | None = None, query: str | None = None\n) -> str:\n    lines = [f\"# {title}\"]\n    if date:\n        lines[0] += f\" ({date})\"\n    if query:\n        lines.append(f\"Filtered by: '{query}'\")\n    lines.append(f\"Showing {len(papers)} paper(s)\\n\")\n\n    for i, item in enumerate(papers, 1):\n        paper = item.get(\"paper\", item)\n        arxiv_id = paper.get(\"id\", \"\")\n        paper_title = paper.get(\"title\", \"Unknown\")\n        upvotes = paper.get(\"upvotes\", 0)\n        summary = paper.get(\"ai_summary\") or _truncate(\n            paper.get(\"summary\", \"\"), MAX_SUMMARY_LEN\n        )\n        keywords = paper.get(\"ai_keywords\") or []\n        github = paper.get(\"githubRepo\") or \"\"\n        stars = paper.get(\"githubStars\") or 0\n\n        lines.append(f\"## {i}. 
{paper_title}\")\n        lines.append(f\"**arxiv_id:** {arxiv_id} | **upvotes:** {upvotes}\")\n        lines.append(f\"https://huggingface.co/papers/{arxiv_id}\")\n        if keywords:\n            lines.append(f\"**Keywords:** {', '.join(keywords[:5])}\")\n        if github:\n            lines.append(f\"**GitHub:** {github} ({stars} stars)\")\n        if summary:\n            lines.append(f\"**Summary:** {_truncate(summary, MAX_SUMMARY_LEN)}\")\n        lines.append(\"\")\n\n    return \"\\n\".join(lines)\n\n\ndef _format_paper_detail(paper: dict, s2_data: dict | None = None) -> str:\n    arxiv_id = paper.get(\"id\", \"\")\n    title = paper.get(\"title\", \"Unknown\")\n    upvotes = paper.get(\"upvotes\", 0)\n    ai_summary = paper.get(\"ai_summary\") or \"\"\n    summary = paper.get(\"summary\", \"\")\n    keywords = paper.get(\"ai_keywords\") or []\n    github = paper.get(\"githubRepo\") or \"\"\n    stars = paper.get(\"githubStars\") or 0\n    authors = paper.get(\"authors\") or []\n\n    lines = [f\"# {title}\"]\n    meta_parts = [f\"**arxiv_id:** {arxiv_id}\", f\"**upvotes:** {upvotes}\"]\n    if s2_data:\n        cites = s2_data.get(\"citationCount\", 0)\n        influential = s2_data.get(\"influentialCitationCount\", 0)\n        meta_parts.append(f\"**citations:** {cites} ({influential} influential)\")\n    lines.append(\" | \".join(meta_parts))\n    lines.append(f\"https://huggingface.co/papers/{arxiv_id}\")\n    lines.append(f\"https://arxiv.org/abs/{arxiv_id}\")\n\n    if authors:\n        names = [a.get(\"name\", \"\") for a in authors[:10]]\n        author_str = \", \".join(n for n in names if n)\n        if len(authors) > 10:\n            author_str += f\" (+{len(authors) - 10} more)\"\n        lines.append(f\"**Authors:** {author_str}\")\n\n    if keywords:\n        lines.append(f\"**Keywords:** {', '.join(keywords)}\")\n    if s2_data and s2_data.get(\"s2FieldsOfStudy\"):\n        fields = [f[\"category\"] for f in s2_data[\"s2FieldsOfStudy\"] if 
f.get(\"category\")]\n        if fields:\n            lines.append(f\"**Fields:** {', '.join(fields)}\")\n    if s2_data and s2_data.get(\"venue\"):\n        lines.append(f\"**Venue:** {s2_data['venue']}\")\n    if github:\n        lines.append(f\"**GitHub:** {github} ({stars} stars)\")\n\n    if s2_data and s2_data.get(\"tldr\"):\n        tldr_text = s2_data[\"tldr\"].get(\"text\", \"\")\n        if tldr_text:\n            lines.append(f\"\\n## TL;DR\\n{tldr_text}\")\n    if ai_summary:\n        lines.append(f\"\\n## AI Summary\\n{ai_summary}\")\n    if summary:\n        lines.append(f\"\\n## Abstract\\n{_truncate(summary, 500)}\")\n\n    lines.append(\n        \"\\n**Next:** Use read_paper to read specific sections, find_all_resources for linked datasets/models, \"\n        \"or citation_graph to trace references and citations.\"\n    )\n    return \"\\n\".join(lines)\n\n\ndef _format_read_paper_toc(parsed: dict[str, Any], arxiv_id: str) -> str:\n    \"\"\"Format TOC view: abstract + section list with previews.\"\"\"\n    lines = [f\"# {parsed['title']}\"]\n    lines.append(f\"https://arxiv.org/abs/{arxiv_id}\\n\")\n\n    if parsed[\"abstract\"]:\n        lines.append(f\"## Abstract\\n{parsed['abstract']}\\n\")\n\n    lines.append(\"## Sections\")\n    for s in parsed[\"sections\"]:\n        prefix = \"  \" if s[\"level\"] == 3 else \"\"\n        preview = (\n            _truncate(s[\"text\"], MAX_SECTION_PREVIEW_LEN) if s[\"text\"] else \"(empty)\"\n        )\n        lines.append(f\"{prefix}- **{s['title']}**: {preview}\")\n\n    lines.append(\n        '\\nCall read_paper with section parameter (e.g. 
section=\"4\" or section=\"Experiments\") to read a specific section.'\n    )\n    return \"\\n\".join(lines)\n\n\ndef _format_read_paper_section(section: dict, arxiv_id: str) -> str:\n    \"\"\"Format a single section's full text.\"\"\"\n    lines = [f\"# {section['title']}\"]\n    lines.append(f\"https://arxiv.org/abs/{arxiv_id}\\n\")\n\n    text = section[\"text\"]\n    if len(text) > MAX_SECTION_TEXT_LEN:\n        text = (\n            text[:MAX_SECTION_TEXT_LEN]\n            + f\"\\n\\n... (truncated at {MAX_SECTION_TEXT_LEN} chars)\"\n        )\n\n    lines.append(text if text else \"(This section has no extractable text content.)\")\n    return \"\\n\".join(lines)\n\n\ndef _format_datasets(datasets: list, arxiv_id: str, sort: str) -> str:\n    lines = [f\"# Datasets linked to paper {arxiv_id}\"]\n    lines.append(f\"https://huggingface.co/papers/{arxiv_id}\")\n    lines.append(f\"Showing {len(datasets)} dataset(s), sorted by {sort}\\n\")\n\n    for i, ds in enumerate(datasets, 1):\n        ds_id = ds.get(\"id\", \"unknown\")\n        downloads = ds.get(\"downloads\", 0)\n        likes = ds.get(\"likes\", 0)\n        desc = _truncate(_clean_description(ds.get(\"description\") or \"\"), MAX_SUMMARY_LEN)\n        tags = ds.get(\"tags\") or []\n        interesting = [t for t in tags if not t.startswith((\"arxiv:\", \"region:\"))][:5]\n\n        lines.append(f\"**{i}. 
[{ds_id}](https://huggingface.co/datasets/{ds_id})**\")\n        lines.append(f\"   Downloads: {downloads:,} | Likes: {likes}\")\n        if interesting:\n            lines.append(f\"   Tags: {', '.join(interesting)}\")\n        if desc:\n            lines.append(f\"   {desc}\")\n        lines.append(\"\")\n\n    if datasets:\n        top = datasets[0].get(\"id\", \"\")\n        lines.append(f'**Inspect top dataset:** hf_inspect_dataset(dataset=\"{top}\")')\n    return \"\\n\".join(lines)\n\n\ndef _format_datasets_compact(datasets: list) -> str:\n    if not datasets:\n        return \"## Datasets\\nNone found\"\n    lines = [f\"## Datasets ({len(datasets)})\"]\n    for ds in datasets:\n        lines.append(\n            f\"- **{ds.get('id', '?')}** ({ds.get('downloads', 0):,} downloads)\"\n        )\n    return \"\\n\".join(lines)\n\n\ndef _format_models(models: list, arxiv_id: str, sort: str) -> str:\n    lines = [f\"# Models linked to paper {arxiv_id}\"]\n    lines.append(f\"https://huggingface.co/papers/{arxiv_id}\")\n    lines.append(f\"Showing {len(models)} model(s), sorted by {sort}\\n\")\n\n    for i, m in enumerate(models, 1):\n        model_id = m.get(\"id\", \"unknown\")\n        downloads = m.get(\"downloads\", 0)\n        likes = m.get(\"likes\", 0)\n        pipeline = m.get(\"pipeline_tag\") or \"\"\n        library = m.get(\"library_name\") or \"\"\n\n        lines.append(f\"**{i}. 
[{model_id}](https://huggingface.co/{model_id})**\")\n        meta = f\"   Downloads: {downloads:,} | Likes: {likes}\"\n        if pipeline:\n            meta += f\" | Task: {pipeline}\"\n        if library:\n            meta += f\" | Library: {library}\"\n        lines.append(meta)\n        lines.append(\"\")\n\n    return \"\\n\".join(lines)\n\n\ndef _format_models_compact(models: list) -> str:\n    if not models:\n        return \"## Models\\nNone found\"\n    lines = [f\"## Models ({len(models)})\"]\n    for m in models:\n        pipeline = m.get(\"pipeline_tag\") or \"\"\n        suffix = f\" ({pipeline})\" if pipeline else \"\"\n        lines.append(\n            f\"- **{m.get('id', '?')}** ({m.get('downloads', 0):,} downloads){suffix}\"\n        )\n    return \"\\n\".join(lines)\n\n\ndef _format_collections(collections: list, arxiv_id: str) -> str:\n    lines = [f\"# Collections containing paper {arxiv_id}\"]\n    lines.append(f\"Showing {len(collections)} collection(s)\\n\")\n\n    for i, c in enumerate(collections, 1):\n        slug = c.get(\"slug\", \"\")\n        title = c.get(\"title\", \"Untitled\")\n        upvotes = c.get(\"upvotes\", 0)\n        owner = c.get(\"owner\", {}).get(\"name\", \"\")\n        desc = _truncate(c.get(\"description\") or \"\", MAX_SUMMARY_LEN)\n        num_items = len(c.get(\"items\", []))\n\n        lines.append(f\"**{i}. 
{title}**\")\n        lines.append(f\"   By: {owner} | Upvotes: {upvotes} | Items: {num_items}\")\n        lines.append(f\"   https://huggingface.co/collections/{slug}\")\n        if desc:\n            lines.append(f\"   {desc}\")\n        lines.append(\"\")\n\n    return \"\\n\".join(lines)\n\n\ndef _format_collections_compact(collections: list) -> str:\n    if not collections:\n        return \"## Collections\\nNone found\"\n    lines = [f\"## Collections ({len(collections)})\"]\n    for c in collections:\n        title = c.get(\"title\", \"Untitled\")\n        owner = c.get(\"owner\", {}).get(\"name\", \"\")\n        upvotes = c.get(\"upvotes\", 0)\n        lines.append(f\"- **{title}** by {owner} ({upvotes} upvotes)\")\n    return \"\\n\".join(lines)\n\n\n# ---------------------------------------------------------------------------\n# Operation handlers\n# ---------------------------------------------------------------------------\n\n\ndef _error(message: str) -> ToolResult:\n    return {\n        \"formatted\": message,\n        \"totalResults\": 0,\n        \"resultsShared\": 0,\n        \"isError\": True,\n    }\n\n\ndef _validate_arxiv_id(args: dict) -> str | None:\n    \"\"\"Return arxiv_id or None if missing.\"\"\"\n    return args.get(\"arxiv_id\")\n\n\nasync def _op_trending(args: dict[str, Any], limit: int) -> ToolResult:\n    date = args.get(\"date\")\n    query = args.get(\"query\")\n\n    params: dict[str, Any] = {\"limit\": limit if not query else max(limit * 3, 30)}\n    if date:\n        params[\"date\"] = date\n\n    async with httpx.AsyncClient(timeout=15) as client:\n        resp = await client.get(f\"{HF_API}/daily_papers\", params=params)\n        resp.raise_for_status()\n        papers = resp.json()\n\n    if query:\n        q = query.lower()\n        papers = [\n            p\n            for p in papers\n            if q in p.get(\"title\", \"\").lower()\n            or q in p.get(\"paper\", {}).get(\"title\", \"\").lower()\n            
or q in p.get(\"paper\", {}).get(\"summary\", \"\").lower()\n            or any(\n                q in kw.lower() for kw in (p.get(\"paper\", {}).get(\"ai_keywords\") or [])\n            )\n        ]\n\n    papers = papers[:limit]\n    if not papers:\n        msg = \"No trending papers found\"\n        if query:\n            msg += f\" matching '{query}'\"\n        if date:\n            msg += f\" for {date}\"\n        return {\"formatted\": msg, \"totalResults\": 0, \"resultsShared\": 0}\n\n    formatted = _format_paper_list(papers, \"Trending Papers\", date=date, query=query)\n    return {\n        \"formatted\": formatted,\n        \"totalResults\": len(papers),\n        \"resultsShared\": len(papers),\n    }\n\n\ndef _format_s2_paper_list(papers: list[dict], title: str) -> str:\n    \"\"\"Format a list of S2 paper results.\"\"\"\n    lines = [f\"# {title}\"]\n    lines.append(f\"Showing {len(papers)} result(s)\\n\")\n\n    for i, paper in enumerate(papers, 1):\n        ptitle = paper.get(\"title\") or \"(untitled)\"\n        year = paper.get(\"year\") or \"?\"\n        cites = paper.get(\"citationCount\", 0)\n        venue = paper.get(\"venue\") or \"\"\n        ext_ids = paper.get(\"externalIds\") or {}\n        aid = ext_ids.get(\"ArXiv\", \"\")\n        tldr = (paper.get(\"tldr\") or {}).get(\"text\", \"\")\n\n        lines.append(f\"### {i}. 
{ptitle}\")\n        meta = [f\"Year: {year}\", f\"Citations: {cites}\"]\n        if venue:\n            meta.append(f\"Venue: {venue}\")\n        if aid:\n            meta.append(f\"arxiv_id: {aid}\")\n        lines.append(\" | \".join(meta))\n        if aid:\n            lines.append(f\"https://arxiv.org/abs/{aid}\")\n        if tldr:\n            lines.append(f\"**TL;DR:** {tldr}\")\n        lines.append(\"\")\n\n    lines.append(\"Use paper_details with arxiv_id for full info, or read_paper to read sections.\")\n    return \"\\n\".join(lines)\n\n\nasync def _s2_bulk_search(query: str, args: dict[str, Any], limit: int) -> ToolResult | None:\n    \"\"\"Search via S2 bulk endpoint with filters. Returns None on failure.\"\"\"\n    params: dict[str, Any] = {\n        \"query\": query,\n        \"limit\": limit,\n        \"fields\": \"title,externalIds,year,citationCount,tldr,venue,publicationDate\",\n    }\n\n    # Date filter\n    date_from = args.get(\"date_from\", \"\")\n    date_to = args.get(\"date_to\", \"\")\n    if date_from or date_to:\n        params[\"publicationDateOrYear\"] = f\"{date_from}:{date_to}\"\n\n    # Fields of study\n    categories = args.get(\"categories\")\n    if categories:\n        params[\"fieldsOfStudy\"] = categories\n\n    # Min citations\n    min_cites = args.get(\"min_citations\")\n    if min_cites:\n        params[\"minCitationCount\"] = str(min_cites)\n\n    # Sort\n    sort_by = args.get(\"sort_by\")\n    if sort_by and sort_by != \"relevance\":\n        params[\"sort\"] = f\"{sort_by}:desc\"\n\n    async with httpx.AsyncClient(timeout=15) as client:\n        resp = await _s2_request(client, \"GET\", \"/graph/v1/paper/search/bulk\", params=params)\n        if not resp or resp.status_code != 200:\n            return None\n        data = resp.json()\n\n    papers = data.get(\"data\") or []\n    if not papers:\n        return {\n            \"formatted\": f\"No papers found for '{query}' with the given filters.\",\n            
\"totalResults\": 0,\n            \"resultsShared\": 0,\n        }\n\n    formatted = _format_s2_paper_list(papers[:limit], f\"Papers matching '{query}' (Semantic Scholar)\")\n    return {\n        \"formatted\": formatted,\n        \"totalResults\": data.get(\"total\", len(papers)),\n        \"resultsShared\": min(limit, len(papers)),\n    }\n\n\nasync def _op_search(args: dict[str, Any], limit: int) -> ToolResult:\n    query = args.get(\"query\")\n    if not query:\n        return _error(\"'query' is required for search operation.\")\n\n    # Route to S2 when filters are present\n    use_s2 = any(args.get(k) for k in (\"date_from\", \"date_to\", \"categories\", \"min_citations\", \"sort_by\"))\n    if use_s2:\n        result = await _s2_bulk_search(query, args, limit)\n        if result is not None:\n            return result\n        # Fall back to HF search (without filters) if S2 fails\n\n    async with httpx.AsyncClient(timeout=15) as client:\n        resp = await client.get(\n            f\"{HF_API}/papers/search\", params={\"q\": query, \"limit\": limit}\n        )\n        resp.raise_for_status()\n        papers = resp.json()\n\n    if not papers:\n        return {\n            \"formatted\": f\"No papers found for '{query}'\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n        }\n\n    formatted = _format_paper_list(papers, f\"Papers matching '{query}'\")\n    return {\n        \"formatted\": formatted,\n        \"totalResults\": len(papers),\n        \"resultsShared\": len(papers),\n    }\n\n\nasync def _op_paper_details(args: dict[str, Any], limit: int) -> ToolResult:\n    arxiv_id = _validate_arxiv_id(args)\n    if not arxiv_id:\n        return _error(\"'arxiv_id' is required for paper_details.\")\n\n    async with httpx.AsyncClient(timeout=15) as client:\n        resp = await client.get(f\"{HF_API}/papers/{arxiv_id}\")\n        resp.raise_for_status()\n        paper = resp.json()\n\n    return {\n        \"formatted\": 
_format_paper_detail(paper),\n        \"totalResults\": 1,\n        \"resultsShared\": 1,\n    }\n\n\nasync def _op_read_paper(args: dict[str, Any], limit: int) -> ToolResult:\n    arxiv_id = _validate_arxiv_id(args)\n    if not arxiv_id:\n        return _error(\"'arxiv_id' is required for read_paper.\")\n\n    section_query = args.get(\"section\")\n\n    # Try fetching HTML from arxiv, then ar5iv, then fallback to abstract\n    parsed = None\n    async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:\n        for base_url in [ARXIV_HTML, AR5IV_HTML]:\n            try:\n                resp = await client.get(f\"{base_url}/{arxiv_id}\")\n                if resp.status_code == 200:\n                    parsed = _parse_paper_html(resp.text)\n                    if parsed[\"sections\"]:  # Only use if we got real sections\n                        break\n                    parsed = None\n            except httpx.RequestError:\n                continue\n\n    # Fallback: return abstract from HF API\n    if not parsed or not parsed[\"sections\"]:\n        try:\n            async with httpx.AsyncClient(timeout=15) as client:\n                resp = await client.get(f\"{HF_API}/papers/{arxiv_id}\")\n                resp.raise_for_status()\n                paper = resp.json()\n            abstract = paper.get(\"summary\", \"\")\n            title = paper.get(\"title\", \"\")\n            msg = f\"# {title}\\nhttps://arxiv.org/abs/{arxiv_id}\\n\\n\"\n            msg += f\"## Abstract\\n{abstract}\\n\\n\"\n            msg += \"HTML version not available for this paper. Only abstract shown.\\n\"\n            msg += f\"PDF: https://arxiv.org/pdf/{arxiv_id}\"\n            return {\"formatted\": msg, \"totalResults\": 1, \"resultsShared\": 1}\n        except Exception:\n            return _error(\n                f\"Could not fetch paper {arxiv_id}. 
Check the arxiv ID is correct.\"\n            )\n\n    # Return TOC or specific section\n    if not section_query:\n        formatted = _format_read_paper_toc(parsed, arxiv_id)\n        return {\n            \"formatted\": formatted,\n            \"totalResults\": len(parsed[\"sections\"]),\n            \"resultsShared\": len(parsed[\"sections\"]),\n        }\n\n    section = _find_section(parsed[\"sections\"], section_query)\n    if not section:\n        available = \"\\n\".join(f\"- {s['title']}\" for s in parsed[\"sections\"])\n        return _error(\n            f\"Section '{section_query}' not found. Available sections:\\n{available}\"\n        )\n\n    formatted = _format_read_paper_section(section, arxiv_id)\n    return {\"formatted\": formatted, \"totalResults\": 1, \"resultsShared\": 1}\n\n\n# ---------------------------------------------------------------------------\n# Citation graph (Semantic Scholar)\n# ---------------------------------------------------------------------------\n\n\ndef _format_citation_entry(entry: dict, show_context: bool = False) -> str:\n    \"\"\"Format a single citation/reference entry.\"\"\"\n    paper = entry.get(\"citingPaper\") or entry.get(\"citedPaper\") or {}\n    title = paper.get(\"title\") or \"(untitled)\"\n    year = paper.get(\"year\") or \"?\"\n    cites = paper.get(\"citationCount\", 0)\n    ext_ids = paper.get(\"externalIds\") or {}\n    aid = ext_ids.get(\"ArXiv\", \"\")\n    influential = \" **[influential]**\" if entry.get(\"isInfluential\") else \"\"\n\n    parts = [f\"- **{title}** ({year}, {cites} cites){influential}\"]\n    if aid:\n        parts[0] += f\"  arxiv:{aid}\"\n\n    if show_context:\n        intents = entry.get(\"intents\") or []\n        if intents:\n            parts.append(f\"  Intent: {', '.join(intents)}\")\n        contexts = entry.get(\"contexts\") or []\n        for ctx in contexts[:2]:\n            if ctx:\n                parts.append(f\"  > {_truncate(ctx, 200)}\")\n\n    return 
\"\\n\".join(parts)\n\n\ndef _format_citation_graph(\n    arxiv_id: str,\n    references: list[dict] | None,\n    citations: list[dict] | None,\n) -> str:\n    lines = [f\"# Citation Graph for {arxiv_id}\"]\n    lines.append(f\"https://arxiv.org/abs/{arxiv_id}\\n\")\n\n    if references is not None:\n        lines.append(f\"## References ({len(references)})\")\n        if references:\n            for entry in references:\n                lines.append(_format_citation_entry(entry))\n        else:\n            lines.append(\"No references found.\")\n        lines.append(\"\")\n\n    if citations is not None:\n        lines.append(f\"## Citations ({len(citations)})\")\n        if citations:\n            for entry in citations:\n                lines.append(_format_citation_entry(entry, show_context=True))\n        else:\n            lines.append(\"No citations found.\")\n        lines.append(\"\")\n\n    lines.append(\"**Tip:** Use paper_details with an arxiv_id from above to explore further.\")\n    return \"\\n\".join(lines)\n\n\nasync def _op_citation_graph(args: dict[str, Any], limit: int) -> ToolResult:\n    arxiv_id = _validate_arxiv_id(args)\n    if not arxiv_id:\n        return _error(\"'arxiv_id' is required for citation_graph.\")\n\n    direction = args.get(\"direction\", \"both\")\n    s2_id = _s2_paper_id(arxiv_id)\n    fields = \"title,externalIds,year,citationCount,influentialCitationCount,contexts,intents,isInfluential\"\n    params = {\"fields\": fields, \"limit\": limit}\n\n    async with httpx.AsyncClient(timeout=15) as client:\n        refs, cites = None, None\n        coros = []\n        if direction in (\"references\", \"both\"):\n            coros.append(_s2_get_json(client, f\"/graph/v1/paper/{s2_id}/references\", params))\n        if direction in (\"citations\", \"both\"):\n            coros.append(_s2_get_json(client, f\"/graph/v1/paper/{s2_id}/citations\", params))\n\n        results = await asyncio.gather(*coros, return_exceptions=True)\n    
    idx = 0\n        if direction in (\"references\", \"both\"):\n            r = results[idx]\n            if isinstance(r, dict):\n                refs = r.get(\"data\", [])\n            idx += 1\n        if direction in (\"citations\", \"both\"):\n            r = results[idx]\n            if isinstance(r, dict):\n                cites = r.get(\"data\", [])\n\n    if refs is None and cites is None:\n        return _error(f\"Could not fetch citation data for {arxiv_id}. Paper may not be indexed by Semantic Scholar.\")\n\n    total = (len(refs) if refs else 0) + (len(cites) if cites else 0)\n    return {\n        \"formatted\": _format_citation_graph(arxiv_id, refs, cites),\n        \"totalResults\": total,\n        \"resultsShared\": total,\n    }\n\n\nasync def _op_find_datasets(args: dict[str, Any], limit: int) -> ToolResult:\n    arxiv_id = _validate_arxiv_id(args)\n    if not arxiv_id:\n        return _error(\"'arxiv_id' is required for find_datasets.\")\n\n    sort = args.get(\"sort\", \"downloads\")\n    sort_key = SORT_MAP.get(sort, \"downloads\")\n\n    async with httpx.AsyncClient(timeout=15) as client:\n        resp = await client.get(\n            f\"{HF_API}/datasets\",\n            params={\n                \"filter\": f\"arxiv:{arxiv_id}\",\n                \"limit\": limit,\n                \"sort\": sort_key,\n                \"direction\": -1,\n            },\n        )\n        resp.raise_for_status()\n        datasets = resp.json()\n\n    if not datasets:\n        return {\n            \"formatted\": f\"No datasets found linked to paper {arxiv_id}.\\nhttps://huggingface.co/papers/{arxiv_id}\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n        }\n\n    return {\n        \"formatted\": _format_datasets(datasets, arxiv_id, sort),\n        \"totalResults\": len(datasets),\n        \"resultsShared\": len(datasets),\n    }\n\n\nasync def _op_find_models(args: dict[str, Any], limit: int) -> ToolResult:\n    arxiv_id = 
_validate_arxiv_id(args)\n    if not arxiv_id:\n        return _error(\"'arxiv_id' is required for find_models.\")\n\n    sort = args.get(\"sort\", \"downloads\")\n    sort_key = SORT_MAP.get(sort, \"downloads\")\n\n    async with httpx.AsyncClient(timeout=15) as client:\n        resp = await client.get(\n            f\"{HF_API}/models\",\n            params={\n                \"filter\": f\"arxiv:{arxiv_id}\",\n                \"limit\": limit,\n                \"sort\": sort_key,\n                \"direction\": -1,\n            },\n        )\n        resp.raise_for_status()\n        models = resp.json()\n\n    if not models:\n        return {\n            \"formatted\": f\"No models found linked to paper {arxiv_id}.\\nhttps://huggingface.co/papers/{arxiv_id}\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n        }\n\n    return {\n        \"formatted\": _format_models(models, arxiv_id, sort),\n        \"totalResults\": len(models),\n        \"resultsShared\": len(models),\n    }\n\n\nasync def _op_find_collections(args: dict[str, Any], limit: int) -> ToolResult:\n    arxiv_id = _validate_arxiv_id(args)\n    if not arxiv_id:\n        return _error(\"'arxiv_id' is required for find_collections.\")\n\n    async with httpx.AsyncClient(timeout=15) as client:\n        resp = await client.get(f\"{HF_API}/collections\", params={\"paper\": arxiv_id})\n        resp.raise_for_status()\n        collections = resp.json()\n\n    if not collections:\n        return {\n            \"formatted\": f\"No collections found containing paper {arxiv_id}.\\nhttps://huggingface.co/papers/{arxiv_id}\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n        }\n\n    collections = collections[:limit]\n    return {\n        \"formatted\": _format_collections(collections, arxiv_id),\n        \"totalResults\": len(collections),\n        \"resultsShared\": len(collections),\n    }\n\n\nasync def _op_find_all_resources(args: dict[str, Any], limit: 
int) -> ToolResult:\n    arxiv_id = _validate_arxiv_id(args)\n    if not arxiv_id:\n        return _error(\"'arxiv_id' is required for find_all_resources.\")\n\n    per_cat = min(limit, 10)\n\n    async with httpx.AsyncClient(timeout=15) as client:\n        results = await asyncio.gather(\n            client.get(\n                f\"{HF_API}/datasets\",\n                params={\n                    \"filter\": f\"arxiv:{arxiv_id}\",\n                    \"limit\": per_cat,\n                    \"sort\": \"downloads\",\n                    \"direction\": -1,\n                },\n            ),\n            client.get(\n                f\"{HF_API}/models\",\n                params={\n                    \"filter\": f\"arxiv:{arxiv_id}\",\n                    \"limit\": per_cat,\n                    \"sort\": \"downloads\",\n                    \"direction\": -1,\n                },\n            ),\n            client.get(f\"{HF_API}/collections\", params={\"paper\": arxiv_id}),\n            return_exceptions=True,\n        )\n\n    sections = []\n    total = 0\n\n    # Datasets\n    if isinstance(results[0], Exception):\n        sections.append(f\"## Datasets\\nError: {results[0]}\")\n    else:\n        datasets = results[0].json()\n        total += len(datasets)\n        sections.append(_format_datasets_compact(datasets[:per_cat]))\n\n    # Models\n    if isinstance(results[1], Exception):\n        sections.append(f\"## Models\\nError: {results[1]}\")\n    else:\n        models = results[1].json()\n        total += len(models)\n        sections.append(_format_models_compact(models[:per_cat]))\n\n    # Collections\n    if isinstance(results[2], Exception):\n        sections.append(f\"## Collections\\nError: {results[2]}\")\n    else:\n        collections = results[2].json()\n        total += len(collections)\n        sections.append(_format_collections_compact(collections[:per_cat]))\n\n    header = f\"# Resources linked to paper 
{arxiv_id}\\nhttps://huggingface.co/papers/{arxiv_id}\\n\"\n    formatted = header + \"\\n\\n\".join(sections)\n    return {\"formatted\": formatted, \"totalResults\": total, \"resultsShared\": total}\n\n\n# ---------------------------------------------------------------------------\n# Snippet search (Semantic Scholar)\n# ---------------------------------------------------------------------------\n\n\ndef _format_snippets(snippets: list[dict], query: str) -> str:\n    lines = [f\"# Snippet Search: '{query}'\"]\n    lines.append(f\"Found {len(snippets)} matching passage(s)\\n\")\n\n    for i, item in enumerate(snippets, 1):\n        paper = item.get(\"paper\") or {}\n        ptitle = paper.get(\"title\") or \"(untitled)\"\n        year = paper.get(\"year\") or \"?\"\n        cites = paper.get(\"citationCount\", 0)\n        ext_ids = paper.get(\"externalIds\") or {}\n        aid = ext_ids.get(\"ArXiv\", \"\")\n\n        snippet = item.get(\"snippet\") or {}\n        text = snippet.get(\"text\", \"\")\n        section = snippet.get(\"section\") or \"\"\n\n        lines.append(f\"### {i}. 
{ptitle} ({year}, {cites} cites)\")\n        if aid:\n            lines.append(f\"arxiv:{aid}\")\n        if section:\n            lines.append(f\"Section: {section}\")\n        if text:\n            lines.append(f\"> {_truncate(text, 400)}\")\n        lines.append(\"\")\n\n    lines.append(\"Use paper_details or read_paper with arxiv_id to explore a paper further.\")\n    return \"\\n\".join(lines)\n\n\nasync def _op_snippet_search(args: dict[str, Any], limit: int) -> ToolResult:\n    query = args.get(\"query\")\n    if not query:\n        return _error(\"'query' is required for snippet_search.\")\n\n    params: dict[str, Any] = {\n        \"query\": query,\n        \"limit\": limit,\n        \"fields\": \"title,externalIds,year,citationCount\",\n    }\n\n    # Optional filters (same as search)\n    date_from = args.get(\"date_from\", \"\")\n    date_to = args.get(\"date_to\", \"\")\n    if date_from or date_to:\n        params[\"publicationDateOrYear\"] = f\"{date_from}:{date_to}\"\n    if args.get(\"categories\"):\n        params[\"fieldsOfStudy\"] = args[\"categories\"]\n    if args.get(\"min_citations\"):\n        params[\"minCitationCount\"] = str(args[\"min_citations\"])\n\n    async with httpx.AsyncClient(timeout=15) as client:\n        resp = await _s2_request(client, \"GET\", \"/graph/v1/snippet/search\", params=params)\n        if not resp or resp.status_code != 200:\n            return _error(\"Snippet search failed. 
Semantic Scholar may be unavailable.\")\n        data = resp.json()\n\n    snippets = data.get(\"data\") or []\n    if not snippets:\n        return {\n            \"formatted\": f\"No snippets found for '{query}'.\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n        }\n\n    return {\n        \"formatted\": _format_snippets(snippets, query),\n        \"totalResults\": len(snippets),\n        \"resultsShared\": len(snippets),\n    }\n\n\n# ---------------------------------------------------------------------------\n# Recommendations (Semantic Scholar)\n# ---------------------------------------------------------------------------\n\n\nasync def _op_recommend(args: dict[str, Any], limit: int) -> ToolResult:\n    positive_ids = args.get(\"positive_ids\")\n    arxiv_id = _validate_arxiv_id(args)\n\n    if not arxiv_id and not positive_ids:\n        return _error(\"'arxiv_id' or 'positive_ids' is required for recommend.\")\n\n    fields = \"title,externalIds,year,citationCount,tldr,venue\"\n\n    async with httpx.AsyncClient(timeout=15) as client:\n        if positive_ids and not arxiv_id:\n            # Multi-paper recommendations (POST, not cached)\n            pos = [_s2_paper_id(pid.strip()) for pid in positive_ids.split(\",\") if pid.strip()]\n            neg_raw = args.get(\"negative_ids\", \"\")\n            neg = [_s2_paper_id(pid.strip()) for pid in neg_raw.split(\",\") if pid.strip()] if neg_raw else []\n            resp = await _s2_request(\n                client, \"POST\", \"/recommendations/v1/papers/\",\n                json={\"positivePaperIds\": pos, \"negativePaperIds\": neg},\n                params={\"fields\": fields, \"limit\": limit},\n            )\n            if not resp or resp.status_code != 200:\n                return _error(\"Recommendation request failed. 
Semantic Scholar may be unavailable.\")\n            data = resp.json()\n        else:\n            # Single-paper recommendations (cached)\n            data = await _s2_get_json(\n                client,\n                f\"/recommendations/v1/papers/forpaper/{_s2_paper_id(arxiv_id)}\",\n                {\"fields\": fields, \"limit\": limit, \"from\": \"recent\"},\n            )\n            if not data:\n                return _error(\"Recommendation request failed. Semantic Scholar may be unavailable.\")\n\n    papers = data.get(\"recommendedPapers\") or []\n    if not papers:\n        return {\n            \"formatted\": \"No recommendations found.\",\n            \"totalResults\": 0,\n            \"resultsShared\": 0,\n        }\n\n    title = f\"Recommended papers based on {arxiv_id or positive_ids}\"\n    return {\n        \"formatted\": _format_s2_paper_list(papers[:limit], title),\n        \"totalResults\": len(papers),\n        \"resultsShared\": min(limit, len(papers)),\n    }\n\n\n# ---------------------------------------------------------------------------\n# Operation dispatch\n# ---------------------------------------------------------------------------\n\n_OPERATIONS = {\n    \"trending\": _op_trending,\n    \"search\": _op_search,\n    \"paper_details\": _op_paper_details,\n    \"read_paper\": _op_read_paper,\n    \"citation_graph\": _op_citation_graph,\n    \"snippet_search\": _op_snippet_search,\n    \"recommend\": _op_recommend,\n    \"find_datasets\": _op_find_datasets,\n    \"find_models\": _op_find_models,\n    \"find_collections\": _op_find_collections,\n    \"find_all_resources\": _op_find_all_resources,\n}\n\n\n# ---------------------------------------------------------------------------\n# Tool spec + handler\n# ---------------------------------------------------------------------------\n\nHF_PAPERS_TOOL_SPEC = {\n    \"name\": \"hf_papers\",\n    \"description\": (\n        \"Discover ML research papers, analyze citations, search paper 
contents, and find linked resources.\\n\\n\"\n        \"Combines HuggingFace Hub, arXiv, and Semantic Scholar. Use for exploring research areas, \"\n        \"finding datasets for a task, tracing citation chains, or implementing a paper's approach.\\n\\n\"\n        \"Typical flows:\\n\"\n        \"  search → read_paper → find_all_resources → hf_inspect_dataset\\n\"\n        \"  search → paper_details → citation_graph → read_paper (trace influence)\\n\"\n        \"  snippet_search → paper_details → read_paper (find specific claims)\\n\\n\"\n        \"Operations:\\n\"\n        \"- trending: Get trending daily papers, optionally filter by topic keyword\\n\"\n        \"- search: Search papers. Uses HF by default (ML-tuned). Add date_from/min_citations/categories to use Semantic Scholar with filters\\n\"\n        \"- paper_details: Metadata, abstract, AI summary, github link\\n\"\n        \"- read_paper: Read paper contents — without section: abstract + TOC; with section: full text\\n\"\n        \"- citation_graph: Get references and citations for a paper with influence flags and citation intents\\n\"\n        \"- snippet_search: Semantic search over full-text passages from 12M+ papers\\n\"\n        \"- recommend: Find similar papers (single paper or positive/negative examples)\\n\"\n        \"- find_datasets: Find datasets linked to a paper\\n\"\n        \"- find_models: Find models linked to a paper\\n\"\n        \"- find_collections: Find collections that include a paper\\n\"\n        \"- find_all_resources: Parallel fetch of datasets + models + collections for a paper\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"operation\": {\n                \"type\": \"string\",\n                \"enum\": list(_OPERATIONS.keys()),\n                \"description\": \"Operation to execute.\",\n            },\n            \"query\": {\n                \"type\": \"string\",\n                \"description\": (\n              
      \"Search query. Required for: search, snippet_search. \"\n                    \"Optional for: trending (filters by keyword). \"\n                    \"Supports boolean syntax for Semantic Scholar: '\\\"exact phrase\\\" term1 | term2'.\"\n                ),\n            },\n            \"arxiv_id\": {\n                \"type\": \"string\",\n                \"description\": (\n                    \"ArXiv paper ID (e.g. '2305.18290'). \"\n                    \"Required for: paper_details, read_paper, citation_graph, find_datasets, find_models, find_collections, find_all_resources. \"\n                    \"Optional for: recommend (single-paper recs). Get IDs from search results first.\"\n                ),\n            },\n            \"section\": {\n                \"type\": \"string\",\n                \"description\": (\n                    \"Section name or number to read (e.g. '3', 'Experiments', '4.2'). \"\n                    \"Optional for: read_paper. Without this, returns abstract + TOC.\"\n                ),\n            },\n            \"direction\": {\n                \"type\": \"string\",\n                \"enum\": [\"citations\", \"references\", \"both\"],\n                \"description\": \"Direction for citation_graph. Default: both.\",\n            },\n            \"date\": {\n                \"type\": \"string\",\n                \"description\": \"Date in YYYY-MM-DD format. Optional for: trending (defaults to recent papers).\",\n            },\n            \"date_from\": {\n                \"type\": \"string\",\n                \"description\": \"Start date (YYYY-MM-DD). Triggers Semantic Scholar search. For: search, snippet_search.\",\n            },\n            \"date_to\": {\n                \"type\": \"string\",\n                \"description\": \"End date (YYYY-MM-DD). Triggers Semantic Scholar search. 
For: search, snippet_search.\",\n            },\n            \"categories\": {\n                \"type\": \"string\",\n                \"description\": \"Field of study filter (e.g. 'Computer Science'). Triggers Semantic Scholar search.\",\n            },\n            \"min_citations\": {\n                \"type\": \"integer\",\n                \"description\": \"Minimum citation count filter. Triggers Semantic Scholar search.\",\n            },\n            \"sort_by\": {\n                \"type\": \"string\",\n                \"enum\": [\"relevance\", \"citationCount\", \"publicationDate\"],\n                \"description\": \"Sort order for Semantic Scholar search. Default: relevance.\",\n            },\n            \"positive_ids\": {\n                \"type\": \"string\",\n                \"description\": \"Comma-separated arxiv IDs for multi-paper recommendations. For: recommend.\",\n            },\n            \"negative_ids\": {\n                \"type\": \"string\",\n                \"description\": \"Comma-separated arxiv IDs as negative examples. For: recommend.\",\n            },\n            \"sort\": {\n                \"type\": \"string\",\n                \"enum\": [\"downloads\", \"likes\", \"trending\"],\n                \"description\": (\n                    \"Sort order for find_datasets and find_models. 
Default: downloads.\"\n                ),\n            },\n            \"limit\": {\n                \"type\": \"integer\",\n                \"description\": \"Maximum results to return (default: 10, max: 50).\",\n            },\n        },\n        \"required\": [\"operation\"],\n    },\n}\n\n\nasync def hf_papers_handler(arguments: dict[str, Any]) -> tuple[str, bool]:\n    \"\"\"Handler for agent tool router.\"\"\"\n    operation = arguments.get(\"operation\")\n    if not operation:\n        return \"'operation' parameter is required.\", False\n\n    handler = _OPERATIONS.get(operation)\n    if not handler:\n        valid = \", \".join(_OPERATIONS.keys())\n        return f\"Unknown operation: '{operation}'. Valid: {valid}\", False\n\n    limit = min(arguments.get(\"limit\", DEFAULT_LIMIT), MAX_LIMIT)\n\n    try:\n        result = await handler(arguments, limit)\n        return result[\"formatted\"], not result.get(\"isError\", False)\n    except httpx.HTTPStatusError as e:\n        return f\"API error: {e.response.status_code} — {e.response.text[:200]}\", False\n    except httpx.RequestError as e:\n        return f\"Request error: {e}\", False\n    except Exception as e:\n        return f\"Error in {operation}: {e}\", False\n"
  },
  {
    "path": "agent/tools/plan_tool.py",
    "content": "from typing import Any, Dict, List\n\nfrom agent.core.session import Event\nfrom agent.utils.terminal_display import format_plan_tool_output\n\nfrom .types import ToolResult\n\n# In-memory storage for the current plan (raw structure from agent)\n_current_plan: List[Dict[str, str]] = []\n\n\nclass PlanTool:\n    \"\"\"Tool for managing a list of todos with status tracking.\"\"\"\n\n    def __init__(self, session: Any = None):\n        self.session = session\n\n    async def execute(self, params: Dict[str, Any]) -> ToolResult:\n        \"\"\"\n        Execute the WritePlan operation.\n\n        Args:\n            params: Dictionary containing:\n                - todos: List of todo items, each with id, content, and status\n\n        Returns:\n            ToolResult with formatted output\n        \"\"\"\n        global _current_plan\n\n        todos = params.get(\"todos\", [])\n\n        # Validate todos structure\n        for todo in todos:\n            if not isinstance(todo, dict):\n                return {\n                    \"formatted\": \"Error: Each todo must be an object. Re call the tool with correct format (mandatory).\",\n                    \"isError\": True,\n                }\n\n            required_fields = [\"id\", \"content\", \"status\"]\n            for field in required_fields:\n                if field not in todo:\n                    return {\n                        \"formatted\": f\"Error: Todo missing required field '{field}'. Re call the tool with correct format (mandatory).\",\n                        \"isError\": True,\n                    }\n\n            # Validate status\n            valid_statuses = [\"pending\", \"in_progress\", \"completed\"]\n            if todo[\"status\"] not in valid_statuses:\n                return {\n                    \"formatted\": f\"Error: Invalid status '{todo['status']}'. Must be one of: {', '.join(valid_statuses)}. 
Re call the tool with correct format (mandatory).\",\n                    \"isError\": True,\n                }\n\n        # Store the raw todos structure in memory\n        _current_plan = todos\n\n        # Emit plan update event if session is available\n        if self.session:\n            await self.session.send_event(\n                Event(\n                    event_type=\"plan_update\",\n                    data={\"plan\": todos},\n                )\n            )\n\n        # Format only for display using terminal_display utility\n        formatted_output = format_plan_tool_output(todos)\n\n        return {\n            \"formatted\": formatted_output,\n            \"totalResults\": len(todos),\n            \"isError\": False,\n        }\n\n\ndef get_current_plan() -> List[Dict[str, str]]:\n    \"\"\"Get the current plan (raw structure).\"\"\"\n    return _current_plan\n\n\n# Tool specification\nPLAN_TOOL_SPEC = {\n    \"name\": \"plan_tool\",\n    \"description\": (\n        \"Track progress on multi-step tasks with a todo list (pending/in_progress/completed).\\n\\n\"\n        \"Use for tasks with 3+ steps. Each call replaces the entire plan (send full list).\\n\\n\"\n        \"Rules: exactly ONE task in_progress at a time. Mark completed immediately after finishing. \"\n        \"Only mark completed when the task fully succeeded — keep in_progress if there are errors. 
\"\n        \"Update frequently so the user sees progress.\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"todos\": {\n                \"type\": \"array\",\n                \"description\": \"List of todo items\",\n                \"items\": {\n                    \"type\": \"object\",\n                    \"properties\": {\n                        \"id\": {\n                            \"type\": \"string\",\n                            \"description\": \"Unique identifier for the todo\",\n                        },\n                        \"content\": {\n                            \"type\": \"string\",\n                            \"description\": \"Description of the todo task\",\n                        },\n                        \"status\": {\n                            \"type\": \"string\",\n                            \"enum\": [\"pending\", \"in_progress\", \"completed\"],\n                            \"description\": \"Current status of the todo\",\n                        },\n                    },\n                    \"required\": [\"id\", \"content\", \"status\"],\n                },\n            }\n        },\n        \"required\": [\"todos\"],\n    },\n}\n\n\nasync def plan_tool_handler(\n    arguments: Dict[str, Any], session: Any = None\n) -> tuple[str, bool]:\n    tool = PlanTool(session=session)\n    result = await tool.execute(arguments)\n    return result[\"formatted\"], not result.get(\"isError\", False)\n"
  },
  {
    "path": "agent/tools/private_hf_repo_tools.py",
    "content": "\"\"\"\nPrivate HF Repos Tool - Manage private Hugging Face repositories\n\nPRIMARY USE: Store job outputs, training scripts, and logs from HF Jobs.\nSince job results are ephemeral, this tool provides persistent storage in private repos.\n\nSECONDARY USE: Read back stored files and list repo contents.\n\"\"\"\n\nimport asyncio\nfrom typing import Any, Dict, Literal, Optional\n\nfrom huggingface_hub import HfApi, hf_hub_download\nfrom huggingface_hub.utils import HfHubHTTPError\n\nfrom agent.tools.types import ToolResult\n\n# Operation names\nOperationType = Literal[\n    \"upload_file\", \"create_repo\", \"check_repo\", \"list_files\", \"read_file\"\n]\n\n\nasync def _async_call(func, *args, **kwargs):\n    \"\"\"Wrap synchronous HfApi calls for async context.\"\"\"\n    return await asyncio.to_thread(func, *args, **kwargs)\n\n\ndef _build_repo_url(repo_id: str, repo_type: str = \"dataset\") -> str:\n    \"\"\"Build the Hub URL for a repository.\"\"\"\n    type_path = \"\" if repo_type == \"model\" else f\"{repo_type}s\"\n    return f\"https://huggingface.co/{type_path}/{repo_id}\".replace(\"//\", \"/\")\n\n\ndef _content_to_bytes(content: str | bytes) -> bytes:\n    \"\"\"Convert string or bytes content to bytes.\"\"\"\n    if isinstance(content, str):\n        return content.encode(\"utf-8\")\n    return content\n\n\nclass PrivateHfRepoTool:\n    \"\"\"Tool for managing private Hugging Face repositories.\"\"\"\n\n    def __init__(self, hf_token: Optional[str] = None):\n        self.api = HfApi(token=hf_token)\n\n    async def execute(self, params: Dict[str, Any]) -> ToolResult:\n        \"\"\"Execute the specified upload operation.\"\"\"\n        operation = params.get(\"operation\")\n        args = params.get(\"args\", {})\n\n        # If no operation provided, return usage instructions\n        if not operation:\n            return self._show_help()\n\n        # Normalize operation name\n        operation = operation.lower()\n\n        # Check 
if help is requested\n        if args.get(\"help\"):\n            return self._show_operation_help(operation)\n\n        try:\n            # Route to appropriate handler\n            if operation == \"upload_file\":\n                return await self._upload_file(args)\n            elif operation == \"create_repo\":\n                return await self._create_repo(args)\n            elif operation == \"check_repo\":\n                return await self._check_repo(args)\n            elif operation == \"list_files\":\n                return await self._list_files(args)\n            elif operation == \"read_file\":\n                return await self._read_file(args)\n            else:\n                return {\n                    \"formatted\": f'Unknown operation: \"{operation}\"\\n\\n'\n                    \"Available operations: upload_file, create_repo, check_repo, list_files, read_file\\n\\n\"\n                    \"Call this tool with no operation for full usage instructions.\",\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                    \"isError\": True,\n                }\n\n        except HfHubHTTPError as e:\n            return {\n                \"formatted\": f\"API Error: {str(e)}\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n        except Exception as e:\n            return {\n                \"formatted\": f\"Error executing {operation}: {str(e)}\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n    def _show_help(self) -> ToolResult:\n        \"\"\"Show usage instructions when tool is called with no arguments.\"\"\"\n        usage_text = \"\"\"# Private HF Repos Tool\n\n**PRIMARY USE:** Store job outputs, scripts, and logs from HF Jobs to private repos.\nSince job results are ephemeral, use this tool for persistent storage.\n\n**SECONDARY 
USE:** Read back stored files and list repo contents.\n\n## Available Commands\n\n### Write Operations\n- **upload_file** - Upload file content to a repository\n- **create_repo** - Create a new private repository\n\n### Read Operations\n- **list_files** - List all files in a repository\n- **read_file** - Read content of a specific file from a repository\n- **check_repo** - Check if a repository exists\n\n## Examples\n\n### Upload a script to a dataset repo\nCall this tool with:\n```json\n{\n  \"operation\": \"upload_file\",\n  \"args\": {\n    \"file_content\": \"import pandas as pd\\\\nprint('Hello from HF!')\",\n    \"path_in_repo\": \"scripts/hello.py\",\n    \"repo_id\": \"my-dataset\",\n    \"repo_type\": \"dataset\",\n    \"create_if_missing\": true,\n    \"commit_message\": \"Add hello script\"\n  }\n}\n```\n\n### Upload logs from a job\nCall this tool with:\n```json\n{\n  \"operation\": \"upload_file\",\n  \"args\": {\n    \"file_content\": \"Job started...\\\\nJob completed successfully!\",\n    \"path_in_repo\": \"jobs/job-abc123/logs.txt\",\n    \"repo_id\": \"job-results\",\n    \"create_if_missing\": true\n  }\n}\n```\n\n### Create a repository\nCall this tool with:\n```json\n{\n  \"operation\": \"create_repo\",\n  \"args\": {\n    \"repo_id\": \"my-results\",\n    \"repo_type\": \"dataset\"\n  }\n}\n```\n\n### Create a Space\nCall this tool with:\n```json\n{\n  \"operation\": \"create_repo\",\n  \"args\": {\n    \"repo_id\": \"my-gradio-app\",\n    \"repo_type\": \"space\",\n    \"space_sdk\": \"gradio\"\n  }\n}\n```\nNote: Repositories are always created as private. 
For spaces, `space_sdk` is required (gradio, streamlit, static, or docker).\n\n### Check if a repository exists\nCall this tool with:\n```json\n{\n  \"operation\": \"check_repo\",\n  \"args\": {\n    \"repo_id\": \"my-dataset\",\n    \"repo_type\": \"dataset\"\n  }\n}\n```\n\n### List files in a repository\nCall this tool with:\n```json\n{\n  \"operation\": \"list_files\",\n  \"args\": {\n    \"repo_id\": \"job-results\",\n    \"repo_type\": \"dataset\"\n  }\n}\n```\n\n### Read a file from a repository\nCall this tool with:\n```json\n{\n  \"operation\": \"read_file\",\n  \"args\": {\n    \"repo_id\": \"job-results\",\n    \"path_in_repo\": \"jobs/job-abc123/script.py\",\n    \"repo_type\": \"dataset\"\n  }\n}\n```\n\n## Repository Types\n\n- **dataset** (default) - For storing data, results, logs, scripts\n- **model** - For ML models and related artifacts\n- **space** - For Spaces and applications\n\n## Tips\n\n- **Content-based**: Pass file content directly as strings or bytes, not file paths\n- **Repo ID format**: Use just the repo name (e.g., \"my-dataset\"). Username is automatically inferred from HF_TOKEN\n- **Automatic repo creation**: Set `create_if_missing: true` to auto-create repos (requires user approval)\n- **Organization**: Use path_in_repo to organize files (e.g., \"jobs/job-123/script.py\")\n- **After jobs**: Upload job scripts and logs after compute jobs complete for reproducibility\n\"\"\"\n        return {\"formatted\": usage_text, \"totalResults\": 1, \"resultsShared\": 1}\n\n    def _show_operation_help(self, operation: str) -> ToolResult:\n        \"\"\"Show help for a specific operation.\"\"\"\n        help_text = f\"Help for operation: {operation}\\n\\nCall with appropriate arguments. 
Use the main help for examples.\"\n        return {\"formatted\": help_text, \"totalResults\": 1, \"resultsShared\": 1}\n\n    async def _upload_file(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Upload file content to a Hub repository.\"\"\"\n        # Validate required arguments\n        file_content = args.get(\"file_content\")\n        path_in_repo = args.get(\"path_in_repo\")\n        repo_id = args.get(\"repo_id\")\n\n        if not file_content:\n            return {\n                \"formatted\": \"file_content is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        if not path_in_repo:\n            return {\n                \"formatted\": \"path_in_repo is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        if not repo_id:\n            return {\n                \"formatted\": \"repo_id is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        repo_type = args.get(\"repo_type\", \"dataset\")\n        create_if_missing = args.get(\"create_if_missing\", False)\n\n        # Check if repo exists\n        try:\n            repo_exists = await _async_call(\n                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type\n            )\n\n            # Create repo if needed\n            if not repo_exists and create_if_missing:\n                create_args = {\n                    \"repo_id\": repo_id,\n                    \"repo_type\": repo_type,\n                    \"private\": True,\n                }\n                # Pass through space_sdk if provided (required for spaces)\n                if \"space_sdk\" in args:\n                    create_args[\"space_sdk\"] = args[\"space_sdk\"]\n                await self._create_repo(create_args)\n        
    elif not repo_exists:\n                return {\n                    \"formatted\": f\"Repository {repo_id} does not exist. Set create_if_missing: true to create it.\",\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                    \"isError\": True,\n                }\n\n        except Exception as e:\n            return {\n                \"formatted\": f\"Failed to check repository: {str(e)}\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        # Convert content to bytes\n        file_bytes = _content_to_bytes(file_content)\n\n        # Upload file\n        try:\n            await _async_call(\n                self.api.upload_file,\n                path_or_fileobj=file_bytes,\n                path_in_repo=path_in_repo,\n                repo_id=repo_id,\n                repo_type=repo_type,\n                commit_message=args.get(\"commit_message\", f\"Upload {path_in_repo}\"),\n            )\n\n            repo_url = _build_repo_url(repo_id, repo_type)\n            file_url = f\"{repo_url}/blob/main/{path_in_repo}\"\n\n            response = f\"\"\"✓ File uploaded successfully!\n\n**Repository:** {repo_id}\n**File:** {path_in_repo}\n**View at:** {file_url}\n**Browse repo:** {repo_url}\"\"\"\n\n            return {\"formatted\": response, \"totalResults\": 1, \"resultsShared\": 1}\n\n        except Exception as e:\n            return {\n                \"formatted\": f\"Failed to upload file: {str(e)}\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n    async def _create_repo(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Create a new Hub repository.\"\"\"\n        repo_id = args.get(\"repo_id\")\n\n        if not repo_id:\n            return {\n                \"formatted\": \"repo_id is required\",\n                \"totalResults\": 
0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        repo_type = args.get(\"repo_type\", \"dataset\")\n        private = True  # Always create private repos\n        space_sdk = args.get(\"space_sdk\")  # Required if repo_type is \"space\"\n\n        try:\n            # Check if repo already exists\n            repo_exists = await _async_call(\n                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type\n            )\n\n            if repo_exists:\n                repo_url = _build_repo_url(repo_id, repo_type)\n                return {\n                    \"formatted\": f\"Repository {repo_id} already exists.\\n**View at:** {repo_url}\",\n                    \"totalResults\": 1,\n                    \"resultsShared\": 1,\n                }\n\n            # Validate space_sdk for spaces\n            if repo_type == \"space\" and not space_sdk:\n                return {\n                    \"formatted\": \"space_sdk is required when creating a space. 
Valid values: gradio, streamlit, static, docker\",\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                    \"isError\": True,\n                }\n\n            # Create repository\n            create_kwargs = {\n                \"repo_id\": repo_id,\n                \"repo_type\": repo_type,\n                \"private\": private,\n                \"exist_ok\": True,\n            }\n            # Add space_sdk only for spaces\n            if repo_type == \"space\" and space_sdk:\n                create_kwargs[\"space_sdk\"] = space_sdk\n\n            repo_url = await _async_call(self.api.create_repo, **create_kwargs)\n\n            response = f\"\"\"✓ Repository created successfully!\n\n**Repository:** {repo_id}\n**Type:** {repo_type}\n**Private:** Yes\n**View at:** {repo_url}\"\"\"\n\n            return {\"formatted\": response, \"totalResults\": 1, \"resultsShared\": 1}\n\n        except Exception as e:\n            return {\n                \"formatted\": f\"Failed to create repository: {str(e)}\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n    async def _check_repo(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Check if a Hub repository exists.\"\"\"\n        repo_id = args.get(\"repo_id\")\n\n        if not repo_id:\n            return {\n                \"formatted\": \"repo_id is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        repo_type = args.get(\"repo_type\", \"dataset\")\n\n        try:\n            repo_exists = await _async_call(\n                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type\n            )\n\n            if repo_exists:\n                repo_url = _build_repo_url(repo_id, repo_type)\n                response = f\"\"\"✓ Repository exists!\n\n**Repository:** {repo_id}\n**Type:** 
{repo_type}\n**View at:** {repo_url}\"\"\"\n            else:\n                response = f\"\"\"Repository does not exist: {repo_id}\n\nTo create it, call this tool with:\n```json\n{{\n  \"operation\": \"create_repo\",\n  \"args\": {{\n    \"repo_id\": \"{repo_id}\",\n    \"repo_type\": \"{repo_type}\"\n  }}\n}}\n```\"\"\"\n\n            return {\n                \"formatted\": response,\n                \"totalResults\": 1 if repo_exists else 0,\n                \"resultsShared\": 1 if repo_exists else 0,\n            }\n\n        except Exception as e:\n            return {\n                \"formatted\": f\"Failed to check repository: {str(e)}\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n    async def _list_files(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"List all files in a Hub repository.\"\"\"\n        repo_id = args.get(\"repo_id\")\n\n        if not repo_id:\n            return {\n                \"formatted\": \"repo_id is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        repo_type = args.get(\"repo_type\", \"dataset\")\n\n        try:\n            # List all files in the repository\n            files = await _async_call(\n                self.api.list_repo_files, repo_id=repo_id, repo_type=repo_type\n            )\n\n            if not files:\n                return {\n                    \"formatted\": f\"No files found in repository: {repo_id}\",\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                }\n\n            # Format file list\n            file_list = \"\\n\".join(f\"- {f}\" for f in sorted(files))\n            repo_url = _build_repo_url(repo_id, repo_type)\n\n            response = f\"\"\"✓ Files in repository: {repo_id}\n\n**Total files:** {len(files)}\n**Repository URL:** 
{repo_url}\n\n**Files:**\n{file_list}\"\"\"\n\n            return {\n                \"formatted\": response,\n                \"totalResults\": len(files),\n                \"resultsShared\": len(files),\n            }\n\n        except Exception as e:\n            return {\n                \"formatted\": f\"Failed to list files: {str(e)}\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n    async def _read_file(self, args: Dict[str, Any]) -> ToolResult:\n        \"\"\"Read content of a specific file from a Hub repository.\"\"\"\n        repo_id = args.get(\"repo_id\")\n        path_in_repo = args.get(\"path_in_repo\")\n\n        if not repo_id:\n            return {\n                \"formatted\": \"repo_id is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        if not path_in_repo:\n            return {\n                \"formatted\": \"path_in_repo is required\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n        repo_type = args.get(\"repo_type\", \"dataset\")\n\n        try:\n            # Download file to cache and read it\n            file_path = await _async_call(\n                hf_hub_download,\n                repo_id=repo_id,\n                filename=path_in_repo,\n                repo_type=repo_type,\n                token=self.api.token,\n            )\n\n            # Read file content\n            with open(file_path, \"r\", encoding=\"utf-8\") as f:\n                content = f.read()\n\n            repo_url = _build_repo_url(repo_id, repo_type)\n            file_url = f\"{repo_url}/blob/main/{path_in_repo}\"\n\n            response = f\"\"\"✓ File read successfully!\n\n**Repository:** {repo_id}\n**File:** {path_in_repo}\n**Size:** {len(content)} characters\n**View at:** 
{file_url}\n\n**Content:**\n```\n{content}\n```\"\"\"\n\n            return {\"formatted\": response, \"totalResults\": 1, \"resultsShared\": 1}\n\n        except UnicodeDecodeError:\n            # If file is binary, return size info instead\n            try:\n                with open(file_path, \"rb\") as f:\n                    binary_content = f.read()\n\n                return {\n                    \"formatted\": f\"File is binary ({len(binary_content)} bytes). Cannot display as text.\",\n                    \"totalResults\": 1,\n                    \"resultsShared\": 1,\n                }\n            except Exception as e:\n                return {\n                    \"formatted\": f\"Failed to read binary file: {str(e)}\",\n                    \"totalResults\": 0,\n                    \"resultsShared\": 0,\n                    \"isError\": True,\n                }\n        except Exception as e:\n            return {\n                \"formatted\": f\"Failed to read file: {str(e)}\",\n                \"totalResults\": 0,\n                \"resultsShared\": 0,\n                \"isError\": True,\n            }\n\n\n# Tool specification for agent registration\nPRIVATE_HF_REPO_TOOL_SPEC = {\n    \"name\": \"hf_private_repos\",\n    \"description\": (\n        \"Manage private HF repositories - create, upload, read, list files in models/datasets/spaces. \"\n        \"⚠️ PRIMARY USE: Store job outputs persistently (job storage is EPHEMERAL - everything deleted after completion). \"\n        \"**Use when:** (1) Job completes and need to store logs/scripts/results, (2) Creating repos for training outputs, \"\n        \"(3) Reading back stored files, (4) Managing Space files, (5) Organizing job artifacts by path. \"\n        \"**Pattern:** hf_jobs (ephemeral) → hf_private_repos upload_file (persistent) → can read_file later. \"\n        \"ALWAYS pass file_content as string/bytes (✓), never file paths (✗) - this is content-based, no filesystem access. 
\"\n        \"**Operations:** create_repo (new private repo), upload_file (store content), read_file (retrieve content), list_files (browse), check_repo (verify exists). \"\n        \"**Critical for reliability:** Jobs lose all files after completion - use this tool to preserve important outputs. \"\n        \"Repositories created are ALWAYS private by default (good for sensitive training data/models). \"\n        \"For Spaces: must provide space_sdk ('gradio', 'streamlit', 'static', 'docker') when creating. \"\n        \"**Then:** After uploading, provide user with repository URL for viewing/sharing.\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"operation\": {\n                \"type\": \"string\",\n                \"enum\": [\n                    \"upload_file\",\n                    \"create_repo\",\n                    \"check_repo\",\n                    \"list_files\",\n                    \"read_file\",\n                ],\n                \"description\": (\n                    \"Operation to execute. Valid values: [upload_file, create_repo, check_repo, list_files, read_file]\"\n                ),\n            },\n            \"args\": {\n                \"type\": \"object\",\n                \"description\": (\n                    \"Operation-specific arguments as a JSON object. \"\n                    \"Write ops: file_content (string/bytes), path_in_repo (string), repo_id (string), \"\n                    \"repo_type (dataset/model/space), create_if_missing (boolean), commit_message (string), \"\n                    \"space_sdk (gradio/streamlit/static/docker - required when repo_type=space). 
\"\n                    \"Read ops: repo_id (string), path_in_repo (for read_file), repo_type (optional).\"\n                ),\n                \"additionalProperties\": True,\n            },\n        },\n    },\n}\n\n\nasync def private_hf_repo_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:\n    \"\"\"Handler for agent tool router.\"\"\"\n    try:\n        tool = PrivateHfRepoTool()\n        result = await tool.execute(arguments)\n        return result[\"formatted\"], not result.get(\"isError\", False)\n    except Exception as e:\n        return f\"Error executing Private HF Repo tool: {str(e)}\", False\n"
  },
  {
    "path": "agent/tools/research_tool.py",
    "content": "\"\"\"\nResearch subagent tool — spawns a cheap LLM call with a focused\nresearch task and returns a summary. The subagent gets its own\nindependent context (not the main conversation), so research\nwork doesn't pollute the main agent's context window.\n\nInspired by claude-code's code-explorer agent pattern.\n\"\"\"\n\nimport json\nimport logging\nfrom typing import Any\n\nfrom litellm import Message, acompletion\n\nfrom agent.core.doom_loop import check_for_doom_loop\nfrom agent.core.llm_params import _resolve_llm_params\nfrom agent.core.prompt_caching import with_prompt_caching\nfrom agent.core.session import Event\n\nlogger = logging.getLogger(__name__)\n\n# Context budget for the research subagent (tokens).\n# When usage exceeds WARN threshold, the subagent is told to wrap up.\n# At MAX, the loop is force-stopped and whatever content exists is returned.\n_RESEARCH_CONTEXT_WARN = 170_000  # 85% of 200k\n_RESEARCH_CONTEXT_MAX = 190_000\n\n# Tools the research agent can use (read-only subset)\nRESEARCH_TOOL_NAMES = {\n    \"read\",\n    \"bash\",\n    \"explore_hf_docs\",\n    \"fetch_hf_docs\",\n    \"find_hf_api\",\n    \"hf_papers\",\n    \"github_find_examples\",\n    \"github_list_repos\",\n    \"github_read_file\",\n    \"hf_inspect_dataset\",\n    \"hf_repo_files\",\n}\n\nRESEARCH_SYSTEM_PROMPT = \"\"\"\\\nYou are a research sub-agent for an ML engineering assistant.\nYour primary job: mine the literature to find the best training recipes —\nthen back them up with working code and up to date documentation. The main agent will use\nyour findings to implement the actual solution.\n\n# Start from the literature\n\nYour default approach is a deep literature crawl. Do not start from docs or\nexample scripts — start from papers. Papers contain the results, and results\ntell you what actually works.\n\n## The crawl\n\n1. **Find anchor papers**: Search for the task/domain. Identify the landmark paper(s) — high citations, recent, or both.\n2. 
**Crawl the citation graph**: Use `citation_graph` on the anchor paper(s). Look DOWNSTREAM (papers that cite it) — these are the ones that built on it, improved it, or applied it to new domains. Prioritize recent papers and papers with many citations.\n3. **Read methodology sections**: For the most promising papers (strong results, recent, relevant), use `read_paper` with section parameter to read sections 3, 4, 5 (Methodology, Experiments, Results — not the abstract). Extract:\n   - The exact dataset(s) used (name, source, size, any filtering/preprocessing)\n   - The training method and configuration (optimizer, lr, schedule, epochs, batch size)\n   - The results those choices produced (benchmark scores, metrics, comparisons)\n4. **Attribute results to recipes**: This is the critical step. Every finding must link a RESULT to the RECIPE that produced it. \"Dataset X + method Y + lr Z → score W on benchmark V\" is useful. \"They used SFT\" is not.\n5. **Validate datasets**: For the most promising datasets, check if they exist on HF Hub with `hf_inspect_dataset`. Verify format matches the training method. Report if it doesn't.\n6. **Find code**: Now find working implementation code via `github_find_examples` and `github_read_file`. 
Use docs (`explore_hf_docs`, `fetch_hf_docs`) to fill in API details.\n\n## When to go deeper\n\n- If the anchor paper is old (>1 year), its citation graph is your main source — the downstream papers will have better methods.\n- If a downstream paper reports significantly better results, crawl ITS citation graph too.\n- Use `snippet_search` to find specific claims across papers (e.g., \"does dataset X consistently outperform Y for this task?\").\n- Use `recommend` to find related papers the citation graph might miss.\n\n# How to use your tools\n\n## Papers & citations (USE FIRST)\n- `hf_papers(operation=\"search\", query=...)`: Search papers (HF-tuned for ML)\n- `hf_papers(operation=\"search\", query=..., min_citations=50, sort_by=\"citationCount\")`: Find highly-cited papers via Semantic Scholar\n- `hf_papers(operation=\"search\", query=..., date_from=\"2024-01-01\")`: Search with date filter\n- `hf_papers(operation=\"paper_details\", arxiv_id=...)`: Metadata, citations, TL;DR\n- `hf_papers(operation=\"citation_graph\", arxiv_id=...)`: References + citations with influence flags and intents\n- `hf_papers(operation=\"read_paper\", arxiv_id=..., section=\"3\")`: Read a specific section's full text\n- `hf_papers(operation=\"read_paper\", arxiv_id=...)`: Get TOC (abstract + section list) — use this to find which section numbers contain methodology/experiments\n- `hf_papers(operation=\"snippet_search\", query=...)`: Semantic search across 12M+ full-text paper passages\n- `hf_papers(operation=\"recommend\", arxiv_id=...)`: Find related papers\n- `hf_papers(operation=\"find_datasets\", arxiv_id=...)`: Find HF datasets linked to a paper\n- `hf_papers(operation=\"find_all_resources\", arxiv_id=...)`: Datasets + models + collections for a paper\n\n## Dataset inspection\n- `hf_inspect_dataset`: Check dataset schema, splits, sample rows\n  CRITICAL for training: verify column format matches training method:\n  - SFT: needs \"messages\", \"text\", or 
\"prompt\"/\"completion\"\n  - DPO: needs \"prompt\", \"chosen\", \"rejected\"\n  - GRPO: needs \"prompt\" only\n\n## GitHub code research\n- `github_find_examples`: Find working example scripts in HF repos (trl, transformers, etc.)\n- `github_read_file`: Read the actual implementation code. Use line_start/line_end for large files.\n\n## Documentation\n- `explore_hf_docs(endpoint)`: Search docs for a library. Endpoints: trl, transformers, datasets, peft, accelerate, trackio, vllm, inference-endpoints, etc.\n- `fetch_hf_docs(url)`: Fetch full page content from explore results\n- `find_hf_api(query=..., tag=...)`: Find REST API endpoints\n\n## Hub repo inspection\n- `hf_repo_files`: List/read files in any HF repo (model, dataset, space)\n\n# Correct research pattern\n\n```\n# 1. Find anchor paper(s) for the task\nhf_papers({\"operation\": \"search\", \"query\": \"GPQA graduate questions\", \"sort_by\": \"citationCount\"})\n\n# 2. Crawl citation graph — look downstream\nhf_papers({\"operation\": \"citation_graph\", \"arxiv_id\": \"2311.12022\", \"direction\": \"citations\"})\n\n# 3. Read methodology of promising downstream papers\nhf_papers({\"operation\": \"read_paper\", \"arxiv_id\": \"2604.01348\"})  # TOC first\nhf_papers({\"operation\": \"read_paper\", \"arxiv_id\": \"2604.01348\", \"section\": \"3\"})  # Methodology\nhf_papers({\"operation\": \"read_paper\", \"arxiv_id\": \"2604.01348\", \"section\": \"4\"})  # Experiments\n\n# 4. Find datasets used by these papers\nhf_papers({\"operation\": \"find_datasets\", \"arxiv_id\": \"2604.01348\"})\nhf_papers({\"operation\": \"find_all_resources\", \"arxiv_id\": \"2604.01348\"})\n\n# 5. Validate datasets exist and have correct format\nhf_inspect_dataset({\"dataset\": \"org/dataset-name\", \"split\": \"train\", \"sample_rows\": 3})\n\n# 6. 
Now get working code for the training method\ngithub_find_examples({\"repo\": \"trl\", \"keyword\": \"sft\"})\ngithub_read_file({\"repo\": \"huggingface/trl\", \"path\": \"examples/scripts/sft.py\"})\nexplore_hf_docs(\"trl\")\n```\n\n# Output format\n\n\n\nYour output MUST be structured as a ranked list of training recipes, each attributed to published results:\n\n## Recipe table (REQUIRED)\nFor each promising approach found, report:\n- **Paper**: title, arxiv_id, date, venue\n- **Result**: exact benchmark scores and what they were measured on\n- **Dataset(s)**: name, size, source, HF Hub availability, format verified (yes/no)\n- **Method**: training approach, key hyperparameters (lr, epochs, batch size, optimizer, schedule)\n- **What made it work**: the specific insight or trick that drove the result (data curation, curriculum, loss function, etc.)\n\nRank recipes by result quality. The main agent will pick the best one that's feasible.\n\n## Code patterns\n- Key imports, configurations, and usage patterns from working examples\n- Specific file paths, URLs, function names from docs\n\n## Recommendations\n- Which recipe to implement first and why\n- What datasets to use (with HF Hub paths, verified)\n- Any gaps: datasets that need preprocessing, methods that need adaptation\n\nAdditionally include:\n- **SOTA landscape**: Current best models, datasets, and methods for the task (from recent papers). Flag anything outdated.\n- **Essential references**: Specific file paths, URLs, function names, doc sections, code snippets\n  that the main agent should use directly\n- **Code patterns**: Key imports, configurations, and usage patterns from working examples\n\nBe concise. Your output goes into another agent's context — every token counts.\nAim for 500-1500 words max. 
Include actual code snippets from examples you read,\nnot paraphrased descriptions.\n\"\"\"\n\nRESEARCH_TOOL_SPEC = {\n    \"name\": \"research\",\n    \"description\": (\n        \"Spawn a research sub-agent to explore documentation, codebases, \"\n        \"or repos WITHOUT polluting the main conversation context. \"\n        \"The sub-agent gets its own independent context window with read-only \"\n        \"research tools and returns a concise summary of findings.\\n\\n\"\n        \"Use this for:\\n\"\n        \"- Researching current API usage before implementing ML tasks \"\n        \"(find examples + read docs)\\n\"\n        \"- Exploring HF docs, reading papers, analyzing GitHub repos\\n\"\n        \"- Any research where raw tool outputs would be too verbose\\n\\n\"\n        \"The sub-agent knows how to use github_find_examples, github_read_file, \"\n        \"explore_hf_docs, fetch_hf_docs, hf_inspect_dataset, hf_papers, etc. \"\n        \"Just describe what you need researched.\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"properties\": {\n            \"task\": {\n                \"type\": \"string\",\n                \"description\": (\n                    \"Detailed description of what to research. Be specific: \"\n                    \"include library names, trainer types, dataset names, \"\n                    \"repo names, or doc pages to explore. Example: \"\n                    \"'Research current TRL SFTTrainer usage: find working \"\n                    \"example scripts, read the SFT documentation, and check \"\n                    \"SFTConfig parameters. 
Also validate that dataset \"\n                    \"HuggingFaceH4/ultrachat_200k has the right format for SFT.'\"\n                ),\n            },\n            \"context\": {\n                \"type\": \"string\",\n                \"description\": (\n                    \"Optional context from the current conversation that the \"\n                    \"research agent needs (e.g., what the user wants to build, \"\n                    \"constraints, what's been tried).\"\n                ),\n            },\n        },\n        \"required\": [\"task\"],\n    },\n}\n\n\ndef _get_research_model(main_model: str) -> str:\n    \"\"\"Pick a cheaper model for research based on the main model.\"\"\"\n    if \"anthropic\" in main_model:\n        return \"bedrock/us.anthropic.claude-sonnet-4-6\"\n    # For non-Anthropic models (HF router etc.), use the same model\n    return main_model\n\n\nasync def research_handler(\n    arguments: dict[str, Any], session=None, tool_call_id: str | None = None, **_kw\n) -> tuple[str, bool]:\n    \"\"\"Execute a research sub-agent with its own context.\"\"\"\n    task = arguments.get(\"task\", \"\")\n    context = arguments.get(\"context\", \"\")\n    if not task:\n        return \"No research task provided.\", False\n\n    if not session:\n        return \"No session available for research agent.\", False\n\n    # Build the sub-agent's messages (independent context)\n    messages: list[Message] = [\n        Message(role=\"system\", content=RESEARCH_SYSTEM_PROMPT),\n    ]\n\n    user_content = f\"Research task: {task}\"\n    if context:\n        user_content = f\"Context: {context}\\n\\n{user_content}\"\n    messages.append(Message(role=\"user\", content=user_content))\n\n    # Use a cheaper/faster model for research\n    main_model = session.config.model_name\n    research_model = _get_research_model(main_model)\n    # Research is a cheap sub-call — cap the main session's effort at \"high\"\n    # so a user preference of ``max``/``xhigh`` 
(valid for Opus 4.6/4.7) doesn't\n    # propagate to a Sonnet research model that may not accept those levels.\n    # We also haven't probed this sub-model so we don't know its ceiling.\n    _pref = getattr(session.config, \"reasoning_effort\", None)\n    _capped = \"high\" if _pref in (\"max\", \"xhigh\") else _pref\n    llm_params = _resolve_llm_params(\n        research_model,\n        getattr(session, \"hf_token\", None),\n        reasoning_effort=_capped,\n    )\n\n    # Get read-only tool specs from the session's tool router\n    tool_specs = [\n        spec\n        for spec in session.tool_router.get_tool_specs_for_llm()\n        if spec[\"function\"][\"name\"] in RESEARCH_TOOL_NAMES\n    ]\n\n    # Unique ID + short label so parallel agents show separate status lines.\n    # Use the tool_call_id when available — it's unique per invocation and lets\n    # the frontend match a research tool card to its agent state. Fall back to\n    # uuid for offline/test paths. Previously used md5(task), which collided\n    # when the same task string was researched in parallel.\n    if tool_call_id:\n        _agent_id = tool_call_id\n    else:\n        import uuid\n        _agent_id = uuid.uuid4().hex[:8]\n    _agent_label = \"research: \" + (task[:50] + \"…\" if len(task) > 50 else task)\n\n    async def _log(text: str) -> None:\n        \"\"\"Send a progress event to the UI so it doesn't look frozen.\"\"\"\n        try:\n            await session.send_event(\n                Event(event_type=\"tool_log\", data={\n                    \"tool\": \"research\",\n                    \"log\": text,\n                    \"agent_id\": _agent_id,\n                    \"label\": _agent_label,\n                })\n            )\n        except Exception:\n            pass\n\n    _tool_uses = 0\n    _total_tokens = 0\n    _warned_context = False\n\n    await _log(\"Starting research sub-agent...\")\n\n    # Run the research loop — context budget is the real limiter\n    
max_iterations = 60\n    for _iteration in range(max_iterations):\n        # ── Doom-loop detection ──\n        doom_prompt = check_for_doom_loop(messages)\n        if doom_prompt:\n            logger.warning(\"Research sub-agent doom loop detected at iteration %d\", _iteration)\n            await _log(\"Doom loop detected — injecting corrective prompt\")\n            messages.append(Message(role=\"user\", content=doom_prompt))\n\n        # ── Context budget: warn at 75%, hard-stop at 95% ──\n        if _total_tokens >= _RESEARCH_CONTEXT_MAX:\n            logger.warning(\n                \"Research sub-agent hit context max (%d tokens) — forcing summary\",\n                _total_tokens,\n            )\n            await _log(f\"Context limit reached ({_total_tokens} tokens) — forcing wrap-up\")\n            # Ask for a final summary with no tools\n            messages.append(Message(\n                role=\"user\",\n                content=(\n                    \"[SYSTEM: CONTEXT LIMIT REACHED] You have used all available context. \"\n                    \"Summarize your findings NOW. 
Do NOT call any more tools.\"\n                ),\n            ))\n            try:\n                _msgs, _ = with_prompt_caching(messages, None, llm_params.get(\"model\"))\n                response = await acompletion(\n                    messages=_msgs,\n                    tools=None,  # no tools — force text response\n                    stream=False,\n                    timeout=120,\n                    **llm_params,\n                )\n                content = response.choices[0].message.content or \"\"\n                return content or \"Research context exhausted — no summary produced.\", bool(content)\n            except Exception:\n                return \"Research context exhausted and summary call failed.\", False\n\n        if not _warned_context and _total_tokens >= _RESEARCH_CONTEXT_WARN:\n            _warned_context = True\n            await _log(f\"Context at {_total_tokens} tokens — nudging to wrap up\")\n            messages.append(Message(\n                role=\"user\",\n                content=(\n                    \"[SYSTEM: You have used 75% of your context budget. 
\"\n                    \"Start wrapping up: finish any critical lookups, then \"\n                    \"produce your final summary within the next 1-2 iterations.]\"\n                ),\n            ))\n\n        try:\n            _msgs, _tools = with_prompt_caching(\n                messages, tool_specs if tool_specs else None, llm_params.get(\"model\")\n            )\n            response = await acompletion(\n                messages=_msgs,\n                tools=_tools,\n                tool_choice=\"auto\",\n                stream=False,\n                timeout=120,\n                **llm_params,\n            )\n        except Exception as e:\n            logger.error(\"Research sub-agent LLM error: %s\", e)\n            return f\"Research agent LLM error: {e}\", False\n\n        # Track tokens\n        if response.usage:\n            _total_tokens = response.usage.total_tokens\n            await _log(f\"tokens:{_total_tokens}\")\n\n        choice = response.choices[0]\n        msg = choice.message\n\n        # If no tool calls, we have our final answer\n        if not msg.tool_calls:\n            await _log(\"Research complete.\")\n            content = msg.content or \"Research completed but no summary generated.\"\n            return content, True\n\n        # Execute tool calls and add results.\n        # Rebuild the assistant message with only the wire-safe fields —\n        # LiteLLM's raw Message carries `provider_specific_fields` and\n        # `reasoning_content`, which the HF router's OpenAI schema rejects\n        # if we echo them back in the next request.\n        messages.append(Message(\n            role=\"assistant\",\n            content=msg.content,\n            tool_calls=msg.tool_calls,\n        ))\n        for tc in msg.tool_calls:\n            try:\n                tool_args = json.loads(tc.function.arguments)\n            except (json.JSONDecodeError, TypeError):\n                messages.append(\n                    Message(\n         
               role=\"tool\",\n                        content=\"Invalid tool arguments.\",\n                        tool_call_id=tc.id,\n                        name=tc.function.name,\n                    )\n                )\n                continue\n\n            tool_name = tc.function.name\n            if tool_name not in RESEARCH_TOOL_NAMES:\n                messages.append(\n                    Message(\n                        role=\"tool\",\n                        content=f\"Tool '{tool_name}' not available for research.\",\n                        tool_call_id=tc.id,\n                        name=tool_name,\n                    )\n                )\n                continue\n\n            try:\n                import json as _json\n\n                args_str = _json.dumps(tool_args)[:80]\n                await _log(f\"▸ {tool_name}  {args_str}\")\n\n                output, _success = await session.tool_router.call_tool(\n                    tool_name, tool_args, session=session\n                )\n                _tool_uses += 1\n                await _log(f\"tools:{_tool_uses}\")\n                # Truncate tool output for the research context\n                if len(output) > 8000:\n                    output = output[:4800] + \"\\n...(truncated)...\\n\" + output[-3200:]\n            except Exception as e:\n                output = f\"Tool error: {e}\"\n\n            messages.append(\n                Message(\n                    role=\"tool\",\n                    content=output,\n                    tool_call_id=tc.id,\n                    name=tool_name,\n                )\n            )\n\n    # ── Iteration limit: try to salvage findings ──\n    await _log(\"Iteration limit reached — extracting summary\")\n    messages.append(Message(\n        role=\"user\",\n        content=(\n            \"[SYSTEM: ITERATION LIMIT] You have reached the maximum number of research \"\n            \"iterations. Summarize ALL findings so far. 
Do NOT call any more tools.\"\n        ),\n    ))\n    try:\n        _msgs, _ = with_prompt_caching(messages, None, llm_params.get(\"model\"))\n        response = await acompletion(\n            messages=_msgs,\n            tools=None,\n            stream=False,\n            timeout=120,\n            **llm_params,\n        )\n        content = response.choices[0].message.content or \"\"\n        if content:\n            return content, True\n    except Exception as e:\n        logger.error(\"Research summary call failed: %s\", e)\n\n    return (\n        \"Research agent hit iteration limit (60). \"\n        \"Partial findings may be incomplete — try a more focused task.\",\n        False,\n    )\n"
  },
  {
    "path": "agent/tools/sandbox_client.py",
    "content": "#!/usr/bin/env python3\n# /// script\n# requires-python = \">=3.10\"\n# dependencies = [\"huggingface_hub>=0.20.0\", \"httpx>=0.27.0\"]\n# ///\n\"\"\"\nSandbox Tools — Agent-native primitives for HF Space dev-mode sandboxes.\n\nArchitecture:\n  - Creates a sandbox by duplicating a template Space (runs sandbox_server.py)\n  - Waits for it to come online\n  - Communicates via HTTPS to the Space's API\n  - Optionally deletes the Space when done\n\nLifecycle:\n    sb = Sandbox.create(owner=\"burtenshaw\")         # duplicate, wait, connect\n    sb = Sandbox.create(owner=\"burtenshaw\",          # with options\n                        hardware=\"t4-small\",\n                        private=True,\n                        sleep_time=3600)\n    sb = Sandbox.connect(\"burtenshaw/my-sandbox-abc\") # attach to existing\n\n    sb.bash(\"uv run train.py\")\n    sb.read(\"/app/train.py\")\n    sb.edit(\"/app/train.py\", old_str=\"lr=1e-3\", new_str=\"lr=1e-4\")\n\n    sb.delete()                                       # tear down when done\n\n    # Or use as a context manager for automatic cleanup\n    with Sandbox.create(owner=\"burtenshaw\") as sb:\n        sb.bash(\"python train.py\")\n    # Space deleted on exit\n\nTools: bash, read, write, edit, upload\n\"\"\"\n\nfrom __future__ import annotations\n\nimport io\nimport sys\nimport time\nimport uuid\nfrom dataclasses import dataclass, field\nfrom typing import Any, Callable\n\nimport httpx\nfrom huggingface_hub import CommitOperationAdd, HfApi\n\nTEMPLATE_SPACE = \"burtenshaw/sandbox\"\nHARDWARE_OPTIONS = [\n    \"cpu-basic\",\n    \"cpu-upgrade\",\n    \"t4-small\",\n    \"t4-medium\",\n    \"a10g-small\",\n    \"a10g-large\",\n    \"a100-large\",\n]\nOUTPUT_LIMIT = 25000\nLINE_LIMIT = 4000\nDEFAULT_READ_LIMIT = 2000\nDEFAULT_TIMEOUT = 240\nMAX_TIMEOUT = 1200\nWAIT_TIMEOUT = 600\nWAIT_INTERVAL = 5\nAPI_WAIT_TIMEOUT = 180\n\n_DOCKERFILE = \"\"\"\\\nFROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim\n\nRUN 
apt-get update && \\\\\n    apt-get install -y \\\\\n      bash git git-lfs wget curl procps \\\\\n      htop vim nano jq tmux \\\\\n      build-essential && \\\\\n    rm -rf /var/lib/apt/lists/*\n\nRUN uv pip install --system fastapi uvicorn python-multipart\n\nRUN useradd -m -u 1000 user\nUSER user\n\nENV HOME=/home/user \\\\\n    PATH=/home/user/.local/bin:$PATH \\\\\n    PIP_USER=1 \\\\\n    HF_HUB_DISABLE_PROGRESS_BARS=1 \\\\\n    TQDM_DISABLE=1 \\\\\n    HF_HUB_ENABLE_HF_TRANSFER=1 \\\\\n    UV_NO_PROGRESS=1 \\\\\n    PYTHONWARNINGS=ignore::DeprecationWarning\n\nWORKDIR /app\nCOPY --chown=user . /app\n\nEXPOSE 7860\n\nCMD [\"python\", \"sandbox_server.py\"]\n\"\"\"\n\n_SANDBOX_SERVER = '''\\\n\"\"\"Minimal FastAPI server for sandbox operations.\"\"\"\nimport os, subprocess, pathlib, signal, threading, re, tempfile\nfrom fastapi import FastAPI\nfrom pydantic import BaseModel\nfrom typing import Optional\nimport uvicorn\n\n_ANSI_RE = re.compile(r'\\\\x1b\\\\[[0-9;]*[a-zA-Z]|\\\\x1b\\\\].*?\\\\x07')\n\ndef _strip_ansi(text: str) -> str:\n    return _ANSI_RE.sub('', text)\n\ndef _truncate_output(output: str, max_chars: int = 25000, head_ratio: float = 0.25) -> str:\n    if len(output) <= max_chars:\n        return output\n    # Write full output to temp file so LLM can read specific sections\n    spill_path = None\n    try:\n        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', prefix='bash_output_', dir='/tmp', delete=False) as f:\n            f.write(output)\n            spill_path = f.name\n    except Exception:\n        pass\n    head_budget = int(max_chars * head_ratio)\n    tail_budget = max_chars - head_budget\n    head = output[:head_budget]\n    tail = output[-tail_budget:]\n    total = len(output)\n    omitted = total - max_chars\n    meta = f\"\\\\n\\\\n... 
({omitted:,} of {total:,} chars omitted, showing first {head_budget:,} + last {tail_budget:,}) ...\\\\n\"\n    if spill_path:\n        meta += f\"Full output saved to {spill_path} — use the read tool with offset/limit to inspect specific sections.\\\\n\"\n    return head + meta + tail\n\ndef _atomic_write(path: pathlib.Path, content: str):\n    \"\"\"Write atomically: temp file + fsync + os.replace.\"\"\"\n    path.parent.mkdir(parents=True, exist_ok=True)\n    fd = None\n    tmp_path = None\n    try:\n        fd, tmp_path = tempfile.mkstemp(dir=str(path.parent), suffix=\".tmp\")\n        os.write(fd, content.encode(\"utf-8\"))\n        os.fsync(fd)\n        os.close(fd)\n        fd = None\n        os.replace(tmp_path, str(path))\n        tmp_path = None\n    finally:\n        if fd is not None:\n            os.close(fd)\n        if tmp_path is not None:\n            try:\n                os.unlink(tmp_path)\n            except OSError:\n                pass\n\napp = FastAPI()\n\n# Track active bash processes so they can be killed on cancel\n_active_procs = {}  # pid -> subprocess.Popen\n_proc_lock = threading.Lock()\n\nclass BashReq(BaseModel):\n    command: str\n    work_dir: str = \"/app\"\n    timeout: int = 120\n\nclass ReadReq(BaseModel):\n    path: str\n    offset: Optional[int] = None\n    limit: Optional[int] = 2000\n\nclass WriteReq(BaseModel):\n    path: str\n    content: str\n\nclass EditReq(BaseModel):\n    path: str\n    old_str: str\n    new_str: str\n    replace_all: bool = False\n    mode: str = \"replace\"\n\nclass ExistsReq(BaseModel):\n    path: str\n\n# ── Fuzzy matching & edit utilities (embedded) ──\n\nUNICODE_MAP = {\n    \"\\\\u2013\": \"-\", \"\\\\u2014\": \"-\", \"\\\\u2212\": \"-\",\n    \"\\\\u2018\": \"'\", \"\\\\u2019\": \"'\",\n    \"\\\\u201c\": \\'\"\\', \"\\\\u201d\": \\'\"\\',\n    \"\\\\u00a0\": \" \", \"\\\\u2003\": \" \", \"\\\\u2002\": \" \",\n    \"\\\\u200b\": \"\", \"\\\\ufeff\": \"\",\n}\n\ndef _normalize_unicode(s):\n    
return \"\".join(UNICODE_MAP.get(c, c) for c in s)\n\ndef _fuzzy_find_original(content, pattern):\n    \"\"\"Find the original text in content that matches pattern fuzzily.\"\"\"\n    if pattern in content:\n        return pattern, None\n    # Pass 2: right-trim\n    c_lines = content.split(\"\\\\n\")\n    c_rt = \"\\\\n\".join(l.rstrip() for l in c_lines)\n    p_rt = \"\\\\n\".join(l.rstrip() for l in pattern.split(\"\\\\n\"))\n    if p_rt in c_rt:\n        idx = c_rt.index(p_rt)\n        start_line = c_rt[:idx].count(\"\\\\n\")\n        n_lines = p_rt.count(\"\\\\n\") + 1\n        matched = \"\\\\n\".join(c_lines[start_line:start_line + n_lines])\n        return matched, \"(matched after trimming trailing whitespace)\"\n    # Pass 3: both-sides trim\n    c_st = \"\\\\n\".join(l.strip() for l in c_lines)\n    p_st = \"\\\\n\".join(l.strip() for l in pattern.split(\"\\\\n\"))\n    if p_st in c_st:\n        idx = c_st.index(p_st)\n        start_line = c_st[:idx].count(\"\\\\n\")\n        n_lines = p_st.count(\"\\\\n\") + 1\n        matched = \"\\\\n\".join(c_lines[start_line:start_line + n_lines])\n        return matched, \"(matched after trimming whitespace)\"\n    # Pass 4: unicode normalization\n    c_norm = _normalize_unicode(c_st)\n    p_norm = _normalize_unicode(p_st)\n    if p_norm in c_norm:\n        idx = c_norm.index(p_norm)\n        start_line = c_norm[:idx].count(\"\\\\n\")\n        n_lines = p_norm.count(\"\\\\n\") + 1\n        matched = \"\\\\n\".join(c_lines[start_line:start_line + n_lines])\n        return matched, \"(matched after unicode normalization)\"\n    return None, None\n\ndef _apply_edit(content, old_str, new_str, mode=\"replace\", replace_all=False):\n    \"\"\"Apply edit. 
Returns (new_content, count, fuzzy_note) or raises ValueError.\"\"\"\n    if mode == \"replace_all\":\n        replace_all = True\n        mode = \"replace\"\n    fuzzy_note = None\n    if old_str not in content:\n        matched, fuzzy_note = _fuzzy_find_original(content, old_str)\n        if matched is None:\n            raise ValueError(\"old_str not found in file.\")\n        old_str = matched\n    count = content.count(old_str)\n    if mode == \"replace\":\n        if count > 1 and not replace_all:\n            raise ValueError(f\"old_str appears {count} times. Use replace_all=true or provide more context.\")\n        if replace_all:\n            return content.replace(old_str, new_str), count, fuzzy_note\n        return content.replace(old_str, new_str, 1), 1, fuzzy_note\n    elif mode == \"append_after\":\n        if replace_all:\n            return content.replace(old_str, old_str + new_str), count, fuzzy_note\n        idx = content.index(old_str) + len(old_str)\n        return content[:idx] + new_str + content[idx:], 1, fuzzy_note\n    elif mode == \"prepend_before\":\n        if replace_all:\n            return content.replace(old_str, new_str + old_str), count, fuzzy_note\n        idx = content.index(old_str)\n        return content[:idx] + new_str + content[idx:], 1, fuzzy_note\n    raise ValueError(f\"Unknown mode: {mode}\")\n\ndef _validate_python(content, path=\"\"):\n    \"\"\"Validate Python: syntax, kwargs against real installed signatures, training heuristics.\n\n    Runs inside the sandbox where packages are pip-installed, so we can actually\n    import classes and inspect their __init__ signatures to catch kwarg mismatches\n    before runtime.\n    \"\"\"\n    import ast as _ast, inspect as _inspect, importlib as _il\n    warnings = []\n\n    # 1. 
Syntax check\n    try:\n        tree = _ast.parse(content)\n    except SyntaxError as e:\n        warnings.append(f\"Python syntax error at line {e.lineno}: {e.msg}\")\n        return warnings\n\n    # 2. Build import map: name -> module path (from the script's own imports)\n    import_map = {}\n    for node in _ast.walk(tree):\n        if isinstance(node, _ast.ImportFrom) and node.module:\n            for alias in (node.names or []):\n                local_name = alias.asname or alias.name\n                import_map[local_name] = (node.module, alias.name)\n        elif isinstance(node, _ast.Import):\n            for alias in (node.names or []):\n                local_name = alias.asname or alias.name\n                import_map[local_name] = (alias.name, None)\n\n    # 3. For each Call node, resolve the callable and check kwargs against signature\n    for node in _ast.walk(tree):\n        if not isinstance(node, _ast.Call):\n            continue\n        # Skip calls with **kwargs unpacking — we can't statically know those keys\n        if any(kw.arg is None for kw in node.keywords):\n            continue\n        call_kwargs = [kw.arg for kw in node.keywords if kw.arg]\n        if not call_kwargs:\n            continue\n\n        # Resolve the callable name\n        func_name = None\n        if isinstance(node.func, _ast.Name):\n            func_name = node.func.id\n        elif isinstance(node.func, _ast.Attribute):\n            func_name = node.func.attr\n        if not func_name or func_name not in import_map:\n            continue\n\n        # Try to import and inspect the real callable\n        module_path, attr_name = import_map[func_name]\n        try:\n            mod = _il.import_module(module_path)\n            obj = getattr(mod, attr_name, None) if attr_name else mod\n            if obj is None:\n                continue\n            sig = _inspect.signature(obj)\n            params = sig.parameters\n            # If **kwargs is in the signature, any 
kwarg is valid\n            if any(p.kind == _inspect.Parameter.VAR_KEYWORD for p in params.values()):\n                continue\n            valid_names = set(params.keys())\n            for kw_name in call_kwargs:\n                if kw_name not in valid_names:\n                    warnings.append(\n                        f\"Invalid kwarg: {func_name}({kw_name}=...) at line {node.lineno} \"\n                        f\"-- not accepted by {module_path}.{attr_name or func_name}()\"\n                    )\n        except Exception:\n            pass  # can't import/inspect — skip silently\n\n    # 4. Training script heuristics\n    if any(kw in content for kw in (\"TrainingArguments\", \"SFTConfig\", \"DPOConfig\", \"GRPOConfig\")):\n        if \"push_to_hub\" not in content:\n            warnings.append(\"Training script warning: no \\'push_to_hub\\' found\")\n        if \"hub_model_id\" not in content:\n            warnings.append(\"Training script warning: no \\'hub_model_id\\' found\")\n    return warnings\n\n@app.get(\"/api/health\")\ndef health():\n    return {\"status\": \"ok\"}\n\n@app.post(\"/api/bash\")\ndef bash(req: BashReq):\n    try:\n        proc = subprocess.Popen(\n            req.command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,\n            text=True, cwd=req.work_dir, start_new_session=True,\n        )\n        with _proc_lock:\n            _active_procs[proc.pid] = proc\n        try:\n            stdout, stderr = proc.communicate(timeout=req.timeout)\n            output = _strip_ansi(stdout + stderr)\n            output = _truncate_output(output)\n            return {\"success\": proc.returncode == 0, \"output\": output, \"error\": \"\" if proc.returncode == 0 else f\"Exit code {proc.returncode}\"}\n        except subprocess.TimeoutExpired:\n            try:\n                os.killpg(os.getpgid(proc.pid), signal.SIGKILL)\n            except OSError:\n                proc.kill()\n            proc.wait()\n            return 
{\"success\": False, \"output\": \"\", \"error\": f\"Timeout after {req.timeout}s\"}\n        finally:\n            with _proc_lock:\n                _active_procs.pop(proc.pid, None)\n    except Exception as e:\n        return {\"success\": False, \"output\": \"\", \"error\": str(e)}\n\n@app.post(\"/api/kill\")\ndef kill_all():\n    \"\"\"Kill all active bash processes. Called when user cancels.\"\"\"\n    with _proc_lock:\n        pids = list(_active_procs.keys())\n    killed = []\n    for pid in pids:\n        try:\n            os.killpg(os.getpgid(pid), signal.SIGTERM)\n            killed.append(pid)\n        except OSError:\n            try:\n                os.kill(pid, signal.SIGKILL)\n                killed.append(pid)\n            except OSError:\n                pass\n    return {\"success\": True, \"output\": f\"Killed {len(killed)} process(es): {killed}\", \"error\": \"\"}\n\n@app.post(\"/api/read\")\ndef read(req: ReadReq):\n    try:\n        p = pathlib.Path(req.path)\n        if not p.exists():\n            return {\"success\": False, \"output\": \"\", \"error\": f\"File not found: {req.path}\"}\n        if p.is_dir():\n            return {\"success\": False, \"output\": \"\", \"error\": f\"Is a directory: {req.path}\"}\n        lines = p.read_text().splitlines()\n        start = (req.offset or 1) - 1\n        end = start + (req.limit or len(lines))\n        selected = lines[start:end]\n        numbered = \"\\\\n\".join(f\"{start + i + 1}\\\\t{line}\" for i, line in enumerate(selected))\n        return {\"success\": True, \"output\": numbered, \"error\": \"\"}\n    except Exception as e:\n        return {\"success\": False, \"output\": \"\", \"error\": str(e)}\n\n@app.post(\"/api/write\")\ndef write(req: WriteReq):\n    try:\n        p = pathlib.Path(req.path)\n        _atomic_write(p, req.content)\n        msg = f\"Wrote {len(req.content)} bytes to {req.path}\"\n        if p.suffix == \".py\":\n            warnings = _validate_python(req.content, 
req.path)\n            if warnings:\n                msg += \"\\\\n\\\\nValidation warnings:\\\\n\" + \"\\\\n\".join(f\"  ! {w}\" for w in warnings)\n        return {\"success\": True, \"output\": msg, \"error\": \"\"}\n    except Exception as e:\n        return {\"success\": False, \"output\": \"\", \"error\": str(e)}\n\n@app.post(\"/api/edit\")\ndef edit(req: EditReq):\n    try:\n        p = pathlib.Path(req.path)\n        if not p.exists():\n            return {\"success\": False, \"output\": \"\", \"error\": f\"File not found: {req.path}\"}\n        content = p.read_text()\n        if req.old_str == req.new_str:\n            return {\"success\": False, \"output\": \"\", \"error\": \"old_str and new_str must differ.\"}\n        try:\n            new_content, count, fuzzy_note = _apply_edit(\n                content, req.old_str, req.new_str, mode=req.mode, replace_all=req.replace_all\n            )\n        except ValueError as e:\n            return {\"success\": False, \"output\": \"\", \"error\": str(e)}\n        _atomic_write(p, new_content)\n        msg = f\"Edited {req.path} ({count} replacement{'s' if count > 1 else ''})\"\n        if fuzzy_note:\n            msg += f\" {fuzzy_note}\"\n        if p.suffix == \".py\":\n            warnings = _validate_python(new_content, req.path)\n            if warnings:\n                msg += \"\\\\n\\\\nValidation warnings:\\\\n\" + \"\\\\n\".join(f\"  ! 
{w}\" for w in warnings)\n        return {\"success\": True, \"output\": msg, \"error\": \"\"}\n    except Exception as e:\n        return {\"success\": False, \"output\": \"\", \"error\": str(e)}\n\n@app.post(\"/api/exists\")\ndef exists(req: ExistsReq):\n    return {\"success\": True, \"output\": str(pathlib.Path(req.path).exists()).lower(), \"error\": \"\"}\n\nif __name__ == \"__main__\":\n    uvicorn.run(app, host=\"0.0.0.0\", port=7860)\n'''\n\n\n@dataclass\nclass ToolResult:\n    success: bool\n    output: str = \"\"\n    error: str = \"\"\n\n    def __str__(self):\n        if self.success:\n            return self.output or \"(no output)\"\n        return f\"ERROR: {self.error}\"\n\n    def to_dict(self) -> dict:\n        return {\"success\": self.success, \"output\": self.output, \"error\": self.error}\n\n\n@dataclass\nclass Sandbox:\n    \"\"\"\n    A handle to an HF Space sandbox.\n\n    Use Sandbox.create() to spin up a new one, or Sandbox.connect() to\n    attach to an existing running Space.\n    \"\"\"\n\n    space_id: str\n    token: str | None = None\n    work_dir: str = \"/app\"\n    timeout: int = DEFAULT_TIMEOUT\n    _owns_space: bool = field(default=False, repr=False)\n    _base_url: str = field(init=False, repr=False)\n    _client: httpx.Client = field(init=False, repr=False)\n    _hf_api: HfApi = field(init=False, repr=False)\n    _files_read: set = field(init=False, repr=False, default_factory=set)\n\n    def __post_init__(self):\n        slug = self.space_id.replace(\"/\", \"-\")\n        # Trailing slash is critical: httpx resolves relative paths against base_url.\n        # Without it, client.get(\"health\") resolves to /health instead of /api/health.\n        self._base_url = f\"https://{slug}.hf.space/api/\"\n        self._client = httpx.Client(\n            base_url=self._base_url,\n            headers={\"Authorization\": f\"Bearer {self.token}\"} if self.token else {},\n            timeout=httpx.Timeout(MAX_TIMEOUT, connect=30),\n      
      follow_redirects=True,\n        )\n        self._hf_api = HfApi(token=self.token)\n\n    # ── Lifecycle ─────────────────────────────────────────────────\n\n    class Cancelled(Exception):\n        \"\"\"Raised when sandbox creation is cancelled by the user.\"\"\"\n\n    @classmethod\n    def create(\n        cls,\n        owner: str,\n        *,\n        name: str | None = None,\n        template: str = TEMPLATE_SPACE,\n        hardware: str = \"cpu-basic\",\n        private: bool = False,\n        sleep_time: int | None = None,\n        token: str | None = None,\n        secrets: dict[str, str] | None = None,\n        wait_timeout: int = WAIT_TIMEOUT,\n        log: \"Callable[[str], object] | None\" = None,\n        cancel_event: \"Any | None\" = None,\n    ) -> Sandbox:\n        \"\"\"\n        Create a new sandbox by duplicating the template Space.\n\n        Generates a unique space name, duplicates the template, waits for it\n        to come online, then returns a connected Sandbox.\n\n        Args:\n            owner: HF username or org (e.g. \"burtenshaw\").\n            name: Base name for the space. Defaults to \"sandbox\".\n                  A unique suffix is always appended.\n            template: Source Space to duplicate (default: burtenshaw/sandbox).\n            hardware: Hardware tier (cpu-basic, t4-small, etc.).\n            private: Whether the Space should be private.\n            sleep_time: Auto-sleep after N seconds of inactivity.\n            token: HF API token (from user's OAuth session).\n            wait_timeout: Max seconds to wait for Space to start (default: 300).\n            cancel_event: A threading.Event (or compatible) checked during\n                          polling loops.  
When set, the Space is deleted and\n                          Sandbox.Cancelled is raised.\n\n        Returns:\n            A Sandbox instance connected to the running Space.\n        \"\"\"\n        _log = log or print\n        api = HfApi(token=token)\n\n        def _check_cancel():\n            if cancel_event and cancel_event.is_set():\n                _log(\"Sandbox creation cancelled by user, cleaning up...\")\n                try:\n                    api.delete_repo(space_id, repo_type=\"space\")\n                    _log(f\"Deleted Space {space_id}\")\n                except Exception:\n                    pass\n                raise cls.Cancelled(f\"Sandbox creation cancelled: {space_id}\")\n\n        base = name or \"sandbox\"\n        suffix = uuid.uuid4().hex[:8]\n        space_id = f\"{owner}/{base}-{suffix}\"\n\n        _log(f\"Creating sandbox: {space_id} (from {template})...\")\n\n        kwargs = {\n            \"from_id\": template,\n            \"to_id\": space_id,\n            \"private\": private,\n            \"hardware\": hardware,\n        }\n        if sleep_time is not None:\n            kwargs[\"sleep_time\"] = sleep_time\n\n        api.duplicate_space(**kwargs)\n        _log(f\"Space created: https://huggingface.co/spaces/{space_id}\")\n\n        _check_cancel()\n\n        # Inject secrets BEFORE uploading server files (which triggers rebuild).\n        # Secrets added after a Space is running aren't available until restart,\n        # so they must be set before the build/start cycle.\n        if secrets:\n            for key, val in secrets.items():\n                api.add_space_secret(space_id, key, val)\n\n        # Upload sandbox server and Dockerfile (triggers rebuild)\n        cls._setup_server(space_id, api, log=_log)\n\n        _check_cancel()\n\n        # Wait for it to come online (rebuild + start)\n        _log(f\"Waiting for Space to start (timeout: {wait_timeout}s)...\")\n        deadline = time.time() + wait_timeout\n     
   while time.time() < deadline:\n            _check_cancel()\n            runtime = api.get_space_runtime(space_id)\n            if runtime.stage == \"RUNNING\":\n                _log(f\"Space is running (hardware: {runtime.hardware})\")\n                break\n            if runtime.stage in (\"RUNTIME_ERROR\", \"BUILD_ERROR\"):\n                raise RuntimeError(\n                    f\"Space failed to start: {runtime.stage}. \"\n                    f\"Check https://huggingface.co/spaces/{space_id}\"\n                )\n            _log(f\"  {runtime.stage}...\")\n            time.sleep(WAIT_INTERVAL)\n        else:\n            raise TimeoutError(\n                f\"Space did not start within {wait_timeout}s. \"\n                f\"Check https://huggingface.co/spaces/{space_id}\"\n            )\n\n        _check_cancel()\n\n        # Wait for the API server to be responsive (non-fatal)\n        sb = cls(space_id=space_id, token=token, _owns_space=True)\n        try:\n            sb._wait_for_api(timeout=API_WAIT_TIMEOUT, log=_log)\n        except TimeoutError as e:\n            _log(\n                f\"Warning: API health check timed out ({e}), but Space is RUNNING. 
Continuing.\"\n            )\n        return sb\n\n    @staticmethod\n    def _setup_server(space_id: str, api: HfApi, *, log: Callable[[str], object] = print) -> None:\n        \"\"\"Upload embedded sandbox server + Dockerfile to the Space (single commit).\"\"\"\n        log(f\"Uploading sandbox server to {space_id}...\")\n        api.create_commit(\n            repo_id=space_id,\n            repo_type=\"space\",\n            operations=[\n                CommitOperationAdd(\n                    path_in_repo=\"sandbox_server.py\",\n                    path_or_fileobj=io.BytesIO(_SANDBOX_SERVER.encode()),\n                ),\n                CommitOperationAdd(\n                    path_in_repo=\"Dockerfile\",\n                    path_or_fileobj=io.BytesIO(_DOCKERFILE.encode()),\n                ),\n            ],\n            commit_message=\"Setup sandbox server\",\n        )\n        log(\"Server files uploaded, rebuild triggered.\")\n\n    @classmethod\n    def connect(cls, space_id: str, *, token: str | None = None) -> Sandbox:\n        \"\"\"\n        Connect to an existing running Space.\n\n        Does a health check to verify the Space is reachable.\n        \"\"\"\n        sb = cls(space_id=space_id, token=token, _owns_space=False)\n        sb._wait_for_api(timeout=60)\n        return sb\n\n    def _wait_for_api(self, timeout: int = API_WAIT_TIMEOUT, log: Callable[[str], object] = print):\n        \"\"\"Poll the health endpoint until the server responds.\"\"\"\n        deadline = time.time() + timeout\n        last_err = None\n        last_status = None\n        while time.time() < deadline:\n            try:\n                resp = self._client.get(\"health\", timeout=10)\n                last_status = resp.status_code\n                if resp.status_code == 200:\n                    log(f\"API is responsive at {self._base_url}\")\n                    return\n            except Exception as e:\n                last_err = e\n            time.sleep(3)\n   
     raise TimeoutError(\n            f\"Sandbox API at {self._base_url} not responding after {timeout}s. \"\n            f\"Last status: {last_status}, last error: {last_err}\"\n        )\n\n    def delete(self):\n        \"\"\"Delete the Space. Only works if this Sandbox created it.\"\"\"\n        if not self._owns_space:\n            raise RuntimeError(\n                f\"This Sandbox did not create {self.space_id}. \"\n                f\"Use self._hf_api.delete_repo() directly if you're sure.\"\n            )\n        print(f\"Deleting sandbox: {self.space_id}...\")\n        self._hf_api.delete_repo(self.space_id, repo_type=\"space\")\n        self._client.close()\n        print(\"Deleted.\")\n\n    def pause(self):\n        \"\"\"Pause the Space (stops billing, preserves state).\"\"\"\n        self._hf_api.pause_space(self.space_id)\n\n    def restart(self):\n        \"\"\"Restart the Space.\"\"\"\n        self._hf_api.restart_space(self.space_id)\n        self._wait_for_api()\n\n    @property\n    def url(self) -> str:\n        \"\"\"Public URL of the Space.\"\"\"\n        return f\"https://huggingface.co/spaces/{self.space_id}\"\n\n    @property\n    def status(self) -> str:\n        \"\"\"Current Space stage (RUNNING, BUILDING, PAUSED, etc.).\"\"\"\n        return self._hf_api.get_space_runtime(self.space_id).stage\n\n    def __enter__(self) -> Sandbox:\n        return self\n\n    def __exit__(self, *exc):\n        if self._owns_space:\n            try:\n                self.delete()\n            except Exception as e:\n                print(f\"Warning: failed to delete sandbox: {e}\", file=sys.stderr)\n        self._client.close()\n\n    # ── HTTP plumbing ─────────────────────────────────────────────\n\n    def _call(\n        self, endpoint: str, payload: dict, timeout: float | None = None\n    ) -> ToolResult:\n        # Strip leading slash for correct httpx base_url resolution\n        endpoint = endpoint.lstrip(\"/\")\n        effective_timeout = 
timeout or self.timeout\n        last_error = \"\"\n\n        # Retry up to 3 times for transient failures (sandbox waking from\n        # sleep returns empty / non-JSON responses while it starts up).\n        for attempt in range(3):\n            try:\n                resp = self._client.post(\n                    endpoint,\n                    json=payload,\n                    timeout=effective_timeout,\n                )\n                try:\n                    data = resp.json()\n                except (ValueError, UnicodeDecodeError):\n                    # Non-JSON response — sandbox is likely still starting up.\n                    body_preview = resp.text[:200] if resp.text else \"(empty)\"\n                    last_error = (\n                        f\"Sandbox returned non-JSON response (HTTP {resp.status_code}): \"\n                        f\"{body_preview}\"\n                    )\n                    if attempt < 2:\n                        time.sleep(3 * (attempt + 1))\n                        continue\n                    return ToolResult(success=False, error=last_error)\n\n                if resp.status_code == 200:\n                    return ToolResult(\n                        success=data.get(\"success\", True),\n                        output=data.get(\"output\", \"\"),\n                        error=data.get(\"error\", \"\"),\n                    )\n                return ToolResult(\n                    success=False,\n                    error=data.get(\"error\", f\"HTTP {resp.status_code}\"),\n                )\n            except httpx.TimeoutException:\n                return ToolResult(\n                    success=False, error=f\"Timeout after {effective_timeout}s\"\n                )\n            except httpx.ConnectError:\n                last_error = (\n                    f\"Cannot connect to sandbox. Is {self.space_id} running? 
\"\n                    f\"Status: {self.status}\"\n                )\n                if attempt < 2:\n                    time.sleep(3 * (attempt + 1))\n                    continue\n                return ToolResult(success=False, error=last_error)\n            except Exception as e:\n                return ToolResult(success=False, error=str(e))\n\n        return ToolResult(success=False, error=last_error or \"Unknown error\")\n\n    # ── Tools ─────────────────────────────────────────────────────\n\n    def bash(\n        self,\n        command: str,\n        *,\n        work_dir: str | None = None,\n        timeout: int | None = None,\n        description: str | None = None,\n    ) -> ToolResult:\n        return self._call(\n            \"bash\",\n            {\n                \"command\": command,\n                \"work_dir\": work_dir or self.work_dir,\n                \"timeout\": min(timeout or self.timeout, MAX_TIMEOUT),\n            },\n            timeout=timeout,\n        )\n\n    def read(\n        self, path: str, *, offset: int | None = None, limit: int | None = None\n    ) -> ToolResult:\n        self._files_read.add(path)\n        return self._call(\n            \"read\",\n            {\n                \"path\": path,\n                \"offset\": offset,\n                \"limit\": limit or (DEFAULT_READ_LIMIT if offset is None else None),\n            },\n        )\n\n    def write(self, path: str, content: str) -> ToolResult:\n        if path not in self._files_read:\n            check = self._call(\"exists\", {\"path\": path})\n            if check.success and check.output == \"true\":\n                return ToolResult(\n                    success=False,\n                    error=(\n                        f\"File {path} exists but has not been read this session. 
\"\n                        f\"Read it first, or use sandbox_edit for targeted changes.\"\n                    ),\n                )\n        result = self._call(\"write\", {\"path\": path, \"content\": content})\n        if result.success:\n            self._files_read.add(path)\n        return result\n\n    def edit(\n        self, path: str, old_str: str, new_str: str, *, replace_all: bool = False,\n        mode: str = \"replace\",\n    ) -> ToolResult:\n        if old_str == new_str:\n            return ToolResult(success=False, error=\"old_str and new_str are identical.\")\n        if path not in self._files_read:\n            return ToolResult(\n                success=False,\n                error=f\"File {path} has not been read this session. Read it first.\",\n            )\n        return self._call(\n            \"edit\",\n            {\n                \"path\": path,\n                \"old_str\": old_str,\n                \"new_str\": new_str,\n                \"replace_all\": replace_all,\n                \"mode\": mode,\n            },\n        )\n\n    def kill_all(self) -> ToolResult:\n        \"\"\"Kill all active bash processes on the sandbox. Used on cancellation.\"\"\"\n        return self._call(\"kill\", {})\n\n    # ── Tool schemas & dispatch ───────────────────────────────────\n\n    TOOLS = {\n        \"bash\": {\n            \"description\": (\n                \"Run a shell command in the remote sandbox and return stdout/stderr.\\n\"\n                \"\\n\"\n                \"IMPORTANT: Do NOT use bash for file operations — use the dedicated tools instead:\\n\"\n                \"- To read files: use read (not cat/head/tail)\\n\"\n                \"- To edit files: use edit (not sed/awk)\\n\"\n                \"- To write files: use write (not echo/cat <<EOF)\\n\"\n                \"\\n\"\n                \"Commands run in a shell at /app. 
Each invocation is independent — \"\n                \"use files in /app to persist state.\\n\"\n                \"Chain dependent commands with &&. Independent commands should be \"\n                \"separate bash calls (they can run in parallel).\\n\"\n                \"\\n\"\n                \"For long-running commands (training, evaluation), run in the background and poll:\\n\"\n                \"  nohup <command> > /app/output.log 2>&1 & echo $!\\n\"\n                \"Then check status:\\n\"\n                \"  kill -0 <PID> 2>/dev/null && echo 'running' || echo 'done'\\n\"\n                \"  tail -n 50 /app/output.log\\n\"\n                \"\\n\"\n                \"Timeout default 240s, max 1200s.\"\n            ),\n            \"parameters\": {\n                \"type\": \"object\",\n                \"required\": [\"command\"],\n                \"additionalProperties\": False,\n                \"properties\": {\n                    \"command\": {\n                        \"type\": \"string\",\n                        \"description\": \"The shell command to execute.\",\n                    },\n                    \"description\": {\n                        \"type\": \"string\",\n                        \"description\": \"Short description (5-10 words, active voice).\",\n                    },\n                    \"work_dir\": {\n                        \"type\": \"string\",\n                        \"description\": \"Working directory (default: /app).\",\n                    },\n                    \"timeout\": {\n                        \"type\": \"integer\",\n                        \"description\": \"Optional timeout in seconds (default: 240, max: 1200).\",\n                    },\n                },\n            },\n        },\n        \"read\": {\n            \"description\": (\n                \"Reads a file from the sandbox filesystem. 
Returns contents with line \"\n                \"numbers (cat -n format).\\n\"\n                \"\\n\"\n                \"Usage:\\n\"\n                \"- By default, reads up to 2000 lines from the beginning of the file.\\n\"\n                \"- You can optionally specify offset and limit for large files, but prefer \"\n                \"reading the whole file first.\\n\"\n                \"- Lines longer than 4000 chars are truncated.\\n\"\n                \"- Cannot read directories — use bash with 'ls' instead.\\n\"\n                \"- You should read multiple potentially useful files in parallel when possible.\\n\"\n                \"- IMPORTANT: Always read a file before editing or overwriting it. The edit and \"\n                \"write tools will reject operations on files you haven't read.\"\n            ),\n            \"parameters\": {\n                \"type\": \"object\",\n                \"required\": [\"path\"],\n                \"additionalProperties\": False,\n                \"properties\": {\n                    \"path\": {\n                        \"type\": \"string\",\n                        \"description\": \"Absolute path to the file to read.\",\n                    },\n                    \"offset\": {\n                        \"type\": \"integer\",\n                        \"description\": \"The line number to start reading from (1-based). Only provide if the file is too large to read at once.\",\n                    },\n                    \"limit\": {\n                        \"type\": \"integer\",\n                        \"description\": \"The number of lines to read. Only provide if the file is too large to read at once.\",\n                    },\n                },\n            },\n        },\n        \"write\": {\n            \"description\": (\n                \"Writes a file to the sandbox filesystem. 
Overwrites the existing file if \"\n                \"one exists at the path.\\n\"\n                \"\\n\"\n                \"- If this is an existing file, you MUST use the read tool first. This tool \"\n                \"will fail if you did not read the file first.\\n\"\n                \"- ALWAYS prefer editing existing files with the edit tool over overwriting \"\n                \"with write.\\n\"\n                \"- Creates parent directories as needed.\"\n            ),\n            \"parameters\": {\n                \"type\": \"object\",\n                \"required\": [\"path\", \"content\"],\n                \"additionalProperties\": False,\n                \"properties\": {\n                    \"path\": {\n                        \"type\": \"string\",\n                        \"description\": \"Absolute path to the file to write.\",\n                    },\n                    \"content\": {\n                        \"type\": \"string\",\n                        \"description\": \"The complete file content to write.\",\n                    },\n                },\n            },\n        },\n        \"edit\": {\n            \"description\": (\n                \"Performs string replacements in files. Supports exact matching with \"\n                \"fuzzy fallback.\\n\"\n                \"\\n\"\n                \"Usage:\\n\"\n                \"- You must read the file at least once before editing. This tool will \"\n                \"error if you attempt an edit without reading the file.\\n\"\n                \"- The edit will FAIL if old_str is not unique in the file. 
Either provide \"\n                \"a larger string with more surrounding context to make it unique, or set \"\n                \"replace_all to true.\\n\"\n                \"- old_str and new_str must differ.\\n\"\n                \"- Preserve indentation exactly as it appears in the file.\\n\"\n                \"- Do NOT include line number prefixes from read output in old_str or new_str.\\n\"\n                \"- To delete code, set new_str to empty string.\\n\"\n                \"- Use replace_all for renaming variables or strings across the file.\\n\"\n                \"\\n\"\n                \"Modes:\\n\"\n                \"- replace (default): replace first occurrence of old_str with new_str.\\n\"\n                \"- append_after: insert new_str immediately after old_str (old_str is kept).\\n\"\n                \"- prepend_before: insert new_str immediately before old_str (old_str is kept).\"\n            ),\n            \"parameters\": {\n                \"type\": \"object\",\n                \"required\": [\"path\", \"old_str\", \"new_str\"],\n                \"additionalProperties\": False,\n                \"properties\": {\n                    \"path\": {\n                        \"type\": \"string\",\n                        \"description\": \"Absolute path to the file to edit.\",\n                    },\n                    \"old_str\": {\n                        \"type\": \"string\",\n                        \"description\": \"The text to find in the file. Must match exactly (fuzzy matching is used as fallback).\",\n                    },\n                    \"new_str\": {\n                        \"type\": \"string\",\n                        \"description\": \"The replacement text. 
For append_after/prepend_before modes, the text to insert.\",\n                    },\n                    \"replace_all\": {\n                        \"type\": \"boolean\",\n                        \"description\": \"Replace all occurrences of old_str (default: false).\",\n                        \"default\": False,\n                    },\n                    \"mode\": {\n                        \"type\": \"string\",\n                        \"enum\": [\"replace\", \"append_after\", \"prepend_before\"],\n                        \"description\": \"Edit mode (default: replace).\",\n                        \"default\": \"replace\",\n                    },\n                },\n            },\n        },\n    }\n\n    @classmethod\n    def tool_definitions(cls) -> list[dict]:\n        return [{\"name\": name, **spec} for name, spec in cls.TOOLS.items()]\n\n    def call_tool(self, name: str, arguments: dict[str, Any]) -> ToolResult:\n        dispatch = {\n            \"bash\": lambda a: self.bash(\n                a[\"command\"],\n                work_dir=a.get(\"work_dir\"),\n                timeout=a.get(\"timeout\"),\n                description=a.get(\"description\"),\n            ),\n            \"read\": lambda a: self.read(\n                a[\"path\"],\n                offset=a.get(\"offset\"),\n                limit=a.get(\"limit\"),\n            ),\n            \"write\": lambda a: self.write(a[\"path\"], a[\"content\"]),\n            \"edit\": lambda a: self.edit(\n                a[\"path\"],\n                a[\"old_str\"],\n                a[\"new_str\"],\n                replace_all=a.get(\"replace_all\", False),\n                mode=a.get(\"mode\", \"replace\"),\n            ),\n        }\n        fn = dispatch.get(name)\n        if not fn:\n            return ToolResult(success=False, error=f\"Unknown tool: {name}\")\n        return fn(arguments)\n"
  },
  {
    "path": "agent/tools/sandbox_tool.py",
    "content": "\"\"\"\nSandbox tools — expose the Sandbox client as agent tools.\n\n5 tools total:\n  sandbox_create — explicit sandbox creation (requires approval)\n  bash, read, write, edit — operations on the sandbox\n\nIf any operation tool is called without an active sandbox,\na cpu-basic sandbox is auto-created (no approval needed).\n\"\"\"\n\nfrom __future__ import annotations\n\nimport asyncio\nimport threading\nfrom typing import Any\n\nfrom huggingface_hub import HfApi, SpaceHardware\n\nfrom agent.core.session import Event\nfrom agent.tools.sandbox_client import Sandbox\n\n\ndef _looks_like_path(script: str) -> bool:\n    \"\"\"Return True if the script string looks like a file path (not inline code).\"\"\"\n    return (\n        isinstance(script, str)\n        and script.strip() == script\n        and not any(c in script for c in \"\\r\\n\\0\")\n        and (\n            script.startswith(\"/\")\n            or script.startswith(\"./\")\n            or script.startswith(\"../\")\n        )\n    )\n\n\nasync def resolve_sandbox_script(\n    sandbox: Any, script: str\n) -> tuple[str | None, str | None]:\n    \"\"\"Read a file from the sandbox if *script* looks like a path.\n\n    Returns:\n        (content, error) — content is the file text on success,\n        error is a message on failure.  
Both None means *script*\n        is not a path (caller should use it as-is).\n    \"\"\"\n    if not sandbox or not _looks_like_path(script):\n        return None, None\n    try:\n        # Use the read endpoint instead of bash(\"cat ...\") which truncates at 25KB.\n        result = await asyncio.to_thread(sandbox.read, script, limit=100_000)\n        if result.success and result.output:\n            # Strip line number prefixes (read returns \"N\\tcontent\" format)\n            lines = []\n            for line in result.output.split(\"\\n\"):\n                parts = line.split(\"\\t\", 1)\n                lines.append(parts[1] if len(parts) == 2 else line)\n            return \"\\n\".join(lines), None\n        return None, f\"Failed to read {script} from sandbox: {result.error}\"\n    except Exception as e:\n        return None, f\"Failed to read {script} from sandbox: {e}\"\n\n\n# ── Tool name mapping (short agent names → Sandbox client names) ──────\n\n\nasync def _ensure_sandbox(\n    session: Any, hardware: str = \"cpu-basic\", **create_kwargs\n) -> tuple[Sandbox | None, str | None]:\n    \"\"\"\n    Ensure a sandbox exists on the session. Auto-creates with given hardware if needed.\n\n    Returns:\n        (sandbox, error_message) — one will be None.\n    \"\"\"\n    if session and getattr(session, \"sandbox\", None):\n        return session.sandbox, None\n\n    if not session:\n        return None, \"No session available.\"\n\n    token = session.hf_token\n    if not token:\n        return None, \"No HF token available. 
Cannot create sandbox.\"\n\n    api = HfApi(token=token)\n    user_info = api.whoami()\n    owner = user_info.get(\"name\", user_info.get(\"user\", \"\"))\n    if not owner:\n        return None, \"Could not determine HF username from token.\"\n\n    await session.send_event(\n        Event(\n            event_type=\"tool_log\",\n            data={\n                \"tool\": \"sandbox\",\n                \"log\": f\"Auto-creating sandbox for {owner} ({hardware})...\",\n            },\n        )\n    )\n\n    # Thread-safe log callback: posts tool_log events from the worker thread\n    loop = asyncio.get_running_loop()\n\n    def _log(msg: str) -> None:\n        loop.call_soon_threadsafe(\n            session.event_queue.put_nowait,\n            Event(event_type=\"tool_log\", data={\"tool\": \"sandbox\", \"log\": msg}),\n        )\n\n    # Bridge asyncio cancel event to a threading.Event for the blocking create call.\n    # We poll session._cancelled from the main loop in a background task and set\n    # a threading.Event that Sandbox.create checks during its polling loops.\n    cancel_flag = threading.Event()\n\n    async def _watch_cancel():\n        await session._cancelled.wait()\n        cancel_flag.set()\n\n    watcher_task = asyncio.create_task(_watch_cancel())\n\n    kwargs = {\n        \"owner\": owner,\n        \"hardware\": hardware,\n        \"token\": token,\n        \"secrets\": {\"HF_TOKEN\": token},\n        \"log\": _log,\n        \"cancel_event\": cancel_flag,\n        **create_kwargs,\n    }\n    if hardware != \"cpu-basic\":\n        kwargs[\"sleep_time\"] = 2700\n    try:\n        sb = await asyncio.to_thread(Sandbox.create, **kwargs)\n    except Sandbox.Cancelled:\n        return None, \"Sandbox creation cancelled by user.\"\n    finally:\n        watcher_task.cancel()\n    session.sandbox = sb\n\n    # Set a descriptive title (template title is inherited on duplicate)\n    from huggingface_hub import metadata_update\n\n    await 
asyncio.to_thread(\n        metadata_update,\n        sb.space_id,\n        {\"title\": \"ml-intern sandbox\"},\n        repo_type=\"space\",\n        overwrite=True,\n        token=token,\n    )\n\n    await session.send_event(\n        Event(\n            event_type=\"tool_log\",\n            data={\"tool\": \"sandbox\", \"log\": f\"Sandbox ready: {sb.space_id} ({sb.url})\"},\n        )\n    )\n\n    return sb, None\n\n\n# ── sandbox_create tool ──────────────────────────────────────────────\n\nSANDBOX_CREATE_TOOL_SPEC = {\n    \"name\": \"sandbox_create\",\n    \"description\": (\n        \"Create a persistent remote Linux environment for developing and testing scripts.\\n\\n\"\n        \"Workflow: sandbox_create → write script → pip install → test with small run → fix errors → hf_jobs at scale.\\n\"\n        \"The sandbox persists across tool calls within the session. pip install works out of the box.\\n\\n\"\n        \"Use this when: you need to develop, test, and iterate on scripts before launching via hf_jobs. \"\n        \"Especially for training scripts where you need to verify imports, test on a small subset, and fix errors interactively.\\n\\n\"\n        \"Skip this when: the task is a simple one-shot operation (status check, resource search, quick data query), \"\n        \"or the script is copied from a verified working example with minimal changes.\\n\\n\"\n        \"For ML code that uses CUDA, bf16, or model loading: use GPU hardware (t4-small minimum). \"\n        \"CPU sandboxes cannot run GPU code paths — your test will not catch GPU-related errors.\\n\\n\"\n        \"Before choosing hardware, estimate your VRAM needs (models you run, training data size). Rule of thumb: bf16/fp16 ≈ 2 bytes/param, \"\n        \"fp32 ≈ 4 bytes/param, plus ~20% overhead for optimizer states during training.\\n\"\n        \"Common picks: t4-small (16GB VRAM, fits ≤1-3B), a10g-small (24GB, ≤7B), a100-large (80GB, ≤30B). 
\"\n        \"If the model won't fit, pick larger hardware upfront — OOM on a sandbox wastes time.\\n\\n\"\n        \"Hardware: \" + \", \".join([e.value for e in SpaceHardware]) + \".\\n\"\n    ),\n    \"parameters\": {\n        \"type\": \"object\",\n        \"required\": [],\n        \"additionalProperties\": False,\n        \"properties\": {\n            \"hardware\": {\n                \"type\": \"string\",\n                \"enum\": [e.value for e in SpaceHardware],\n                \"description\": \"Hardware tier for the sandbox (default: cpu-basic)\",\n            },\n            \"private\": {\n                \"type\": \"boolean\",\n                \"description\": \"If true, create a private Space\",\n            },\n        },\n    },\n}\n\n\nasync def sandbox_create_handler(\n    args: dict[str, Any], session: Any = None\n) -> tuple[str, bool]:\n    \"\"\"Handle sandbox_create tool calls.\"\"\"\n    # If sandbox already exists, return its info\n    if session and getattr(session, \"sandbox\", None):\n        sb = session.sandbox\n        return (\n            f\"Sandbox already active: {sb.space_id}\\n\"\n            f\"URL: {sb.url}\\n\"\n            f\"Use bash/read/write/edit to interact with it.\"\n        ), True\n\n    hardware = args.get(\"hardware\", \"cpu-basic\")\n    create_kwargs = {}\n    if \"private\" in args:\n        create_kwargs[\"private\"] = args[\"private\"]\n\n    try:\n        sb, error = await _ensure_sandbox(session, hardware=hardware, **create_kwargs)\n    except Exception as e:\n        return f\"Failed to create sandbox: {e}\", False\n\n    if error:\n        return error, False\n\n    return (\n        f\"Sandbox created: {sb.space_id}\\n\"\n        f\"URL: {sb.url}\\n\"\n        f\"Hardware: {hardware}\\n\"\n        f\"Use bash/read/write/edit to interact with it.\"\n    ), True\n\n\ndef _make_tool_handler(sandbox_tool_name: str):\n    \"\"\"Factory: create a handler for a sandbox operation tool.\"\"\"\n\n    async def 
handler(args: dict[str, Any], session: Any = None) -> tuple[str, bool]:\n        # Require sandbox to exist — user must approve sandbox_create first\n        if not session or not getattr(session, \"sandbox\", None):\n            return \"No sandbox running. Call sandbox_create first to start one.\", False\n\n        sb = session.sandbox\n\n        try:\n            result = await asyncio.to_thread(sb.call_tool, sandbox_tool_name, args)\n            if result.success:\n                output = result.output or \"(no output)\"\n                return output, True\n            else:\n                error_msg = result.error or \"Unknown error\"\n                output = result.output\n                if output:\n                    return f\"{output}\\n\\nERROR: {error_msg}\", False\n                return f\"ERROR: {error_msg}\", False\n        except Exception as e:\n            return f\"Sandbox operation failed: {e}\", False\n\n    return handler\n\n\ndef get_sandbox_tools():\n    \"\"\"Return all 5 sandbox ToolSpecs (sandbox_create + 4 operation tools).\"\"\"\n    from agent.core.tools import ToolSpec\n\n    tools = []\n\n    # sandbox_create (explicit creation, requires approval)\n    tools.append(\n        ToolSpec(\n            name=SANDBOX_CREATE_TOOL_SPEC[\"name\"],\n            description=SANDBOX_CREATE_TOOL_SPEC[\"description\"],\n            parameters=SANDBOX_CREATE_TOOL_SPEC[\"parameters\"],\n            handler=sandbox_create_handler,\n        )\n    )\n\n    # Operation tools (auto-execute, no approval needed)\n    for name in Sandbox.TOOLS.keys():\n        spec = Sandbox.TOOLS[name]\n        tools.append(\n            ToolSpec(\n                name=name,\n                description=spec[\"description\"],\n                parameters=spec[\"parameters\"],\n                handler=_make_tool_handler(name),\n            )\n        )\n\n    return tools\n"
  },
  {
    "path": "agent/tools/types.py",
    "content": "\"\"\"\nTypes for Hugging Face tools\n\nPorted from: hf-mcp-server/packages/mcp/src/types/\n\"\"\"\n\nfrom typing import TypedDict\n\n\nclass ToolResult(TypedDict, total=False):\n    \"\"\"Result returned by HF tool operations\"\"\"\n\n    formatted: str\n    totalResults: int\n    resultsShared: int\n    isError: bool\n"
  },
  {
    "path": "agent/tools/utilities.py",
    "content": "\"\"\"\nUtility functions for Hugging Face tools\n\nPorted from: hf-mcp-server/packages/mcp/src/jobs/formatters.ts\nIncludes GPU memory validation for job submissions\n\"\"\"\n\nimport json\nfrom datetime import datetime\nfrom typing import Any, Dict, List, Optional\n\n\ndef truncate(text: str, max_length: int) -> str:\n    \"\"\"Truncate a string to a maximum length with ellipsis\"\"\"\n    if len(text) <= max_length:\n        return text\n    return text[: max_length - 3] + \"...\"\n\n\ndef format_date(date_str: Optional[str]) -> str:\n    \"\"\"Format a date string to a readable format\"\"\"\n    if not date_str:\n        return \"N/A\"\n    try:\n        date = datetime.fromisoformat(date_str.replace(\"Z\", \"+00:00\"))\n        return date.strftime(\"%Y-%m-%d %H:%M:%S\")\n    except Exception:\n        return date_str\n\n\ndef format_command(command: Optional[List[str]]) -> str:\n    \"\"\"Format command array as a single string\"\"\"\n    if not command or len(command) == 0:\n        return \"N/A\"\n    return \" \".join(command)\n\n\ndef get_image_or_space(job: Dict[str, Any]) -> str:\n    \"\"\"Get image/space identifier from job\"\"\"\n    if job.get(\"spaceId\"):\n        return job[\"spaceId\"]\n    if job.get(\"dockerImage\"):\n        return job[\"dockerImage\"]\n    return \"N/A\"\n\n\ndef format_jobs_table(jobs: List[Dict[str, Any]]) -> str:\n    \"\"\"Format jobs as a markdown table\"\"\"\n    if len(jobs) == 0:\n        return \"No jobs found.\"\n\n    # Calculate dynamic ID column width\n    longest_id_length = max(len(job[\"id\"]) for job in jobs)\n    id_column_width = max(longest_id_length, len(\"JOB ID\"))\n\n    # Define column widths\n    col_widths = {\n        \"id\": id_column_width,\n        \"image\": 20,\n        \"command\": 30,\n        \"created\": 19,\n        \"status\": 12,\n    }\n\n    # Build header\n    header = f\"| {'JOB ID'.ljust(col_widths['id'])} | {'IMAGE/SPACE'.ljust(col_widths['image'])} | 
{'COMMAND'.ljust(col_widths['command'])} | {'CREATED'.ljust(col_widths['created'])} | {'STATUS'.ljust(col_widths['status'])} |\"\n    separator = f\"|{'-' * (col_widths['id'] + 2)}|{'-' * (col_widths['image'] + 2)}|{'-' * (col_widths['command'] + 2)}|{'-' * (col_widths['created'] + 2)}|{'-' * (col_widths['status'] + 2)}|\"\n\n    # Build rows\n    rows = []\n    for job in jobs:\n        job_id = job[\"id\"]\n        image = truncate(get_image_or_space(job), col_widths[\"image\"])\n        command = truncate(format_command(job.get(\"command\")), col_widths[\"command\"])\n        created = truncate(format_date(job.get(\"createdAt\")), col_widths[\"created\"])\n        status = truncate(job[\"status\"][\"stage\"], col_widths[\"status\"])\n\n        rows.append(\n            f\"| {job_id.ljust(col_widths['id'])} | {image.ljust(col_widths['image'])} | {command.ljust(col_widths['command'])} | {created.ljust(col_widths['created'])} | {status.ljust(col_widths['status'])} |\"\n        )\n\n    return \"\\n\".join([header, separator] + rows)\n\n\ndef format_scheduled_jobs_table(jobs: List[Dict[str, Any]]) -> str:\n    \"\"\"Format scheduled jobs as a markdown table\"\"\"\n    if len(jobs) == 0:\n        return \"No scheduled jobs found.\"\n\n    # Calculate dynamic ID column width\n    longest_id_length = max(len(job[\"id\"]) for job in jobs)\n    id_column_width = max(longest_id_length, len(\"ID\"))\n\n    # Define column widths\n    col_widths = {\n        \"id\": id_column_width,\n        \"schedule\": 12,\n        \"image\": 18,\n        \"command\": 25,\n        \"lastRun\": 19,\n        \"nextRun\": 19,\n        \"suspend\": 9,\n    }\n\n    # Build header\n    header = f\"| {'ID'.ljust(col_widths['id'])} | {'SCHEDULE'.ljust(col_widths['schedule'])} | {'IMAGE/SPACE'.ljust(col_widths['image'])} | {'COMMAND'.ljust(col_widths['command'])} | {'LAST RUN'.ljust(col_widths['lastRun'])} | {'NEXT RUN'.ljust(col_widths['nextRun'])} | {'SUSPENDED'.ljust(col_widths['suspend'])} 
|\"\n    separator = f\"|{'-' * (col_widths['id'] + 2)}|{'-' * (col_widths['schedule'] + 2)}|{'-' * (col_widths['image'] + 2)}|{'-' * (col_widths['command'] + 2)}|{'-' * (col_widths['lastRun'] + 2)}|{'-' * (col_widths['nextRun'] + 2)}|{'-' * (col_widths['suspend'] + 2)}|\"\n\n    # Build rows\n    rows = []\n    for job in jobs:\n        job_id = job[\"id\"]\n        schedule = truncate(job[\"schedule\"], col_widths[\"schedule\"])\n        image = truncate(get_image_or_space(job[\"jobSpec\"]), col_widths[\"image\"])\n        command = truncate(\n            format_command(job[\"jobSpec\"].get(\"command\")), col_widths[\"command\"]\n        )\n        last_run = truncate(format_date(job.get(\"lastRun\")), col_widths[\"lastRun\"])\n        next_run = truncate(format_date(job.get(\"nextRun\")), col_widths[\"nextRun\"])\n        suspend = \"Yes\" if job.get(\"suspend\") else \"No\"\n\n        rows.append(\n            f\"| {job_id.ljust(col_widths['id'])} | {schedule.ljust(col_widths['schedule'])} | {image.ljust(col_widths['image'])} | {command.ljust(col_widths['command'])} | {last_run.ljust(col_widths['lastRun'])} | {next_run.ljust(col_widths['nextRun'])} | {suspend.ljust(col_widths['suspend'])} |\"\n        )\n\n    return \"\\n\".join([header, separator] + rows)\n\n\ndef format_job_details(jobs: Any) -> str:\n    \"\"\"Format job details as JSON in a markdown code block\"\"\"\n\n    job_array = jobs if isinstance(jobs, list) else [jobs]\n    json_str = json.dumps(job_array, indent=2)\n    return f\"```json\\n{json_str}\\n```\"\n\n\ndef format_scheduled_job_details(jobs: Any) -> str:\n    \"\"\"Format scheduled job details as JSON in a markdown code block\"\"\"\n\n    job_array = jobs if isinstance(jobs, list) else [jobs]\n    json_str = json.dumps(job_array, indent=2)\n    return f\"```json\\n{json_str}\\n```\"\n"
  },
  {
    "path": "agent/utils/__init__.py",
    "content": "\"\"\"\nUtility functions and helpers\n\"\"\"\n"
  },
  {
    "path": "agent/utils/boot_timing.py",
    "content": "\"\"\"Shared timing and color helpers for startup visual effects.\"\"\"\n\nimport math\n\n\ndef settle_curve(progress: float, sharpness: float = 3.0) -> float:\n    \"\"\"Return noise amount in range 1..0 for normalized progress 0..1.\"\"\"\n    t = max(0.0, min(1.0, progress))\n    return math.exp(-sharpness * t)\n\n\ndef warm_gold_from_white(progress: float) -> tuple[int, int, int]:\n    \"\"\"Interpolate from white to warm gold for progress 0..1.\"\"\"\n    t = max(0.0, min(1.0, progress))\n    return 255, int(255 - 55 * t), int(255 - 175 * t)\n"
  },
  {
    "path": "agent/utils/braille.py",
    "content": "\"\"\"Braille-character canvas for high-resolution terminal graphics.\n\nEach terminal cell maps to a 2x4 dot grid using Unicode braille characters\n(U+2800–U+28FF), giving 2× horizontal and 4× vertical resolution.\n\"\"\"\n\n# Braille dot positions:  (0,0) (1,0)    dots 1,4\n#                         (0,1) (1,1)    dots 2,5\n#                         (0,2) (1,2)    dots 3,6\n#                         (0,3) (1,3)    dots 7,8\n_DOT_MAP = (\n    (0x01, 0x08),\n    (0x02, 0x10),\n    (0x04, 0x20),\n    (0x40, 0x80),\n)\n\n\nclass BrailleCanvas:\n    \"\"\"A pixel canvas that renders to braille characters.\"\"\"\n\n    def __init__(self, term_width: int, term_height: int):\n        self.term_width = term_width\n        self.term_height = term_height\n        self.pixel_width = term_width * 2\n        self.pixel_height = term_height * 4\n        self._buf = bytearray(term_width * term_height)\n\n    def clear(self) -> None:\n        for i in range(len(self._buf)):\n            self._buf[i] = 0\n\n    def set_pixel(self, x: int, y: int) -> None:\n        if 0 <= x < self.pixel_width and 0 <= y < self.pixel_height:\n            cx, rx = divmod(x, 2)\n            cy, ry = divmod(y, 4)\n            self._buf[cy * self.term_width + cx] |= _DOT_MAP[ry][rx]\n\n    def render(self) -> list[str]:\n        lines = []\n        for row in range(self.term_height):\n            offset = row * self.term_width\n            line = \"\".join(\n                chr(0x2800 + self._buf[offset + col])\n                for col in range(self.term_width)\n            )\n            lines.append(line)\n        return lines\n\n\n# ── Bitmap font (5×7 uppercase + digits) ──────────────────────────────\n\n_FONT: dict[str, list[str]] = {}\n\ndef _define_font() -> None:\n    \"\"\"Define a simple 5×7 bitmap font for uppercase ASCII.\"\"\"\n    glyphs = {\n        \"A\": [\" ## \", \"#  #\", \"#  #\", \"####\", \"#  #\", \"#  #\", \"#  #\"],\n        \"B\": [\"### \", \"#  #\", \"#  
#\", \"### \", \"#  #\", \"#  #\", \"### \"],\n        \"C\": [\" ## \", \"#  #\", \"#   \", \"#   \", \"#   \", \"#  #\", \" ## \"],\n        \"D\": [\"### \", \"#  #\", \"#  #\", \"#  #\", \"#  #\", \"#  #\", \"### \"],\n        \"E\": [\"####\", \"#   \", \"#   \", \"### \", \"#   \", \"#   \", \"####\"],\n        \"F\": [\"####\", \"#   \", \"#   \", \"### \", \"#   \", \"#   \", \"#   \"],\n        \"G\": [\" ## \", \"#  #\", \"#   \", \"# ##\", \"#  #\", \"#  #\", \" ###\"],\n        \"H\": [\"#  #\", \"#  #\", \"#  #\", \"####\", \"#  #\", \"#  #\", \"#  #\"],\n        \"I\": [\"###\", \" # \", \" # \", \" # \", \" # \", \" # \", \"###\"],\n        \"J\": [\"  ##\", \"  # \", \"  # \", \"  # \", \"  # \", \"# # \", \" #  \"],\n        \"K\": [\"#  #\", \"# # \", \"##  \", \"##  \", \"# # \", \"#  #\", \"#  #\"],\n        \"L\": [\"#   \", \"#   \", \"#   \", \"#   \", \"#   \", \"#   \", \"####\"],\n        \"M\": [\"#   #\", \"## ##\", \"# # #\", \"# # #\", \"#   #\", \"#   #\", \"#   #\"],\n        \"N\": [\"#  #\", \"## #\", \"## #\", \"# ##\", \"# ##\", \"#  #\", \"#  #\"],\n        \"O\": [\" ## \", \"#  #\", \"#  #\", \"#  #\", \"#  #\", \"#  #\", \" ## \"],\n        \"P\": [\"### \", \"#  #\", \"#  #\", \"### \", \"#   \", \"#   \", \"#   \"],\n        \"Q\": [\" ## \", \"#  #\", \"#  #\", \"#  #\", \"# ##\", \"#  #\", \" ## \"],\n        \"R\": [\"### \", \"#  #\", \"#  #\", \"### \", \"# # \", \"#  #\", \"#  #\"],\n        \"S\": [\" ## \", \"#  #\", \"#   \", \" ## \", \"   #\", \"#  #\", \" ## \"],\n        \"T\": [\"#####\", \"  #  \", \"  #  \", \"  #  \", \"  #  \", \"  #  \", \"  #  \"],\n        \"U\": [\"#  #\", \"#  #\", \"#  #\", \"#  #\", \"#  #\", \"#  #\", \" ## \"],\n        \"V\": [\"#   #\", \"#   #\", \"#   #\", \" # # \", \" # # \", \"  #  \", \"  #  \"],\n        \"W\": [\"#   #\", \"#   #\", \"#   #\", \"# # #\", \"# # #\", \"## ##\", \"#   #\"],\n        \"X\": [\"#  #\", \"#  #\", \" ## \", \" ## \", \" ## \", \"#  #\", \"#  
#\"],\n        \"Y\": [\"#   #\", \"#   #\", \" # # \", \"  #  \", \"  #  \", \"  #  \", \"  #  \"],\n        \"Z\": [\"####\", \"   #\", \"  # \", \" #  \", \"#   \", \"#   \", \"####\"],\n        \" \": [\"  \", \"  \", \"  \", \"  \", \"  \", \"  \", \"  \"],\n        \"0\": [\" ## \", \"#  #\", \"#  #\", \"#  #\", \"#  #\", \"#  #\", \" ## \"],\n        \"1\": [\" # \", \"## \", \" # \", \" # \", \" # \", \" # \", \"###\"],\n        \"2\": [\" ## \", \"#  #\", \"   #\", \"  # \", \" #  \", \"#   \", \"####\"],\n        \"3\": [\" ## \", \"#  #\", \"   #\", \" ## \", \"   #\", \"#  #\", \" ## \"],\n        \"4\": [\"#  #\", \"#  #\", \"#  #\", \"####\", \"   #\", \"   #\", \"   #\"],\n        \"5\": [\"####\", \"#   \", \"### \", \"   #\", \"   #\", \"#  #\", \" ## \"],\n        \"6\": [\" ## \", \"#   \", \"### \", \"#  #\", \"#  #\", \"#  #\", \" ## \"],\n        \"7\": [\"####\", \"   #\", \"  # \", \" #  \", \" #  \", \" #  \", \" #  \"],\n        \"8\": [\" ## \", \"#  #\", \"#  #\", \" ## \", \"#  #\", \"#  #\", \" ## \"],\n        \"9\": [\" ## \", \"#  #\", \"#  #\", \" ###\", \"   #\", \"   #\", \" ## \"],\n    }\n    _FONT.update(glyphs)\n\n\n_define_font()\n\n\ndef text_to_pixels(text: str, scale: int = 1) -> list[tuple[int, int]]:\n    \"\"\"Convert text string to a list of (x, y) pixel positions using bitmap font.\"\"\"\n    pixels = []\n    cursor_x = 0\n    for ch in text.upper():\n        glyph = _FONT.get(ch)\n        if glyph is None:\n            cursor_x += 4 * scale\n            continue\n        for row_idx, row in enumerate(glyph):\n            for col_idx, cell in enumerate(row):\n                if cell == \"#\":\n                    for sy in range(scale):\n                        for sx in range(scale):\n                            pixels.append((cursor_x + col_idx * scale + sx,\n                                           row_idx * scale + sy))\n        glyph_width = max(len(r) for r in glyph)\n        cursor_x += (glyph_width + 1) * 
scale\n    return pixels\n"
  },
  {
    "path": "agent/utils/crt_boot.py",
    "content": "\"\"\"CRT / glitch boot sequence effect for CLI startup.\n\nSimulates an old CRT terminal booting up: text appearing character by character\nwith noise artifacts, then settling into a clean display.\n\"\"\"\n\nimport random\nimport time\n\nfrom rich.console import Console\nfrom rich.text import Text\nfrom rich.live import Live\n\nfrom agent.utils.boot_timing import settle_curve\n\n\ndef _glitch_text(text: str, intensity: float, rng: random.Random) -> str:\n    \"\"\"Add random glitch characters to text.\"\"\"\n    glitch_chars = \"█▓▒░┃┫┣╋╏╎─━┅┄\"\n    result = list(text)\n    for i in range(len(result)):\n        if rng.random() < intensity:\n            result[i] = rng.choice(glitch_chars)\n    return \"\".join(result)\n\n\ndef run_boot_sequence(console: Console, boot_lines: list[tuple[str, str]]) -> None:\n    \"\"\"Run the CRT boot sequence effect.\n\n    Args:\n        console: Rich console instance.\n        boot_lines: List of (text, rich_style) tuples to display.\n    \"\"\"\n    term_height = min(console.height - 2, 40)\n    rng = random.Random(42)\n\n    with Live(console=console, refresh_per_second=30, transient=True) as live:\n        displayed_lines: list[tuple[str, str]] = []\n\n        for line_text, line_style in boot_lines:\n            if not line_text:\n                displayed_lines.append((\"\", \"\"))\n                continue\n\n            line_len = max(1, len(line_text))\n            # Type out each character\n            for char_idx in range(len(line_text) + 1):\n                result = Text()\n                progress = char_idx / line_len\n                noise = settle_curve(progress)\n                prev_glitch_chance = 0.01 + 0.06 * noise\n                prev_glitch_intensity = 0.02 + 0.12 * noise\n                scanline_chance = 0.005 + 0.03 * noise\n\n                # Render previously completed lines\n                for prev_text, prev_style in displayed_lines:\n                    if rng.random() < 
prev_glitch_chance:\n                        result.append(_glitch_text(prev_text, prev_glitch_intensity, rng), style=prev_style)\n                    else:\n                        result.append(prev_text, style=prev_style)\n                    result.append(\"\\n\")\n\n                # Current line being typed\n                typed = line_text[:char_idx]\n                cursor = \"█\" if char_idx < len(line_text) else \"\"\n\n                # Noise after cursor\n                noise_tail = \"\"\n                if char_idx < len(line_text):\n                    noise_len = rng.randint(0, int(1 + 5 * noise))\n                    noise_tail = \"\".join(rng.choice(\"░▒▓\") for _ in range(noise_len))\n\n                result.append(typed, style=line_style)\n                result.append(cursor, style=\"bold rgb(255,200,80)\")\n                result.append(noise_tail, style=\"dim rgb(180,140,40)\")\n                result.append(\"\\n\")\n\n                # Faint scanlines in remaining space\n                remaining = term_height - len(displayed_lines) - 2\n                for _ in range(max(0, remaining)):\n                    if rng.random() < scanline_chance:\n                        scan_len = rng.randint(5, 30)\n                        result.append(\"─\" * scan_len, style=\"dim rgb(180,140,40)\")\n                    result.append(\"\\n\")\n\n                live.update(result)\n\n                # Variable typing speed\n                if line_text[char_idx - 1:char_idx] in \" .\":\n                    time.sleep(0.025)\n                else:\n                    time.sleep(0.010)\n\n            displayed_lines.append((line_text, line_style))\n            time.sleep(0.06)\n\n        # Hold with blinking cursor\n        for frame in range(20):\n            result = Text()\n            for prev_text, prev_style in displayed_lines:\n                result.append(prev_text, style=prev_style)\n                result.append(\"\\n\")\n            if frame % 
8 < 4:\n                result.append(\"█\", style=\"rgb(255,200,80)\")\n            live.update(result)\n            time.sleep(0.05)\n\n    # Print final clean frame\n    final = Text()\n    for prev_text, prev_style in displayed_lines:\n        final.append(prev_text, style=prev_style)\n        final.append(\"\\n\")\n    console.print(final)\n"
  },
  {
    "path": "agent/utils/particle_logo.py",
    "content": "\"\"\"Particle coalesce effect for the HUGGING FACE ML INTERN logo.\n\nRandom particles swirl in from the edges, converge to form the text\n\"HUGGING FACE / ML INTERN\", hold briefly, then the final frame is printed.\nRendered with braille characters for high detail.\n\nBased on Leandro's particle_coalesce.py demo.\n\"\"\"\n\nimport math\nimport random\nimport time\n\nfrom rich.console import Console\nfrom rich.text import Text\nfrom rich.align import Align\nfrom rich.live import Live\n\nfrom agent.utils.braille import BrailleCanvas, text_to_pixels\nfrom agent.utils.boot_timing import settle_curve, warm_gold_from_white\n\n\nclass Particle:\n    __slots__ = (\"x\", \"y\", \"target_x\", \"target_y\", \"vx\", \"vy\", \"phase\", \"delay\")\n\n    def __init__(self, x: float, y: float, target_x: float, target_y: float, delay: float = 0):\n        self.x = x\n        self.y = y\n        self.target_x = target_x\n        self.target_y = target_y\n        self.vx = 0.0\n        self.vy = 0.0\n        self.phase = random.uniform(0, math.pi * 2)\n        self.delay = delay\n\n    def update_converge(self, t: float, strength: float = 0.08, damping: float = 0.92):\n        \"\"\"Move toward target with spring-like physics.\"\"\"\n        if t < self.delay:\n            # Still in swirl phase\n            self.x += self.vx\n            self.y += self.vy\n            self.vx *= 0.99\n            self.vy *= 0.99\n            # Gentle spiral\n            angle = self.phase + t * 2\n            self.vx += math.cos(angle) * 0.3\n            self.vy += math.sin(angle) * 0.3\n            return\n\n        # Spring toward target\n        dx = self.target_x - self.x\n        dy = self.target_y - self.y\n        self.vx += dx * strength\n        self.vy += dy * strength\n        self.vx *= damping\n        self.vy *= damping\n        self.x += self.vx\n        self.y += self.vy\n\n    @property\n    def at_target(self) -> bool:\n        return abs(self.x - self.target_x) 
< 1.5 and abs(self.y - self.target_y) < 1.5\n\n\ndef run_particle_logo(console: Console, hold_seconds: float = 1.5) -> None:\n    \"\"\"Run the particle coalesce effect.\"\"\"\n    term_width = min(console.width, 120)\n    term_height = min(console.height - 4, 35)\n\n    canvas = BrailleCanvas(term_width, term_height)\n\n    # Get target positions from text\n    text_pixels_line1 = text_to_pixels(\"HUGGING FACE\", scale=2)\n    text_pixels_line2 = text_to_pixels(\"ML INTERN\", scale=2)\n\n    # Calculate dimensions for centering\n    def get_bounds(pixels):\n        if not pixels:\n            return 0, 0, 0, 0\n        xs = [p[0] for p in pixels]\n        ys = [p[1] for p in pixels]\n        return min(xs), max(xs), min(ys), max(ys)\n\n    min_x1, max_x1, min_y1, max_y1 = get_bounds(text_pixels_line1)\n    min_x2, max_x2, min_y2, max_y2 = get_bounds(text_pixels_line2)\n\n    w1, h1 = max_x1 - min_x1 + 1, max_y1 - min_y1 + 1\n    w2, h2 = max_x2 - min_x2 + 1, max_y2 - min_y2 + 1\n\n    total_h = h1 + 6 + h2  # gap between lines\n    start_y = (canvas.pixel_height - total_h) // 2\n\n    # Center line 1\n    offset_x1 = (canvas.pixel_width - w1) // 2 - min_x1\n    offset_y1 = start_y - min_y1\n    targets_1 = [(p[0] + offset_x1, p[1] + offset_y1) for p in text_pixels_line1]\n\n    # Center line 2\n    offset_x2 = (canvas.pixel_width - w2) // 2 - min_x2\n    offset_y2 = start_y + h1 + 6 - min_y2\n    targets_2 = [(p[0] + offset_x2, p[1] + offset_y2) for p in text_pixels_line2]\n\n    all_targets = targets_1 + targets_2\n\n    # Subsample for performance — take every Nth pixel\n    step = max(1, len(all_targets) // 1500)\n    sampled_targets = all_targets[::step]\n\n    # Create particles at random edge positions\n    rng = random.Random(42)\n    particles = []\n    pw, ph = canvas.pixel_width, canvas.pixel_height\n\n    for i, (tx, ty) in enumerate(sampled_targets):\n        # Spawn from random edge\n        side = rng.choice([\"top\", \"bottom\", \"left\", 
\"right\"])\n        if side == \"top\":\n            sx, sy = rng.uniform(0, pw), rng.uniform(-20, -5)\n        elif side == \"bottom\":\n            sx, sy = rng.uniform(0, pw), rng.uniform(ph + 5, ph + 20)\n        elif side == \"left\":\n            sx, sy = rng.uniform(-20, -5), rng.uniform(0, ph)\n        else:\n            sx, sy = rng.uniform(pw + 5, pw + 20), rng.uniform(0, ph)\n\n        delay = rng.uniform(0, 0.4)  # staggered start\n        p = Particle(sx, sy, tx, ty, delay=delay)\n        # Initial velocity — gentle swirl\n        angle = math.atan2(ph / 2 - sy, pw / 2 - sx) + rng.gauss(0, 0.8)\n        speed = rng.uniform(1.0, 2.5)\n        p.vx = math.cos(angle) * speed\n        p.vy = math.sin(angle) * speed\n        particles.append(p)\n\n    # Also add some extra ambient particles that never converge\n    ambient = []\n    for _ in range(200):\n        ax = rng.uniform(0, pw)\n        ay = rng.uniform(0, ph)\n        ap = Particle(ax, ay, ax, ay)\n        ap.vx = rng.gauss(0, 1)\n        ap.vy = rng.gauss(0, 1)\n        ambient.append(ap)\n\n    # Timing: 1s converge + 2s hold = 3s total\n    fps = 24\n    converge_frames = int(fps * 0.9)\n    hold_frames = int(fps * hold_seconds)\n    total_frames = converge_frames + hold_frames\n\n    with Live(console=console, refresh_per_second=fps, transient=True) as live:\n        for frame in range(total_frames):\n            canvas.clear()\n            t = frame * 0.03\n\n            # Update ambient particles (always drifting)\n            for ap in ambient:\n                ap.x += ap.vx + math.sin(t + ap.phase) * 0.5\n                ap.y += ap.vy + math.cos(t + ap.phase * 1.3) * 0.5\n                # Wrap around\n                ap.x = ap.x % pw\n                ap.y = ap.y % ph\n\n                # Fade out ambient during hold phase\n                if frame < converge_frames:\n                    alpha = 0.3 + 0.2 * math.sin(t * 2 + ap.phase)\n                else:\n                    fade = 
(frame - converge_frames) / hold_frames\n                    alpha = (0.3 + 0.2 * math.sin(t * 2 + ap.phase)) * (1 - fade)\n                if alpha > 0.25:\n                    canvas.set_pixel(int(ap.x), int(ap.y))\n\n            if frame < converge_frames:\n                # Converge phase\n                progress = frame / converge_frames\n                noise = settle_curve(progress)\n                for p in particles:\n                    p.update_converge(t, strength=0.06, damping=0.90)\n                    canvas.set_pixel(int(p.x), int(p.y))\n\n                    # Trail effect\n                    trail_scale = 0.2 + 0.5 * noise\n                    trail_x = int(p.x - p.vx * trail_scale)\n                    trail_y = int(p.y - p.vy * trail_scale)\n                    canvas.set_pixel(trail_x, trail_y)\n\n                # Color transitions from white to warm gold\n                r, g, b = warm_gold_from_white(progress)\n            else:\n                # Hold phase — settle into solid logo\n                settle_t = (frame - converge_frames) / hold_frames\n                for p in particles:\n                    # Jitter decays to zero\n                    jitter = (1 - settle_t) * 0.7\n                    jx = p.target_x + math.sin(t * 3 + p.phase) * jitter\n                    jy = p.target_y + math.cos(t * 3 + p.phase * 1.5) * jitter\n                    canvas.set_pixel(int(jx), int(jy))\n                    canvas.set_pixel(int(p.target_x), int(p.target_y))\n\n                r, g, b = 255, 200, 80\n\n            # Render with color\n            lines = canvas.render()\n            result = Text()\n            for line in lines:\n                for ch in line:\n                    if ch == chr(0x2800):\n                        result.append(ch)\n                    else:\n                        result.append(ch, style=f\"rgb({r},{g},{b})\")\n                result.append(\"\\n\")\n\n            live.update(Align.center(result))\n          
  time.sleep(1.0 / fps)\n\n    # Print final settled frame\n    canvas.clear()\n    for p in particles:\n        canvas.set_pixel(int(p.target_x), int(p.target_y))\n    final = Text()\n    for line in canvas.render():\n        for ch in line:\n            if ch == chr(0x2800):\n                final.append(ch)\n            else:\n                final.append(ch, style=\"rgb(255,200,80)\")\n        final.append(\"\\n\")\n    console.print(Align.center(final))\n"
  },
  {
    "path": "agent/utils/reliability_checks.py",
    "content": "\"\"\"Reliability checks for job submissions and other operations\"\"\"\n\n\ndef check_training_script_save_pattern(script: str) -> str | None:\n    \"\"\"Check if a training script properly saves models.\"\"\"\n    has_from_pretrained = \"from_pretrained\" in script\n    has_push_to_hub = \"push_to_hub\" in script\n\n    if has_from_pretrained and not has_push_to_hub:\n        return \"\\n\\033[91mWARNING: No model save detected in this script. Ensure this is intentional.\\033[0m\"\n    elif has_from_pretrained and has_push_to_hub:\n        return \"\\n\\033[92mModel will be pushed to hub after training.\\033[0m\"\n\n    return None\n"
  },
  {
    "path": "agent/utils/terminal_display.py",
    "content": "\"\"\"\nTerminal display utilities — rich-powered CLI formatting.\n\"\"\"\n\nimport re\n\nfrom rich.console import Console\nfrom rich.markdown import Heading, Markdown\nfrom rich.panel import Panel\nfrom rich.theme import Theme\n\n\nclass _LeftHeading(Heading):\n    \"\"\"Rich's default Markdown renders h1/h2 centered via Align.center.\n    Yield the styled text directly so headings stay left-aligned.\"\"\"\n\n    def __rich_console__(self, console, options):\n        self.text.justify = \"left\"\n        yield self.text\n\n\nMarkdown.elements[\"heading_open\"] = _LeftHeading\n\n\n_ANSI_RE = re.compile(r\"\\x1b\\[[0-9;]*[a-zA-Z]\")\n\n\ndef _clip_to_width(s: str, width: int) -> str:\n    \"\"\"Truncate a string to `width` visible columns, preserving ANSI styles.\n\n    Needed for the sub-agent live redraw: cursor-up-and-erase assumes one\n    logical line == one terminal row. If a line wraps, cursor-up undershoots\n    and the next redraw corrupts the display. Truncating prevents wrap.\n    \"\"\"\n    if width <= 0:\n        return s\n    out: list[str] = []\n    visible = 0\n    i = 0\n    # Reserve 1 char for the trailing ellipsis\n    limit = width - 1\n    truncated = False\n    while i < len(s):\n        m = _ANSI_RE.match(s, i)\n        if m:\n            out.append(m.group())\n            i = m.end()\n            continue\n        if visible >= limit:\n            truncated = True\n            break\n        out.append(s[i])\n        visible += 1\n        i += 1\n    if truncated:\n        # Strip styles (so ellipsis isn't left hanging inside a style run)\n        out.append(\"\\033[0m…\")\n    return \"\".join(out)\n\n_THEME = Theme({\n    \"tool.name\": \"bold rgb(255,200,80)\",\n    \"tool.args\": \"dim\",\n    \"tool.ok\": \"dim green\",\n    \"tool.fail\": \"dim red\",\n    \"info\": \"dim\",\n    \"muted\": \"dim\",\n    # Markdown emphasis colors\n    \"markdown.strong\": \"bold rgb(255,200,80)\",\n    \"markdown.emphasis\": \"italic 
rgb(180,140,40)\",\n    \"markdown.code\": \"rgb(120,220,255)\",\n    \"markdown.code_block\": \"rgb(120,220,255)\",\n    \"markdown.link\": \"underline rgb(90,180,255)\",\n    \"markdown.h1\": \"bold rgb(255,200,80)\",\n    \"markdown.h2\": \"bold rgb(240,180,95)\",\n    \"markdown.h3\": \"bold rgb(220,165,100)\",\n})\n\n_console = Console(theme=_THEME, highlight=False)\n\n# Indent prefix for all agent output (aligns under the `>` prompt)\n_I = \"  \"\n\n\ndef get_console() -> Console:\n    return _console\n\n\n# ── Banner ─────────────────────────────────────────────────────────────\n\ndef print_banner(model: str | None = None, hf_user: str | None = None) -> None:\n    \"\"\"Print particle logo then CRT boot sequence with system info.\"\"\"\n    from agent.utils.particle_logo import run_particle_logo\n    from agent.utils.crt_boot import run_boot_sequence\n\n    # Particle coalesce logo — 1.5s converge, 2s hold\n    run_particle_logo(_console, hold_seconds=2.0)\n\n    # Clear screen for CRT boot — starts from top\n    _console.file.write(\"\\033[2J\\033[H\")\n    _console.file.flush()\n\n    model_label = model or \"bedrock/us.anthropic.claude-opus-4-6-v1\"\n    user_label = hf_user or \"not logged in\"\n\n    # Warm gold palette matching the shimmer highlight (255, 200, 80)\n    gold = \"rgb(255,200,80)\"\n    dim_gold = \"rgb(180,140,40)\"\n\n    boot_lines = [\n        (f\"{_I}Initializing agent runtime...\", gold),\n        (f\"{_I}  User: {user_label}\", dim_gold),\n        (f\"{_I}  Model: {model_label}\", dim_gold),\n        (f\"{_I}  Tools: loading...\", dim_gold),\n        (\"\", \"\"),\n        (f\"{_I}/help for commands · /model to switch · /quit to exit\", gold),\n    ]\n\n    run_boot_sequence(_console, boot_lines)\n\n\n# ── Init progress ──────────────────────────────────────────────────────\n\ndef print_init_done(tool_count: int = 0) -> None:\n    import time\n    f = _console.file\n    # Overwrite the \"Tools: loading...\" line with actual count\n 
   f.write(f\"\\033[A\\033[A\\033[A\\033[K\")  # Move up 3 lines (blank + help + blank) then up to tools line\n    f.write(f\"\\033[A\\033[K\")\n    gold = \"\\033[38;2;180;140;40m\"\n    reset = \"\\033[0m\"\n    tool_text = f\"{_I}  Tools: {tool_count} loaded\"\n    for ch in tool_text:\n        f.write(f\"{gold}{ch}{reset}\")\n        f.flush()\n        time.sleep(0.012)\n    f.write(\"\\n\\n\")\n    # Reprint the help line\n    f.write(f\"{_I}\\033[38;2;255;200;80m/help for commands · /model to switch · /quit to exit{reset}\\n\\n\")\n    # Ready message — minimal padding\n    f.write(f\"{_I}\\033[38;2;255;200;80mReady. Let's build something impressive.{reset}\\n\")\n    f.flush()\n\n\n# ── Tool calls ─────────────────────────────────────────────────────────\n\ndef print_tool_call(tool_name: str, args_preview: str) -> None:\n    import time\n    f = _console.file\n    # CRT-style: type out tool name in HF yellow\n    gold = \"\\033[38;2;255;200;80m\"\n    reset = \"\\033[0m\"\n    f.write(f\"{_I}{gold}▸ \")\n    for ch in tool_name:\n        f.write(ch)\n        f.flush()\n        time.sleep(0.015)\n    f.write(f\"{reset}  \\033[2m{args_preview}{reset}\\n\")\n    f.flush()\n\n\ndef print_tool_output(output: str, success: bool, truncate: bool = True) -> None:\n    if truncate:\n        output = _truncate(output, max_lines=10)\n    style = \"tool.ok\" if success else \"tool.fail\"\n    # Indent each line of tool output\n    indented = \"\\n\".join(f\"{_I}  {line}\" for line in output.split(\"\\n\"))\n    _console.print(f\"[{style}]{indented}[/{style}]\")\n\n\nclass SubAgentDisplayManager:\n    \"\"\"Manages multiple concurrent sub-agent displays.\n\n    Each agent gets its own stats and rolling tool-call log.\n    All agents are rendered together so terminal escape-code\n    erase/redraw stays consistent.\n    \"\"\"\n\n    _MAX_VISIBLE = 4  # tool-call lines shown per agent\n\n    def __init__(self):\n        self._agents: dict[str, dict] = {}  # agent_id -> 
state dict\n        self._lines_on_screen = 0\n        self._ticker_task = None\n\n    def start(self, agent_id: str, label: str = \"research\") -> None:\n        import asyncio\n        import time\n        self._agents[agent_id] = {\n            \"label\": label,\n            \"calls\": [],\n            \"tool_count\": 0,\n            \"token_count\": 0,\n            \"start_time\": time.monotonic(),\n        }\n        if not self._ticker_task:\n            self._ticker_task = asyncio.ensure_future(self._tick())\n        self._redraw()\n\n    def set_tokens(self, agent_id: str, tokens: int) -> None:\n        if agent_id in self._agents:\n            self._agents[agent_id][\"token_count\"] = tokens\n\n    def set_tool_count(self, agent_id: str, count: int) -> None:\n        if agent_id in self._agents:\n            self._agents[agent_id][\"tool_count\"] = count\n\n    def add_call(self, agent_id: str, tool_desc: str) -> None:\n        if agent_id in self._agents:\n            self._agents[agent_id][\"calls\"].append(tool_desc)\n            self._redraw()\n\n    def clear(self, agent_id: str) -> None:\n        # On completion: erase the live region, freeze a single-line summary\n        # for this agent (\"✓ research: … (stats)\") above the live region so\n        # the user sees each sub-agent finish cleanly without the tool-call\n        # noise, then redraw remaining live agents.\n        agent = self._agents.pop(agent_id, None)\n        self._erase()\n        if agent is not None:\n            width = max(10, _console.width)\n            line = _clip_to_width(self._render_completion_line(agent), width)\n            _console.file.write(line + \"\\n\")\n            _console.file.flush()\n        self._lines_on_screen = 0\n        if not self._agents:\n            if self._ticker_task:\n                self._ticker_task.cancel()\n                self._ticker_task = None\n        else:\n            self._redraw()\n\n    @staticmethod\n    def 
_render_completion_line(agent: dict) -> str:\n        stats = SubAgentDisplayManager._format_stats(agent)\n        label = agent[\"label\"]\n        # dim green check + dim label; stats in parens\n        line = f\"{_I}\\033[38;2;120;200;140m✓\\033[0m \\033[2m{label}\\033[0m\"\n        if stats:\n            line += f\"  \\033[2m({stats})\\033[0m\"\n        return line\n\n    async def _tick(self) -> None:\n        import asyncio\n        try:\n            while True:\n                await asyncio.sleep(1.0)\n                if self._agents:\n                    self._redraw()\n        except asyncio.CancelledError:\n            pass\n\n    @staticmethod\n    def _format_stats(agent: dict) -> str:\n        import time\n        start = agent[\"start_time\"]\n        if start is None:\n            return \"\"\n        elapsed = time.monotonic() - start\n        if elapsed < 60:\n            time_str = f\"{elapsed:.0f}s\"\n        else:\n            time_str = f\"{elapsed / 60:.0f}m {elapsed % 60:.0f}s\"\n        tok = agent[\"token_count\"]\n        tok_str = f\"{tok / 1000:.1f}k\" if tok >= 1000 else str(tok)\n        return f\"{agent['tool_count']} tool uses · {tok_str} tokens · {time_str}\"\n\n    def _erase(self) -> None:\n        if self._lines_on_screen > 0:\n            f = _console.file\n            for _ in range(self._lines_on_screen):\n                f.write(\"\\033[A\\033[K\")\n            f.flush()\n\n    def _render_agent_lines(self, agent: dict, compact: bool = False) -> list[str]:\n        \"\"\"Render one agent's block.\n\n        compact=True → single line (label + stats + most-recent tool name);\n        compact=False → header + up to _MAX_VISIBLE rolling tool-call lines.\n        We use compact mode when multiple agents are live so the total live\n        region stays small enough to fit on one screen. 
Otherwise cursor-up\n        can't reach lines that have scrolled into scrollback, and every\n        redraw pollutes history with a stale copy.\n        \"\"\"\n        stats = self._format_stats(agent)\n        label = agent[\"label\"]\n        header = f\"{_I}\\033[38;2;255;200;80m▸ {label}\\033[0m\"\n        if stats:\n            header += f\"  \\033[2m({stats})\\033[0m\"\n        if compact:\n            latest = agent[\"calls\"][-1] if agent[\"calls\"] else \"\"\n            if latest:\n                # Strip long json tails for the inline view\n                short = latest.split(\"  \")[0] if \"  \" in latest else latest\n                header += f\" \\033[2m·\\033[0m \\033[2m{short}\\033[0m\"\n            return [header]\n        lines = [header]\n        visible = agent[\"calls\"][-self._MAX_VISIBLE:]\n        for desc in visible:\n            lines.append(f\"{_I}  \\033[2m{desc}\\033[0m\")\n        return lines\n\n    def _redraw(self) -> None:\n        f = _console.file\n        self._erase()\n        compact = len(self._agents) > 1\n        width = max(10, _console.width)\n        lines: list[str] = []\n        for agent in self._agents.values():\n            for ln in self._render_agent_lines(agent, compact=compact):\n                lines.append(_clip_to_width(ln, width))\n        for line in lines:\n            f.write(line + \"\\n\")\n        f.flush()\n        self._lines_on_screen = len(lines)\n\n\n_subagent_display = SubAgentDisplayManager()\n\n\ndef print_tool_log(tool: str, log: str, agent_id: str = \"\", label: str = \"\") -> None:\n    \"\"\"Handle tool log events — sub-agent calls get the rolling display.\"\"\"\n    if tool == \"research\":\n        aid = agent_id or \"research\"\n        if log == \"Starting research sub-agent...\":\n            _subagent_display.start(aid, label or \"research\")\n        elif log == \"Research complete.\":\n            _subagent_display.clear(aid)\n        elif log.startswith(\"tokens:\"):\n           
 _subagent_display.set_tokens(aid, int(log[7:]))\n        elif log.startswith(\"tools:\"):\n            _subagent_display.set_tool_count(aid, int(log[6:]))\n        else:\n            _subagent_display.add_call(aid, log)\n    else:\n        _console.print(f\"{_I}[dim]{tool}: {log}[/dim]\")\n\n\n# ── Messages ───────────────────────────────────────────────────────────\n\nasync def print_markdown(\n    text: str,\n    cancel_event: \"asyncio.Event | None\" = None,\n    instant: bool = False,\n) -> None:\n    import asyncio\n    import io, random\n    from rich.padding import Padding\n\n    _console.print()\n\n    # Render markdown to a string buffer so we can type it out\n    buf = io.StringIO()\n    # Important: StringIO is not a TTY, so Rich would normally strip styles.\n    # Force terminal rendering so ANSI style codes are preserved for typewriter output.\n    buf_console = Console(\n        file=buf,\n        width=_console.width,\n        highlight=False,\n        theme=_THEME,\n        force_terminal=True,\n        color_system=_console.color_system or \"truecolor\",\n    )\n    buf_console.print(Padding(Markdown(text), (0, 0, 0, 2)))\n    rendered = buf.getvalue()\n\n    # Strip trailing whitespace from each line so we don't type across the full width\n    lines = rendered.split(\"\\n\")\n    rendered = \"\\n\".join(line.rstrip() for line in lines)\n\n    f = _console.file\n\n    # Headless / non-interactive: dump the rendered markdown in one write.\n    if instant:\n        f.write(rendered)\n        f.write(\"\\n\")\n        f.flush()\n        return\n\n    # CRT typewriter effect — async so the event loop can service signal\n    # handlers (Ctrl+C during streaming) between characters. 
If cancelled\n    # mid-type, stop cleanly: write an ANSI reset so half-open color state\n    # doesn't bleed onto the \"interrupted\" line, and return.\n    rng = random.Random(42)\n    cancelled = False\n    for ch in rendered:\n        if cancel_event is not None and cancel_event.is_set():\n            cancelled = True\n            break\n        f.write(ch)\n        f.flush()\n        if ch == \"\\n\":\n            await asyncio.sleep(0.002)\n        elif ch == \" \":\n            await asyncio.sleep(0.002)\n        elif rng.random() < 0.03:\n            await asyncio.sleep(0.015)\n        else:\n            await asyncio.sleep(0.004)\n    f.write(\"\\033[0m\\n\" if cancelled else \"\\n\")\n    f.flush()\n\n\ndef print_error(message: str) -> None:\n    _console.print(f\"\\n{_I}[bold red]Error:[/bold red] {message}\")\n\n\ndef print_turn_complete() -> None:\n    pass  # no separator — clean output\n\n\ndef print_interrupted() -> None:\n    _console.print(f\"\\n{_I}[dim italic]interrupted[/dim italic]\")\n\n\ndef print_compacted(old_tokens: int, new_tokens: int) -> None:\n    _console.print(f\"{_I}[dim]context compacted: {old_tokens:,} → {new_tokens:,} tokens[/dim]\")\n\n\n# ── Approval ───────────────────────────────────────────────────────────\n\ndef print_approval_header(count: int) -> None:\n    label = f\"Approval required — {count} item{'s' if count != 1 else ''}\"\n    _console.print()\n    _console.print(f\"{_I}\", Panel(f\"[bold yellow]{label}[/bold yellow]\", border_style=\"yellow\", expand=False))\n\n\ndef print_approval_item(index: int, total: int, tool_name: str, operation: str) -> None:\n    _console.print(f\"\\n{_I}[bold]\\\\[{index}/{total}][/bold]  [tool.name]{tool_name}[/tool.name]  {operation}\")\n\n\ndef print_yolo_approve(count: int) -> None:\n    _console.print(f\"{_I}[bold yellow]yolo →[/bold yellow] auto-approved {count} item(s)\")\n\n\n# ── Help ───────────────────────────────────────────────────────────────\n\nHELP_TEXT = 
f\"\"\"\\\n{_I}[bold]Commands[/bold]\n{_I}  [cyan]/help[/cyan]            Show this help\n{_I}  [cyan]/undo[/cyan]            Undo last turn\n{_I}  [cyan]/compact[/cyan]         Compact context window\n{_I}  [cyan]/model[/cyan] [id]      Show available models or switch\n{_I}  [cyan]/effort[/cyan] [level]  Reasoning effort (minimal|low|medium|high|xhigh|max|off)\n{_I}  [cyan]/yolo[/cyan]            Toggle auto-approve mode\n{_I}  [cyan]/status[/cyan]          Current model & turn count\n{_I}  [cyan]/quit[/cyan]            Exit\"\"\"\n\n\ndef print_help() -> None:\n    _console.print()\n    _console.print(HELP_TEXT)\n    _console.print()\n\n\n# ── Plan display ───────────────────────────────────────────────────────\n\ndef format_plan_display() -> str:\n    \"\"\"Format the current plan for display.\"\"\"\n    from agent.tools.plan_tool import get_current_plan\n\n    plan = get_current_plan()\n    if not plan:\n        return \"\"\n\n    completed = [t for t in plan if t[\"status\"] == \"completed\"]\n    in_progress = [t for t in plan if t[\"status\"] == \"in_progress\"]\n    pending = [t for t in plan if t[\"status\"] == \"pending\"]\n\n    lines = []\n    for t in completed:\n        lines.append(f\"{_I}[green]✓[/green] [dim]{t['content']}[/dim]\")\n    for t in in_progress:\n        lines.append(f\"{_I}[yellow]▸[/yellow] {t['content']}\")\n    for t in pending:\n        lines.append(f\"{_I}[dim]○ {t['content']}[/dim]\")\n\n    summary = f\"[dim]{len(completed)}/{len(plan)} done[/dim]\"\n    lines.append(f\"{_I}{summary}\")\n    return \"\\n\".join(lines)\n\n\ndef print_plan() -> None:\n    plan_str = format_plan_display()\n    if plan_str:\n        _console.print(plan_str)\n\n\n# ── Formatting for plan_tool output (used by plan_tool handler) ────────\n\ndef format_plan_tool_output(todos: list) -> str:\n    if not todos:\n        return \"Plan is empty.\"\n\n    lines = [\"Plan updated:\", \"\"]\n    completed = [t for t in todos if t[\"status\"] == 
\"completed\"]\n    in_progress = [t for t in todos if t[\"status\"] == \"in_progress\"]\n    pending = [t for t in todos if t[\"status\"] == \"pending\"]\n\n    for t in completed:\n        lines.append(f\"  [x] {t['id']}. {t['content']}\")\n    for t in in_progress:\n        lines.append(f\"  [~] {t['id']}. {t['content']}\")\n    for t in pending:\n        lines.append(f\"  [ ] {t['id']}. {t['content']}\")\n\n    lines.append(f\"\\n{len(completed)}/{len(todos)} done\")\n    return \"\\n\".join(lines)\n\n\n# ── Internal helpers ───────────────────────────────────────────────────\n\ndef _truncate(text: str, max_lines: int = 6) -> str:\n    lines = text.split(\"\\n\")\n    if len(lines) <= max_lines:\n        return text\n    return \"\\n\".join(lines[:max_lines]) + f\"\\n... ({len(lines) - max_lines} more lines)\"\n"
  },
  {
    "path": "backend/__init__.py",
    "content": "# Backend package for HF Agent web interface\n"
  },
  {
    "path": "backend/dependencies.py",
    "content": "\"\"\"Authentication dependencies for FastAPI routes.\n\n- In dev mode (OAUTH_CLIENT_ID not set): auth is bypassed, returns a default \"dev\" user.\n- In production: validates Bearer tokens or cookies against HF OAuth.\n\"\"\"\n\nimport logging\nimport os\nimport time\nfrom typing import Any\n\nimport httpx\nfrom fastapi import HTTPException, Request, status\n\nlogger = logging.getLogger(__name__)\n\nOPENID_PROVIDER_URL = os.environ.get(\"OPENID_PROVIDER_URL\", \"https://huggingface.co\")\nAUTH_ENABLED = bool(os.environ.get(\"OAUTH_CLIENT_ID\", \"\"))\nHF_EMPLOYEE_ORG = os.environ.get(\"HF_EMPLOYEE_ORG\", \"huggingface\")\n\n# Simple in-memory token cache: token -> (user_info, expiry_time)\n_token_cache: dict[str, tuple[dict[str, Any], float]] = {}\nTOKEN_CACHE_TTL = 300  # 5 minutes\n\n# Org membership cache: key -> expiry_time (only caches positive results)\n_org_member_cache: dict[str, float] = {}\n\nDEV_USER: dict[str, Any] = {\n    \"user_id\": \"dev\",\n    \"username\": \"dev\",\n    \"authenticated\": True,\n    \"plan\": \"org\",  # Dev runs at the Pro/Org quota tier so local testing isn't capped.\n}\n\n# Plan field discovery — log the whoami-v2 shape once at DEBUG so we can\n# confirm the actual key in production without hammering the HF API.\n_WHOAMI_SHAPE_LOGGED = False\n\n\nasync def _validate_token(token: str) -> dict[str, Any] | None:\n    \"\"\"Validate a token against HF OAuth userinfo endpoint.\n\n    Results are cached for TOKEN_CACHE_TTL seconds to avoid excessive API calls.\n    \"\"\"\n    now = time.time()\n\n    # Check cache\n    if token in _token_cache:\n        user_info, expiry = _token_cache[token]\n        if now < expiry:\n            return user_info\n        del _token_cache[token]\n\n    # Validate against HF\n    async with httpx.AsyncClient(timeout=10.0) as client:\n        try:\n            response = await client.get(\n                f\"{OPENID_PROVIDER_URL}/oauth/userinfo\",\n                
headers={\"Authorization\": f\"Bearer {token}\"},\n            )\n            if response.status_code != 200:\n                logger.debug(\"Token validation failed: status %d\", response.status_code)\n                return None\n            user_info = response.json()\n            _token_cache[token] = (user_info, now + TOKEN_CACHE_TTL)\n            return user_info\n        except httpx.HTTPError as e:\n            logger.warning(\"Token validation error: %s\", e)\n            return None\n\n\ndef _user_from_info(user_info: dict[str, Any]) -> dict[str, Any]:\n    \"\"\"Build a normalized user dict from HF userinfo response.\"\"\"\n    return {\n        \"user_id\": user_info.get(\"sub\", user_info.get(\"preferred_username\", \"unknown\")),\n        \"username\": user_info.get(\"preferred_username\", \"unknown\"),\n        \"name\": user_info.get(\"name\"),\n        \"picture\": user_info.get(\"picture\"),\n        \"authenticated\": True,\n    }\n\n\ndef _normalize_plan(whoami: dict[str, Any]) -> str:\n    \"\"\"Map an HF /api/whoami-v2 payload to one of: 'free' | 'pro' | 'org'.\n\n    The exact field shape in whoami-v2 isn't documented for our purposes,\n    so we try a handful of likely keys and fall back to 'free'. The first\n    call logs the raw shape at DEBUG (see `_fetch_user_plan`) so we can\n    pin the real key post-deploy.\n    \"\"\"\n    plan_str = \"\"\n    for key in (\"plan\", \"type\", \"accountType\"):\n        val = whoami.get(key)\n        if isinstance(val, str) and val:\n            plan_str = val.lower()\n            break\n\n    if not plan_str:\n        if whoami.get(\"isPro\") is True or whoami.get(\"is_pro\") is True:\n            return \"pro\"\n\n    if \"pro\" in plan_str or \"enterprise\" in plan_str or \"team\" in plan_str:\n        return \"pro\"\n\n    # Org tier: anyone in a paid / enterprise org. 
We don't pay for this\n    # right now, but the \"pro\" cap applies identically.\n    orgs = whoami.get(\"orgs\") or []\n    if isinstance(orgs, list):\n        for org in orgs:\n            if isinstance(org, dict):\n                org_plan = str(org.get(\"plan\") or org.get(\"type\") or \"\").lower()\n                if \"pro\" in org_plan or \"enterprise\" in org_plan or \"team\" in org_plan:\n                    return \"org\"\n\n    return \"free\"\n\n\nasync def _fetch_user_plan(token: str) -> str:\n    \"\"\"Look up the user's HF plan via /api/whoami-v2.\n\n    Returns 'free' | 'pro' | 'org'. Non-200, network errors, or an unknown\n    payload shape all collapse to 'free' — safe default; we'd rather under-\n    grant the Pro cap than over-grant it on bad data.\n    \"\"\"\n    global _WHOAMI_SHAPE_LOGGED\n    async with httpx.AsyncClient(timeout=5.0) as client:\n        try:\n            resp = await client.get(\n                f\"{OPENID_PROVIDER_URL}/api/whoami-v2\",\n                headers={\"Authorization\": f\"Bearer {token}\"},\n            )\n            if resp.status_code != 200:\n                return \"free\"\n            whoami = resp.json()\n        except httpx.HTTPError:\n            return \"free\"\n        except ValueError:\n            return \"free\"\n\n    if not _WHOAMI_SHAPE_LOGGED:\n        _WHOAMI_SHAPE_LOGGED = True\n        logger.debug(\n            \"whoami-v2 payload keys: %s (sample values: plan=%r type=%r isPro=%r)\",\n            sorted(whoami.keys()) if isinstance(whoami, dict) else type(whoami).__name__,\n            whoami.get(\"plan\") if isinstance(whoami, dict) else None,\n            whoami.get(\"type\") if isinstance(whoami, dict) else None,\n            whoami.get(\"isPro\") if isinstance(whoami, dict) else None,\n        )\n\n    if not isinstance(whoami, dict):\n        return \"free\"\n    return _normalize_plan(whoami)\n\n\nasync def _extract_user_from_token(token: str) -> dict[str, Any] | None:\n    
\"\"\"Validate a token and return a user dict, or None.\"\"\"\n    user_info = await _validate_token(token)\n    if user_info is None:\n        return None\n    user = _user_from_info(user_info)\n    user[\"plan\"] = await _fetch_user_plan(token)\n    return user\n\n\nasync def check_org_membership(token: str, org_name: str) -> bool:\n    \"\"\"Check if the token owner belongs to an HF org. Only caches positive results.\"\"\"\n    now = time.time()\n    key = token + org_name\n    cached = _org_member_cache.get(key)\n    if cached and cached > now:\n        return True\n\n    async with httpx.AsyncClient(timeout=10.0) as client:\n        try:\n            resp = await client.get(\n                f\"{OPENID_PROVIDER_URL}/api/whoami-v2\",\n                headers={\"Authorization\": f\"Bearer {token}\"},\n            )\n            if resp.status_code != 200:\n                return False\n            orgs = {o.get(\"name\") for o in resp.json().get(\"orgs\", [])}\n            if org_name in orgs:\n                _org_member_cache[key] = now + TOKEN_CACHE_TTL\n                return True\n            return False\n        except httpx.HTTPError:\n            return False\n\n\nasync def get_current_user(request: Request) -> dict[str, Any]:\n    \"\"\"FastAPI dependency: extract and validate the current user.\n\n    Checks (in order):\n    1. Authorization: Bearer <token> header\n    2. 
hf_access_token cookie\n\n    In dev mode (AUTH_ENABLED=False), returns a default dev user.\n    \"\"\"\n    if not AUTH_ENABLED:\n        return DEV_USER\n\n    # Try Authorization header\n    auth_header = request.headers.get(\"Authorization\", \"\")\n    if auth_header.startswith(\"Bearer \"):\n        token = auth_header[7:]\n        user = await _extract_user_from_token(token)\n        if user:\n            return user\n\n    # Try cookie\n    token = request.cookies.get(\"hf_access_token\")\n    if token:\n        user = await _extract_user_from_token(token)\n        if user:\n            return user\n\n    raise HTTPException(\n        status_code=status.HTTP_401_UNAUTHORIZED,\n        detail=\"Not authenticated. Please log in via /auth/login.\",\n        headers={\"WWW-Authenticate\": \"Bearer\"},\n    )\n\n\ndef _extract_token(request: Request) -> str | None:\n    \"\"\"Pull the HF access token from the Authorization header or cookie.\n\n    Mirrors the lookup order used by ``get_current_user``.\n    \"\"\"\n    auth_header = request.headers.get(\"Authorization\", \"\")\n    if auth_header.startswith(\"Bearer \"):\n        return auth_header[7:]\n    return request.cookies.get(\"hf_access_token\")\n\n\nasync def require_huggingface_org_member(request: Request) -> bool:\n    \"\"\"Return True if the caller is a member of the ``huggingface`` org.\n\n    Used to gate endpoints that can push a session onto an Anthropic model\n    billed to the Space's ``ANTHROPIC_API_KEY``. Returns True unconditionally\n    in dev mode so local testing isn't blocked.\n    \"\"\"\n    if not AUTH_ENABLED:\n        return True\n    token = _extract_token(request)\n    if not token:\n        return False\n    return await check_org_membership(token, HF_EMPLOYEE_ORG)\n\n\n"
  },
  {
    "path": "backend/main.py",
    "content": "\"\"\"FastAPI application for HF Agent web interface.\"\"\"\n\nimport logging\nimport os\nfrom contextlib import asynccontextmanager\nfrom pathlib import Path\n\nfrom dotenv import load_dotenv\nfrom fastapi import FastAPI\nfrom fastapi.middleware.cors import CORSMiddleware\nfrom fastapi.staticfiles import StaticFiles\nfrom routes.agent import router as agent_router\nfrom routes.auth import router as auth_router\n\n# Load .env from project root (parent directory)\nload_dotenv(Path(__file__).parent.parent / \".env\")\n\n# Configure logging\nlogging.basicConfig(\n    level=logging.INFO,\n    format=\"%(asctime)s - %(name)s - %(levelname)s - %(message)s\",\n)\nlogger = logging.getLogger(__name__)\n\n\n@asynccontextmanager\nasync def lifespan(app: FastAPI):\n    \"\"\"Application lifespan handler.\"\"\"\n    logger.info(\"Starting HF Agent backend...\")\n    yield\n    logger.info(\"Shutting down HF Agent backend...\")\n\n\napp = FastAPI(\n    title=\"HF Agent\",\n    description=\"ML Engineering Assistant API\",\n    version=\"1.0.0\",\n    lifespan=lifespan,\n)\n\n# CORS middleware for development\napp.add_middleware(\n    CORSMiddleware,\n    allow_origins=[\n        \"http://localhost:5173\",  # Vite dev server\n        \"http://localhost:3000\",\n        \"http://127.0.0.1:5173\",\n        \"http://127.0.0.1:3000\",\n    ],\n    allow_credentials=True,\n    allow_methods=[\"*\"],\n    allow_headers=[\"*\"],\n)\n\n# Include routers\napp.include_router(agent_router)\napp.include_router(auth_router)\n\n# Serve static files (frontend build) in production\nstatic_path = Path(__file__).parent.parent / \"static\"\nif static_path.exists():\n    app.mount(\"/\", StaticFiles(directory=str(static_path), html=True), name=\"static\")\n    logger.info(f\"Serving static files from {static_path}\")\nelse:\n    logger.info(\"No static directory found, running in API-only mode\")\n\n\n@app.get(\"/api\")\nasync def api_root():\n    \"\"\"API root endpoint.\"\"\"\n    
return {\n        \"name\": \"HF Agent API\",\n        \"version\": \"1.0.0\",\n        \"docs\": \"/docs\",\n    }\n\n\nif __name__ == \"__main__\":\n    import uvicorn\n\n    port = int(os.environ.get(\"PORT\", 7860))\n    uvicorn.run(app, host=\"0.0.0.0\", port=port)\n"
  },
  {
    "path": "backend/models.py",
    "content": "\"\"\"Pydantic models for API requests and responses.\"\"\"\n\nfrom enum import Enum\nfrom typing import Any\n\nfrom pydantic import BaseModel\n\n\nclass OpType(str, Enum):\n    \"\"\"Operation types matching agent/core/agent_loop.py.\"\"\"\n\n    USER_INPUT = \"user_input\"\n    EXEC_APPROVAL = \"exec_approval\"\n    INTERRUPT = \"interrupt\"\n    UNDO = \"undo\"\n    COMPACT = \"compact\"\n    SHUTDOWN = \"shutdown\"\n\n\nclass Operation(BaseModel):\n    \"\"\"Operation to be submitted to the agent.\"\"\"\n\n    op_type: OpType\n    data: dict[str, Any] | None = None\n\n\nclass Submission(BaseModel):\n    \"\"\"Submission wrapper with ID and operation.\"\"\"\n\n    id: str\n    operation: Operation\n\n\nclass ToolApproval(BaseModel):\n    \"\"\"Approval decision for a single tool call.\"\"\"\n\n    tool_call_id: str\n    approved: bool\n    feedback: str | None = None\n    edited_script: str | None = None\n\n\nclass ApprovalRequest(BaseModel):\n    \"\"\"Request to approve/reject tool calls.\"\"\"\n\n    session_id: str\n    approvals: list[ToolApproval]\n\n\nclass SubmitRequest(BaseModel):\n    \"\"\"Request to submit user input.\"\"\"\n\n    session_id: str\n    text: str\n\n\nclass TruncateRequest(BaseModel):\n    \"\"\"Request to truncate conversation history to before a specific user message.\"\"\"\n\n    user_message_index: int\n\n\nclass SessionResponse(BaseModel):\n    \"\"\"Response when creating a new session.\"\"\"\n\n    session_id: str\n    ready: bool = True\n\n\nclass PendingApprovalTool(BaseModel):\n    \"\"\"A tool waiting for user approval.\"\"\"\n\n    tool: str\n    tool_call_id: str\n    arguments: dict[str, Any] = {}\n\n\nclass SessionInfo(BaseModel):\n    \"\"\"Session metadata.\"\"\"\n\n    session_id: str\n    created_at: str\n    is_active: bool\n    is_processing: bool = False\n    message_count: int\n    user_id: str = \"dev\"\n    pending_approval: list[PendingApprovalTool] | None = None\n    model: str | None = 
None\n\n\nclass HealthResponse(BaseModel):\n    \"\"\"Health check response.\"\"\"\n\n    status: str = \"ok\"\n    active_sessions: int = 0\n    max_sessions: int = 0\n\n\nclass LLMHealthResponse(BaseModel):\n    \"\"\"LLM provider health check response.\"\"\"\n\n    status: str  # \"ok\" | \"error\"\n    model: str\n    error: str | None = None\n    error_type: str | None = None  # \"auth\" | \"credits\" | \"rate_limit\" | \"network\" | \"unknown\"\n"
  },
  {
    "path": "backend/routes/__init__.py",
    "content": "# Routes package\n"
  },
  {
    "path": "backend/routes/agent.py",
    "content": "\"\"\"Agent API routes — REST + SSE endpoints.\n\nAll routes (except /health) require authentication via the get_current_user\ndependency. In dev mode (no OAUTH_CLIENT_ID), auth is bypassed automatically.\n\"\"\"\n\nimport asyncio\nimport json\nimport logging\nimport os\nfrom typing import Any\n\nfrom dependencies import get_current_user, require_huggingface_org_member\nfrom fastapi import (\n    APIRouter,\n    Depends,\n    HTTPException,\n    Request,\n)\nfrom fastapi.responses import StreamingResponse\nfrom litellm import acompletion\nfrom models import (\n    ApprovalRequest,\n    HealthResponse,\n    LLMHealthResponse,\n    SessionInfo,\n    SessionResponse,\n    SubmitRequest,\n    TruncateRequest,\n)\nfrom session_manager import MAX_SESSIONS, AgentSession, SessionCapacityError, session_manager\n\nimport user_quotas\n\nfrom agent.core.llm_params import _resolve_llm_params\n\nlogger = logging.getLogger(__name__)\n\nrouter = APIRouter(prefix=\"/api\", tags=[\"agent\"])\n\nAVAILABLE_MODELS = [\n    {\n        \"id\": \"moonshotai/Kimi-K2.6\",\n        \"label\": \"Kimi K2.6\",\n        \"provider\": \"huggingface\",\n        \"tier\": \"free\",\n        \"recommended\": True,\n    },\n    {\n        \"id\": \"bedrock/us.anthropic.claude-opus-4-6-v1\",\n        \"label\": \"Claude Opus 4.6\",\n        \"provider\": \"anthropic\",\n        \"tier\": \"pro\",\n        \"recommended\": True,\n    },\n    {\n        \"id\": \"MiniMaxAI/MiniMax-M2.7\",\n        \"label\": \"MiniMax M2.7\",\n        \"provider\": \"huggingface\",\n        \"tier\": \"free\",\n    },\n    {\n        \"id\": \"zai-org/GLM-5.1\",\n        \"label\": \"GLM 5.1\",\n        \"provider\": \"huggingface\",\n        \"tier\": \"free\",\n    },\n]\n\n\ndef _is_anthropic_model(model_id: str) -> bool:\n    return \"anthropic\" in model_id\n\n\nasync def _require_hf_for_anthropic(request: Request, model_id: str) -> None:\n    \"\"\"403 if a non-``huggingface``-org user tries to 
select an Anthropic model.\n\n    Anthropic models are billed to the Space's ``ANTHROPIC_API_KEY``; every\n    other model in ``AVAILABLE_MODELS`` is routed through HF Router and\n    billed via ``X-HF-Bill-To``. The gate only fires for Anthropic so\n    non-HF users can still freely switch between the free models.\n\n    Pattern: https://github.com/huggingface/ml-intern/pull/63\n    \"\"\"\n    if not _is_anthropic_model(model_id):\n        return\n    if not await require_huggingface_org_member(request):\n        raise HTTPException(\n            status_code=403,\n            detail={\n                \"error\": \"anthropic_restricted\",\n                \"message\": (\n                    \"Opus is gated to HF staff. Pick a free model — \"\n                    \"Kimi K2.6, MiniMax M2.7, or GLM 5.1 — instead.\"\n                ),\n            },\n        )\n\n\nasync def _enforce_claude_quota(\n    user: dict[str, Any],\n    agent_session: AgentSession,\n) -> None:\n    \"\"\"Charge the user's daily Claude quota on first use of Anthropic in a session.\n\n    Runs at *message-submit* time, not session-create time — so spinning up a\n    Claude session to look around doesn't burn quota. The ``claude_counted``\n    flag on ``AgentSession`` guards against re-counting the same session.\n\n    No-ops when the session's current model isn't Anthropic, or when this\n    session has already been charged. 
Raises 429 when the user has hit\n    their daily cap.\n    \"\"\"\n    if agent_session.claude_counted:\n        return\n    model_name = agent_session.session.config.model_name\n    if not _is_anthropic_model(model_name):\n        return\n    user_id = user[\"user_id\"]\n    used = await user_quotas.get_claude_used_today(user_id)\n    cap = user_quotas.daily_cap_for(user.get(\"plan\"))\n    if used >= cap:\n        raise HTTPException(\n            status_code=429,\n            detail={\n                \"error\": \"claude_daily_cap\",\n                \"plan\": user.get(\"plan\", \"free\"),\n                \"cap\": cap,\n                \"message\": (\n                    \"Daily Claude limit reached. Upgrade to HF Pro for \"\n                    f\"{user_quotas.CLAUDE_PRO_DAILY}/day or use a free model.\"\n                ),\n            },\n        )\n    await user_quotas.increment_claude(user_id)\n    agent_session.claude_counted = True\n\n\ndef _check_session_access(session_id: str, user: dict[str, Any]) -> None:\n    \"\"\"Verify the user has access to the given session. Raises 403 or 404.\"\"\"\n    info = session_manager.get_session_info(session_id)\n    if not info:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n    if not session_manager.verify_session_access(session_id, user[\"user_id\"]):\n        raise HTTPException(status_code=403, detail=\"Access denied to this session\")\n\n\n@router.get(\"/health\", response_model=HealthResponse)\nasync def health_check() -> HealthResponse:\n    \"\"\"Health check endpoint.\"\"\"\n    return HealthResponse(\n        status=\"ok\",\n        active_sessions=session_manager.active_session_count,\n        max_sessions=MAX_SESSIONS,\n    )\n\n\n@router.get(\"/health/llm\", response_model=LLMHealthResponse)\nasync def llm_health_check() -> LLMHealthResponse:\n    \"\"\"Check if the LLM provider is reachable and the API key is valid.\n\n    Makes a minimal 1-token completion call.  
Catches common errors:\n    - 401 → invalid API key\n    - 402/insufficient_quota → out of credits\n    - 429 → rate limited\n    - timeout / network → provider unreachable\n    \"\"\"\n    model = session_manager.config.model_name\n    try:\n        llm_params = _resolve_llm_params(model, reasoning_effort=\"high\")\n        await acompletion(\n            messages=[{\"role\": \"user\", \"content\": \"hi\"}],\n            max_tokens=1,\n            timeout=10,\n            **llm_params,\n        )\n        return LLMHealthResponse(status=\"ok\", model=model)\n    except Exception as e:\n        err_str = str(e).lower()\n        error_type = \"unknown\"\n\n        if (\n            \"401\" in err_str\n            or \"auth\" in err_str\n            or \"invalid\" in err_str\n            or \"api key\" in err_str\n        ):\n            error_type = \"auth\"\n        elif (\n            \"402\" in err_str\n            or \"credit\" in err_str\n            or \"quota\" in err_str\n            or \"insufficient\" in err_str\n            or \"billing\" in err_str\n        ):\n            error_type = \"credits\"\n        elif \"429\" in err_str or \"rate\" in err_str:\n            error_type = \"rate_limit\"\n        elif \"timeout\" in err_str or \"connect\" in err_str or \"network\" in err_str:\n            error_type = \"network\"\n\n        logger.warning(f\"LLM health check failed ({error_type}): {e}\")\n        return LLMHealthResponse(\n            status=\"error\",\n            model=model,\n            error=str(e)[:500],\n            error_type=error_type,\n        )\n\n\n@router.get(\"/config/model\")\nasync def get_model() -> dict:\n    \"\"\"Get current model and available models. 
No auth required.\"\"\"\n    return {\n        \"current\": session_manager.config.model_name,\n        \"available\": AVAILABLE_MODELS,\n    }\n\n\n_TITLE_STRIP_CHARS = str.maketrans(\"\", \"\", \"`*_~#[]()\")\n\n\n@router.post(\"/title\")\nasync def generate_title(\n    request: SubmitRequest, user: dict = Depends(get_current_user)\n) -> dict:\n    \"\"\"Generate a short title for a chat session based on the first user message.\n\n    Always uses gpt-oss-120b via Cerebras on the HF router. The tab headline\n    renders as plain text, so the model is told to avoid markdown and any\n    stray formatting characters are stripped before returning. gpt-oss is a\n    reasoning model — reasoning_effort=low keeps the reasoning budget small\n    so the 60-token output budget isn't consumed before the title is written.\n    \"\"\"\n    api_key = (\n        os.environ.get(\"INFERENCE_TOKEN\")\n        or (user.get(\"hf_token\") if isinstance(user, dict) else None)\n        or os.environ.get(\"HF_TOKEN\")\n    )\n    try:\n        response = await acompletion(\n            # Double openai/ prefix: LiteLLM strips the first as its provider\n            # prefix, leaving the HF model id on the wire for the router.\n            model=\"openai/openai/gpt-oss-120b:cerebras\",\n            api_base=\"https://router.huggingface.co/v1\",\n            api_key=api_key,\n            messages=[\n                {\n                    \"role\": \"system\",\n                    \"content\": (\n                        \"Generate a very short title (max 6 words) for a chat conversation \"\n                        \"that starts with the following user message. \"\n                        \"Reply with ONLY the title in plain text. \"\n                        \"Do NOT use markdown, backticks, asterisks, quotes, brackets, or any \"\n                        \"formatting characters. 
No punctuation at the end.\"\n                    ),\n                },\n                {\"role\": \"user\", \"content\": request.text[:500]},\n            ],\n            max_tokens=60,\n            temperature=0.3,\n            timeout=10,\n            reasoning_effort=\"low\",\n        )\n        title = response.choices[0].message.content.strip().strip('\"').strip(\"'\")\n        title = title.translate(_TITLE_STRIP_CHARS).strip()\n        if len(title) > 50:\n            title = title[:50].rstrip() + \"…\"\n        return {\"title\": title}\n    except Exception as e:\n        logger.warning(f\"Title generation failed: {e}\")\n        fallback = request.text.strip()\n        title = fallback[:40].rstrip() + \"…\" if len(fallback) > 40 else fallback\n        return {\"title\": title}\n\n\n@router.post(\"/session\", response_model=SessionResponse)\nasync def create_session(\n    request: Request, user: dict = Depends(get_current_user)\n) -> SessionResponse:\n    \"\"\"Create a new agent session bound to the authenticated user.\n\n    The user's HF access token is extracted from the Authorization header\n    and stored in the session so that tools (e.g. hf_jobs) can act on\n    behalf of the user.\n\n    Optional body ``{\"model\"?: <id>}`` selects the session's LLM; unknown\n    ids are rejected (400). The Claude-quota gate runs at message-submit\n    time, not here — spinning up an Opus session to look around is free.\n\n    Returns 503 if the server or user has reached the session limit.\n    \"\"\"\n    # Extract the user's HF token (Bearer header, HttpOnly cookie, or env var)\n    hf_token = None\n    auth_header = request.headers.get(\"Authorization\", \"\")\n    if auth_header.startswith(\"Bearer \"):\n        hf_token = auth_header[7:]\n    if not hf_token:\n        hf_token = request.cookies.get(\"hf_access_token\")\n    if not hf_token:\n        hf_token = os.environ.get(\"HF_TOKEN\")\n\n    # Optional model override. 
Empty body falls back to the config default.\n    model: str | None = None\n    try:\n        body = await request.json()\n    except Exception:\n        body = None\n    if isinstance(body, dict):\n        model = body.get(\"model\")\n\n    valid_ids = {m[\"id\"] for m in AVAILABLE_MODELS}\n    if model and model not in valid_ids:\n        raise HTTPException(status_code=400, detail=f\"Unknown model: {model}\")\n\n    # Opus is gated to HF staff (PR #63). Only fires when the resolved model\n    # is Anthropic; free models pass through.\n    resolved_model = model or session_manager.config.model_name\n    await _require_hf_for_anthropic(request, resolved_model)\n\n    try:\n        session_id = await session_manager.create_session(\n            user_id=user[\"user_id\"], hf_token=hf_token, model=model\n        )\n    except SessionCapacityError as e:\n        raise HTTPException(status_code=503, detail=str(e))\n\n    return SessionResponse(session_id=session_id, ready=True)\n\n\n@router.post(\"/session/restore-summary\", response_model=SessionResponse)\nasync def restore_session_summary(\n    request: Request, body: dict, user: dict = Depends(get_current_user)\n) -> SessionResponse:\n    \"\"\"Create a new session seeded with a summary of the caller's prior\n    conversation. The client sends its cached messages; we run the standard\n    summarization prompt on them and drop the result into the new\n    session's context as a user-role system note.\n\n    Optional ``\"model\"`` in the body overrides the session's LLM. 
The\n    Claude-quota gate runs at message-submit time, not here.\n    \"\"\"\n    messages = body.get(\"messages\")\n    if not isinstance(messages, list) or not messages:\n        raise HTTPException(status_code=400, detail=\"Missing 'messages' array\")\n\n    hf_token = None\n    auth_header = request.headers.get(\"Authorization\", \"\")\n    if auth_header.startswith(\"Bearer \"):\n        hf_token = auth_header[7:]\n    if not hf_token:\n        hf_token = request.cookies.get(\"hf_access_token\")\n    if not hf_token:\n        hf_token = os.environ.get(\"HF_TOKEN\")\n\n    model = body.get(\"model\")\n    valid_ids = {m[\"id\"] for m in AVAILABLE_MODELS}\n    if model and model not in valid_ids:\n        raise HTTPException(status_code=400, detail=f\"Unknown model: {model}\")\n\n    resolved_model = model or session_manager.config.model_name\n    await _require_hf_for_anthropic(request, resolved_model)\n\n    try:\n        session_id = await session_manager.create_session(\n            user_id=user[\"user_id\"], hf_token=hf_token, model=model\n        )\n    except SessionCapacityError as e:\n        raise HTTPException(status_code=503, detail=str(e))\n\n    try:\n        summarized = await session_manager.seed_from_summary(session_id, messages)\n    except ValueError as e:\n        raise HTTPException(status_code=500, detail=str(e))\n    except Exception as e:\n        logger.exception(\"seed_from_summary failed\")\n        raise HTTPException(status_code=500, detail=f\"Summary failed: {e}\")\n\n    logger.info(\n        f\"Seeded session {session_id} for {user.get('username', 'unknown')} \"\n        f\"(summary of {summarized} messages)\"\n    )\n    return SessionResponse(session_id=session_id, ready=True)\n\n\n@router.get(\"/session/{session_id}\", response_model=SessionInfo)\nasync def get_session(\n    session_id: str, user: dict = Depends(get_current_user)\n) -> SessionInfo:\n    \"\"\"Get session information. 
Only accessible by the session owner.\"\"\"\n    _check_session_access(session_id, user)\n    info = session_manager.get_session_info(session_id)\n    return SessionInfo(**info)\n\n\n@router.post(\"/session/{session_id}/model\")\nasync def set_session_model(\n    session_id: str,\n    body: dict,\n    request: Request,\n    user: dict = Depends(get_current_user),\n) -> dict:\n    \"\"\"Switch the active model for a single session (tab-scoped).\n\n    Takes effect on the next LLM call in that session — other sessions\n    (including other browser tabs) are unaffected. Model switches don't\n    charge quota — the Claude-quota gate only fires at message-submit time.\n\n    Switching TO an Anthropic model requires HF org membership (PR #63);\n    free-model switches are unrestricted.\n    \"\"\"\n    _check_session_access(session_id, user)\n    model_id = body.get(\"model\")\n    if not model_id:\n        raise HTTPException(status_code=400, detail=\"Missing 'model' field\")\n    valid_ids = {m[\"id\"] for m in AVAILABLE_MODELS}\n    if model_id not in valid_ids:\n        raise HTTPException(status_code=400, detail=f\"Unknown model: {model_id}\")\n    await _require_hf_for_anthropic(request, model_id)\n    agent_session = session_manager.sessions.get(session_id)\n    if not agent_session:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n    agent_session.session.update_model(model_id)\n    logger.info(\n        f\"Session {session_id} model → {model_id} \"\n        f\"(by {user.get('username', 'unknown')})\"\n    )\n    return {\"session_id\": session_id, \"model\": model_id}\n\n\n@router.get(\"/user/quota\")\nasync def get_user_quota(user: dict = Depends(get_current_user)) -> dict:\n    \"\"\"Return the user's plan tier and today's Claude-session quota state.\"\"\"\n    plan = user.get(\"plan\", \"free\")\n    used = await user_quotas.get_claude_used_today(user[\"user_id\"])\n    cap = user_quotas.daily_cap_for(plan)\n    return {\n        
\"plan\": plan,\n        \"claude_used_today\": used,\n        \"claude_daily_cap\": cap,\n        \"claude_remaining\": max(0, cap - used),\n    }\n\n\n@router.get(\"/sessions\", response_model=list[SessionInfo])\nasync def list_sessions(user: dict = Depends(get_current_user)) -> list[SessionInfo]:\n    \"\"\"List sessions belonging to the authenticated user.\"\"\"\n    sessions = session_manager.list_sessions(user_id=user[\"user_id\"])\n    return [SessionInfo(**s) for s in sessions]\n\n\n@router.delete(\"/session/{session_id}\")\nasync def delete_session(\n    session_id: str, user: dict = Depends(get_current_user)\n) -> dict:\n    \"\"\"Delete a session. Only accessible by the session owner.\"\"\"\n    _check_session_access(session_id, user)\n    success = await session_manager.delete_session(session_id)\n    if not success:\n        raise HTTPException(status_code=404, detail=\"Session not found\")\n    return {\"status\": \"deleted\", \"session_id\": session_id}\n\n\n@router.post(\"/submit\")\nasync def submit_input(\n    request: SubmitRequest, user: dict = Depends(get_current_user)\n) -> dict:\n    \"\"\"Submit user input to a session. Only accessible by the session owner.\"\"\"\n    _check_session_access(request.session_id, user)\n    agent_session = session_manager.sessions.get(request.session_id)\n    if agent_session is not None:\n        await _enforce_claude_quota(user, agent_session)\n    success = await session_manager.submit_user_input(request.session_id, request.text)\n    if not success:\n        raise HTTPException(status_code=404, detail=\"Session not found or inactive\")\n    return {\"status\": \"submitted\", \"session_id\": request.session_id}\n\n\n@router.post(\"/approve\")\nasync def submit_approval(\n    request: ApprovalRequest, user: dict = Depends(get_current_user)\n) -> dict:\n    \"\"\"Submit tool approvals to a session. 
Only accessible by the session owner.\"\"\"\n    _check_session_access(request.session_id, user)\n    approvals = [\n        {\n            \"tool_call_id\": a.tool_call_id,\n            \"approved\": a.approved,\n            \"feedback\": a.feedback,\n            \"edited_script\": a.edited_script,\n        }\n        for a in request.approvals\n    ]\n    success = await session_manager.submit_approval(request.session_id, approvals)\n    if not success:\n        raise HTTPException(status_code=404, detail=\"Session not found or inactive\")\n    return {\"status\": \"submitted\", \"session_id\": request.session_id}\n\n\n@router.post(\"/chat/{session_id}\")\nasync def chat_sse(\n    session_id: str,\n    request: Request,\n    user: dict = Depends(get_current_user),\n) -> StreamingResponse:\n    \"\"\"SSE endpoint: submit input or approval, then stream events until turn ends.\"\"\"\n    _check_session_access(session_id, user)\n\n    agent_session = session_manager.sessions.get(session_id)\n    if not agent_session or not agent_session.is_active:\n        raise HTTPException(status_code=404, detail=\"Session not found or inactive\")\n\n    # Parse body\n    body = await request.json()\n\n    # Subscribe BEFORE submitting so we never miss events — even if the\n    # agent loop processes the submission before this coroutine continues.\n    broadcaster = agent_session.broadcaster\n    sub_id, event_queue = broadcaster.subscribe()\n\n    # Submit the operation\n    text = body.get(\"text\")\n    approvals = body.get(\"approvals\")\n\n    # Gate user-message sends against the daily Claude quota. 
Approvals are\n    # continuations of an in-progress turn — the session was already charged\n    # on its first message, so we skip the gate there.\n    if text is not None and not approvals:\n        try:\n            await _enforce_claude_quota(user, agent_session)\n        except HTTPException:\n            broadcaster.unsubscribe(sub_id)\n            raise\n\n    try:\n        if approvals:\n            formatted = [\n                {\n                    \"tool_call_id\": a[\"tool_call_id\"],\n                    \"approved\": a[\"approved\"],\n                    \"feedback\": a.get(\"feedback\"),\n                    \"edited_script\": a.get(\"edited_script\"),\n                }\n                for a in approvals\n            ]\n            success = await session_manager.submit_approval(session_id, formatted)\n        elif text is not None:\n            success = await session_manager.submit_user_input(session_id, text)\n        else:\n            broadcaster.unsubscribe(sub_id)\n            raise HTTPException(status_code=400, detail=\"Must provide 'text' or 'approvals'\")\n\n        if not success:\n            broadcaster.unsubscribe(sub_id)\n            raise HTTPException(status_code=404, detail=\"Session not found or inactive\")\n    except HTTPException:\n        raise\n    except Exception:\n        broadcaster.unsubscribe(sub_id)\n        raise\n\n    return _sse_response(broadcaster, event_queue, sub_id)\n\n\n# ---------------------------------------------------------------------------\n# Shared SSE helpers\n# ---------------------------------------------------------------------------\n_TERMINAL_EVENTS = {\"turn_complete\", \"approval_required\", \"error\", \"interrupted\", \"shutdown\"}\n_SSE_KEEPALIVE_SECONDS = 15\n\n\ndef _sse_response(broadcaster, event_queue, sub_id) -> StreamingResponse:\n    \"\"\"Build a StreamingResponse that drains *event_queue* as SSE,\n    sending keepalive comments every 15 s to prevent proxy timeouts.\"\"\"\n\n    
async def event_generator():\n        try:\n            while True:\n                try:\n                    msg = await asyncio.wait_for(\n                        event_queue.get(), timeout=_SSE_KEEPALIVE_SECONDS\n                    )\n                except asyncio.TimeoutError:\n                    # SSE comment — ignored by parsers, keeps connection alive\n                    yield \": keepalive\\n\\n\"\n                    continue\n                event_type = msg.get(\"event_type\", \"\")\n                yield f\"data: {json.dumps(msg)}\\n\\n\"\n                if event_type in _TERMINAL_EVENTS:\n                    break\n        finally:\n            broadcaster.unsubscribe(sub_id)\n\n    return StreamingResponse(\n        event_generator(),\n        media_type=\"text/event-stream\",\n        headers={\n            \"Cache-Control\": \"no-cache\",\n            \"Connection\": \"keep-alive\",\n            \"X-Accel-Buffering\": \"no\",\n        },\n    )\n\n\n@router.get(\"/events/{session_id}\")\nasync def subscribe_events(\n    session_id: str,\n    user: dict = Depends(get_current_user),\n) -> StreamingResponse:\n    \"\"\"Subscribe to events for a running session without submitting new input.\n\n    Used by the frontend to re-attach after a connection drop (e.g. screen\n    sleep).  
Returns 404 if the session isn't active or isn't processing.\n    \"\"\"\n    _check_session_access(session_id, user)\n\n    agent_session = session_manager.sessions.get(session_id)\n    if not agent_session or not agent_session.is_active:\n        raise HTTPException(status_code=404, detail=\"Session not found or inactive\")\n\n    broadcaster = agent_session.broadcaster\n    sub_id, event_queue = broadcaster.subscribe()\n    return _sse_response(broadcaster, event_queue, sub_id)\n\n\n@router.post(\"/interrupt/{session_id}\")\nasync def interrupt_session(\n    session_id: str, user: dict = Depends(get_current_user)\n) -> dict:\n    \"\"\"Interrupt the current operation in a session.\"\"\"\n    _check_session_access(session_id, user)\n    success = await session_manager.interrupt(session_id)\n    if not success:\n        raise HTTPException(status_code=404, detail=\"Session not found or inactive\")\n    return {\"status\": \"interrupted\", \"session_id\": session_id}\n\n\n@router.get(\"/session/{session_id}/messages\")\nasync def get_session_messages(\n    session_id: str, user: dict = Depends(get_current_user)\n) -> list[dict]:\n    \"\"\"Return the session's message history from memory.\"\"\"\n    _check_session_access(session_id, user)\n    agent_session = session_manager.sessions.get(session_id)\n    if not agent_session or not agent_session.is_active:\n        raise HTTPException(status_code=404, detail=\"Session not found or inactive\")\n    return [msg.model_dump() for msg in agent_session.session.context_manager.items]\n\n\n@router.post(\"/undo/{session_id}\")\nasync def undo_session(session_id: str, user: dict = Depends(get_current_user)) -> dict:\n    \"\"\"Undo the last turn in a session.\"\"\"\n    _check_session_access(session_id, user)\n    success = await session_manager.undo(session_id)\n    if not success:\n        raise HTTPException(status_code=404, detail=\"Session not found or inactive\")\n    return {\"status\": \"undo_requested\", 
\"session_id\": session_id}\n\n\n@router.post(\"/truncate/{session_id}\")\nasync def truncate_session(\n    session_id: str, body: TruncateRequest, user: dict = Depends(get_current_user)\n) -> dict:\n    \"\"\"Truncate conversation to before a specific user message.\"\"\"\n    _check_session_access(session_id, user)\n    success = await session_manager.truncate(session_id, body.user_message_index)\n    if not success:\n        raise HTTPException(status_code=404, detail=\"Session not found, inactive, or message index out of range\")\n    return {\"status\": \"truncated\", \"session_id\": session_id}\n\n\n@router.post(\"/compact/{session_id}\")\nasync def compact_session(\n    session_id: str, user: dict = Depends(get_current_user)\n) -> dict:\n    \"\"\"Compact the context in a session.\"\"\"\n    _check_session_access(session_id, user)\n    success = await session_manager.compact(session_id)\n    if not success:\n        raise HTTPException(status_code=404, detail=\"Session not found or inactive\")\n    return {\"status\": \"compact_requested\", \"session_id\": session_id}\n\n\n@router.post(\"/shutdown/{session_id}\")\nasync def shutdown_session(\n    session_id: str, user: dict = Depends(get_current_user)\n) -> dict:\n    \"\"\"Shutdown a session.\"\"\"\n    _check_session_access(session_id, user)\n    success = await session_manager.shutdown_session(session_id)\n    if not success:\n        raise HTTPException(status_code=404, detail=\"Session not found or inactive\")\n    return {\"status\": \"shutdown_requested\", \"session_id\": session_id}\n\n\n"
  },
  {
    "path": "backend/routes/auth.py",
    "content": "\"\"\"Authentication routes for HF OAuth.\n\nHandles the OAuth 2.0 authorization code flow with HF as provider.\nAfter successful auth, sets an HttpOnly cookie with the access token.\n\"\"\"\n\nimport os\nimport secrets\nimport time\nfrom urllib.parse import urlencode\n\nimport httpx\nfrom dependencies import AUTH_ENABLED, check_org_membership, get_current_user\nfrom fastapi import APIRouter, Depends, HTTPException, Request\nfrom fastapi.responses import RedirectResponse\n\nrouter = APIRouter(prefix=\"/auth\", tags=[\"auth\"])\n\n# OAuth configuration from environment\nOAUTH_CLIENT_ID = os.environ.get(\"OAUTH_CLIENT_ID\", \"\")\nOAUTH_CLIENT_SECRET = os.environ.get(\"OAUTH_CLIENT_SECRET\", \"\")\nOPENID_PROVIDER_URL = os.environ.get(\"OPENID_PROVIDER_URL\", \"https://huggingface.co\")\n\n# In-memory OAuth state store with expiry (5 min TTL)\n_OAUTH_STATE_TTL = 300\noauth_states: dict[str, dict] = {}\n\n\ndef _cleanup_expired_states() -> None:\n    \"\"\"Remove expired OAuth states to prevent memory growth.\"\"\"\n    now = time.time()\n    expired = [k for k, v in oauth_states.items() if now > v.get(\"expires_at\", 0)]\n    for k in expired:\n        del oauth_states[k]\n\n\ndef get_redirect_uri(request: Request) -> str:\n    \"\"\"Get the OAuth callback redirect URI.\"\"\"\n    # In HF Spaces, use the SPACE_HOST if available\n    space_host = os.environ.get(\"SPACE_HOST\")\n    if space_host:\n        return f\"https://{space_host}/auth/callback\"\n    # Otherwise construct from request\n    return str(request.url_for(\"oauth_callback\"))\n\n\n@router.get(\"/login\")\nasync def oauth_login(request: Request) -> RedirectResponse:\n    \"\"\"Initiate OAuth login flow.\"\"\"\n    if not OAUTH_CLIENT_ID:\n        raise HTTPException(\n            status_code=500,\n            detail=\"OAuth not configured. 
Set OAUTH_CLIENT_ID environment variable.\",\n        )\n\n    # Clean up expired states to prevent memory growth\n    _cleanup_expired_states()\n\n    # Generate state for CSRF protection\n    state = secrets.token_urlsafe(32)\n    oauth_states[state] = {\n        \"redirect_uri\": get_redirect_uri(request),\n        \"expires_at\": time.time() + _OAUTH_STATE_TTL,\n    }\n\n    # Build authorization URL\n    params = {\n        \"client_id\": OAUTH_CLIENT_ID,\n        \"redirect_uri\": get_redirect_uri(request),\n        \"scope\": \"openid profile read-repos write-repos contribute-repos manage-repos inference-api jobs write-discussions\",\n        \"response_type\": \"code\",\n        \"state\": state,\n        \"orgIds\": os.environ.get(\n            \"HF_OAUTH_ORG_ID\", \"698dbf55845d85df163175f1\"\n        ),  # ml-agent-explorers\n    }\n    auth_url = f\"{OPENID_PROVIDER_URL}/oauth/authorize?{urlencode(params)}\"\n\n    return RedirectResponse(url=auth_url)\n\n\n@router.get(\"/callback\")\nasync def oauth_callback(\n    request: Request, code: str = \"\", state: str = \"\"\n) -> RedirectResponse:\n    \"\"\"Handle OAuth callback.\"\"\"\n    # Verify state\n    if state not in oauth_states:\n        raise HTTPException(status_code=400, detail=\"Invalid state parameter\")\n\n    stored_state = oauth_states.pop(state)\n    redirect_uri = stored_state[\"redirect_uri\"]\n\n    if not code:\n        raise HTTPException(status_code=400, detail=\"No authorization code provided\")\n\n    # Exchange code for token\n    token_url = f\"{OPENID_PROVIDER_URL}/oauth/token\"\n    async with httpx.AsyncClient() as client:\n        try:\n            response = await client.post(\n                token_url,\n                data={\n                    \"grant_type\": \"authorization_code\",\n                    \"code\": code,\n                    \"redirect_uri\": redirect_uri,\n                    \"client_id\": OAUTH_CLIENT_ID,\n                    \"client_secret\": 
OAUTH_CLIENT_SECRET,\n                },\n            )\n            response.raise_for_status()\n            token_data = response.json()\n        except httpx.HTTPError as e:\n            raise HTTPException(status_code=500, detail=f\"Token exchange failed: {e}\")\n\n    # Get user info\n    access_token = token_data.get(\"access_token\")\n    if not access_token:\n        raise HTTPException(\n            status_code=500,\n            detail=\"Token exchange succeeded but no access_token was returned.\",\n        )\n\n    # Fetch user info (optional — failure is not fatal)\n    async with httpx.AsyncClient() as client:\n        try:\n            userinfo_response = await client.get(\n                f\"{OPENID_PROVIDER_URL}/oauth/userinfo\",\n                headers={\"Authorization\": f\"Bearer {access_token}\"},\n            )\n            userinfo_response.raise_for_status()\n        except httpx.HTTPError:\n            pass  # user_info not required for auth flow\n\n    # Set access token as HttpOnly cookie (not in URL — avoids leaks via\n    # Referrer headers, browser history, and server logs)\n    is_production = bool(os.environ.get(\"SPACE_HOST\"))\n    response = RedirectResponse(url=\"/\", status_code=302)\n    response.set_cookie(\n        key=\"hf_access_token\",\n        value=access_token,\n        httponly=True,\n        secure=is_production,  # Secure flag only in production (HTTPS)\n        samesite=\"lax\",\n        max_age=3600 * 24 * 7,  # 7 days\n        path=\"/\",\n    )\n    return response\n\n\n@router.get(\"/logout\")\nasync def logout() -> RedirectResponse:\n    \"\"\"Log out the user by clearing the auth cookie.\"\"\"\n    response = RedirectResponse(url=\"/\")\n    response.delete_cookie(key=\"hf_access_token\", path=\"/\")\n    return response\n\n\n@router.get(\"/status\")\nasync def auth_status() -> dict:\n    \"\"\"Check if OAuth is enabled on this instance.\"\"\"\n    return {\"auth_enabled\": 
AUTH_ENABLED}\n\n\n@router.get(\"/me\")\nasync def get_me(user: dict = Depends(get_current_user)) -> dict:\n    \"\"\"Get current user info. Returns the authenticated user or dev user.\n\n    Uses the shared auth dependency which handles cookie + Bearer token.\n    \"\"\"\n    return user\n\n\nORG_NAME = \"ml-agent-explorers\"\n\n\n@router.get(\"/org-membership\")\nasync def org_membership(\n    request: Request, user: dict = Depends(get_current_user)\n) -> dict:\n    \"\"\"Check if the authenticated user belongs to the ml-agent-explorers org.\"\"\"\n    if not AUTH_ENABLED:\n        return {\"is_member\": True}\n    token = request.cookies.get(\"hf_access_token\") or \"\"\n    if not token:\n        return {\"is_member\": False}\n    is_member = await check_org_membership(token, ORG_NAME)\n    return {\"is_member\": is_member}\n"
  },
  {
    "path": "backend/session_manager.py",
    "content": "\"\"\"Session manager for handling multiple concurrent agent sessions.\"\"\"\n\nimport asyncio\nimport logging\nimport uuid\nfrom dataclasses import dataclass, field\nfrom datetime import datetime\nfrom pathlib import Path\nfrom typing import Any, Optional\n\nfrom agent.config import load_config\nfrom agent.core.agent_loop import process_submission\nfrom agent.core.session import Event, OpType, Session\nfrom agent.core.tools import ToolRouter\n\n# Get project root (parent of backend directory)\nPROJECT_ROOT = Path(__file__).parent.parent\nDEFAULT_CONFIG_PATH = str(PROJECT_ROOT / \"configs\" / \"main_agent_config.json\")\n\n\n# These dataclasses match agent/main.py structure\n@dataclass\nclass Operation:\n    \"\"\"Operation to be executed by the agent.\"\"\"\n\n    op_type: OpType\n    data: Optional[dict[str, Any]] = None\n\n\n@dataclass\nclass Submission:\n    \"\"\"Submission to the agent loop.\"\"\"\n\n    id: str\n    operation: Operation\n\n\nlogger = logging.getLogger(__name__)\n\n\nclass EventBroadcaster:\n    \"\"\"Reads from the agent's event queue and fans out to SSE subscribers.\n\n    Events that arrive when no subscribers are listening are discarded.\n    With SSE each turn is a separate request, so there is no reconnect\n    scenario that would need buffered replay.\n    \"\"\"\n\n    def __init__(self, event_queue: asyncio.Queue):\n        self._source = event_queue\n        self._subscribers: dict[int, asyncio.Queue] = {}\n        self._counter = 0\n\n    def subscribe(self) -> tuple[int, asyncio.Queue]:\n        \"\"\"Create a new subscriber. 
Returns (id, queue).\"\"\"\n        self._counter += 1\n        sub_id = self._counter\n        q: asyncio.Queue = asyncio.Queue()\n        self._subscribers[sub_id] = q\n        return sub_id, q\n\n    def unsubscribe(self, sub_id: int) -> None:\n        self._subscribers.pop(sub_id, None)\n\n    async def run(self) -> None:\n        \"\"\"Main loop — reads from source queue and broadcasts.\"\"\"\n        while True:\n            try:\n                event: Event = await self._source.get()\n                msg = {\"event_type\": event.event_type, \"data\": event.data}\n                for q in self._subscribers.values():\n                    await q.put(msg)\n            except asyncio.CancelledError:\n                break\n            except Exception as e:\n                logger.error(f\"EventBroadcaster error: {e}\")\n\n\n@dataclass\nclass AgentSession:\n    \"\"\"Wrapper for an agent session with its associated resources.\"\"\"\n\n    session_id: str\n    session: Session\n    tool_router: ToolRouter\n    submission_queue: asyncio.Queue\n    user_id: str = \"dev\"  # Owner of this session\n    hf_token: str | None = None  # User's HF OAuth token for tool execution\n    task: asyncio.Task | None = None\n    created_at: datetime = field(default_factory=datetime.utcnow)\n    is_active: bool = True\n    is_processing: bool = False  # True while a submission is being executed\n    broadcaster: Any = None\n    # True once this session has been counted against the user's daily\n    # Claude quota. 
Guards double-counting when the user re-selects an\n    # Anthropic model mid-session.\n    claude_counted: bool = False\n\n\nclass SessionCapacityError(Exception):\n    \"\"\"Raised when no more sessions can be created.\"\"\"\n\n    def __init__(self, message: str, error_type: str = \"global\") -> None:\n        super().__init__(message)\n        self.error_type = error_type  # \"global\" or \"per_user\"\n\n\n# ── Capacity limits ─────────────────────────────────────────────────\n# Sized for HF Spaces 8 vCPU / 32 GB RAM.\n# Each session uses ~10-20 MB (context, tools, queues, task); 200 × 20 MB\n# = 4 GB worst case, leaving plenty of headroom for the Python runtime\n# and per-request overhead.\nMAX_SESSIONS: int = 200\nMAX_SESSIONS_PER_USER: int = 10\n\n\nclass SessionManager:\n    \"\"\"Manages multiple concurrent agent sessions.\"\"\"\n\n    def __init__(self, config_path: str | None = None) -> None:\n        self.config = load_config(config_path or DEFAULT_CONFIG_PATH)\n        self.sessions: dict[str, AgentSession] = {}\n        self._lock = asyncio.Lock()\n\n    def _count_user_sessions(self, user_id: str) -> int:\n        \"\"\"Count active sessions owned by a specific user.\"\"\"\n        return sum(\n            1\n            for s in self.sessions.values()\n            if s.user_id == user_id and s.is_active\n        )\n\n    async def create_session(\n        self,\n        user_id: str = \"dev\",\n        hf_token: str | None = None,\n        model: str | None = None,\n    ) -> str:\n        \"\"\"Create a new agent session and return its ID.\n\n        Session() and ToolRouter() constructors contain blocking I/O\n        (e.g. 
HfApi().whoami(), litellm.get_max_tokens()) so they are\n        executed in a thread pool to avoid freezing the async event loop.\n\n        Args:\n            user_id: The ID of the user who owns this session.\n            hf_token: The user's HF OAuth token, stored for tool execution.\n            model: Optional model override. When set, replaces ``model_name``\n                on the per-session config clone. None falls back to the\n                config default.\n\n        Raises:\n            SessionCapacityError: If the server or user has reached the\n                maximum number of concurrent sessions.\n        \"\"\"\n        # ── Capacity checks ──────────────────────────────────────────\n        async with self._lock:\n            active_count = self.active_session_count\n            if active_count >= MAX_SESSIONS:\n                raise SessionCapacityError(\n                    f\"Server is at capacity ({active_count}/{MAX_SESSIONS} sessions). \"\n                    \"Please try again later.\",\n                    error_type=\"global\",\n                )\n            if user_id != \"dev\":\n                user_count = self._count_user_sessions(user_id)\n                if user_count >= MAX_SESSIONS_PER_USER:\n                    raise SessionCapacityError(\n                        f\"You have reached the maximum of {MAX_SESSIONS_PER_USER} \"\n                        \"concurrent sessions. 
Please close an existing session first.\",\n                        error_type=\"per_user\",\n                    )\n\n        session_id = str(uuid.uuid4())\n\n        # Create queues for this session\n        submission_queue: asyncio.Queue = asyncio.Queue()\n        event_queue: asyncio.Queue = asyncio.Queue()\n\n        # Run blocking constructors in a thread to keep the event loop responsive.\n        # Without this, Session.__init__ → ContextManager → litellm.get_max_tokens()\n        # blocks all HTTP/SSE handling.\n        import time as _time\n\n        def _create_session_sync():\n            t0 = _time.monotonic()\n            tool_router = ToolRouter(self.config.mcpServers, hf_token=hf_token)\n            # Deep-copy config so each session's model switches independently —\n            # tab A picking GLM doesn't flip tab B off Claude.\n            session_config = self.config.model_copy(deep=True)\n            if model:\n                session_config.model_name = model\n            session = Session(\n                event_queue, config=session_config, tool_router=tool_router,\n                hf_token=hf_token,\n            )\n            t1 = _time.monotonic()\n            logger.info(f\"Session initialized in {t1 - t0:.2f}s\")\n            return tool_router, session\n\n        tool_router, session = await asyncio.to_thread(_create_session_sync)\n\n        # Create wrapper\n        agent_session = AgentSession(\n            session_id=session_id,\n            session=session,\n            tool_router=tool_router,\n            submission_queue=submission_queue,\n            user_id=user_id,\n            hf_token=hf_token,\n        )\n\n        async with self._lock:\n            self.sessions[session_id] = agent_session\n\n        # Start the agent loop task\n        task = asyncio.create_task(\n            self._run_session(session_id, submission_queue, event_queue, tool_router)\n        )\n        agent_session.task = task\n\n        
logger.info(f\"Created session {session_id} for user {user_id}\")\n        return session_id\n\n    async def seed_from_summary(self, session_id: str, messages: list[dict]) -> int:\n        \"\"\"Rehydrate a session from cached prior messages via summarization.\n\n        Runs the standard summarization prompt (same one compaction uses)\n        over the provided messages, then seeds the new session's context\n        with that summary. Tool-call pairing concerns disappear because the\n        output is plain text. Returns the number of messages summarized.\n        \"\"\"\n        from litellm import Message\n\n        from agent.context_manager.manager import _RESTORE_PROMPT, summarize_messages\n\n        agent_session = self.sessions.get(session_id)\n        if not agent_session:\n            raise ValueError(f\"Session {session_id} not found\")\n\n        # Parse into Message objects, tolerating malformed entries.\n        parsed: list[Message] = []\n        for raw in messages:\n            if raw.get(\"role\") == \"system\":\n                continue  # the new session has its own system prompt\n            try:\n                parsed.append(Message.model_validate(raw))\n            except Exception as e:\n                logger.warning(\"Dropping malformed message during seed: %s\", e)\n\n        if not parsed:\n            return 0\n\n        session = agent_session.session\n        # Pass the real tool specs so the summarizer sees what the agent\n        # actually has — otherwise Anthropic's modify_params injects a\n        # dummy tool and the summarizer editorializes that the original\n        # tool calls were fabricated.\n        tool_specs = None\n        try:\n            tool_specs = agent_session.tool_router.get_tool_specs_for_llm()\n        except Exception:\n            pass\n        try:\n            summary, _ = await summarize_messages(\n                parsed,\n                model_name=session.config.model_name,\n                
hf_token=session.hf_token,\n                max_tokens=4000,\n                prompt=_RESTORE_PROMPT,\n                tool_specs=tool_specs,\n            )\n        except Exception as e:\n            logger.error(\"Summary call failed during seed: %s\", e)\n            raise\n\n        seed = Message(\n            role=\"user\",\n            content=(\n                \"[SYSTEM: Your prior memory of this conversation — written \"\n                \"in your own voice right before restart. Continue from here.]\\n\\n\"\n                + (summary or \"(no summary returned)\")\n            ),\n        )\n        session.context_manager.items.append(seed)\n        return len(parsed)\n\n    @staticmethod\n    async def _cleanup_sandbox(session: Session) -> None:\n        \"\"\"Delete the sandbox Space if one was created for this session.\"\"\"\n        sandbox = getattr(session, \"sandbox\", None)\n        if sandbox and getattr(sandbox, \"_owns_space\", False):\n            try:\n                logger.info(f\"Deleting sandbox {sandbox.space_id}...\")\n                await asyncio.to_thread(sandbox.delete)\n            except Exception as e:\n                logger.warning(f\"Failed to delete sandbox {sandbox.space_id}: {e}\")\n\n    async def _run_session(\n        self,\n        session_id: str,\n        submission_queue: asyncio.Queue,\n        event_queue: asyncio.Queue,\n        tool_router: ToolRouter,\n    ) -> None:\n        \"\"\"Run the agent loop for a session and broadcast events via EventBroadcaster.\"\"\"\n        agent_session = self.sessions.get(session_id)\n        if not agent_session:\n            logger.error(f\"Session {session_id} not found\")\n            return\n\n        session = agent_session.session\n\n        # Start event broadcaster task\n        broadcaster = EventBroadcaster(event_queue)\n        agent_session.broadcaster = broadcaster\n        broadcast_task = asyncio.create_task(broadcaster.run())\n\n        try:\n            async 
with tool_router:\n                # Send ready event\n                await session.send_event(\n                    Event(event_type=\"ready\", data={\"message\": \"Agent initialized\"})\n                )\n\n                while session.is_running:\n                    try:\n                        # Wait for submission with timeout to allow checking is_running\n                        submission = await asyncio.wait_for(\n                            submission_queue.get(), timeout=1.0\n                        )\n                        agent_session.is_processing = True\n                        try:\n                            should_continue = await process_submission(session, submission)\n                        finally:\n                            agent_session.is_processing = False\n                        if not should_continue:\n                            break\n                    except asyncio.TimeoutError:\n                        continue\n                    except asyncio.CancelledError:\n                        logger.info(f\"Session {session_id} cancelled\")\n                        break\n                    except Exception as e:\n                        logger.error(f\"Error in session {session_id}: {e}\")\n                        await session.send_event(\n                            Event(event_type=\"error\", data={\"error\": str(e)})\n                        )\n\n        finally:\n            broadcast_task.cancel()\n            try:\n                await broadcast_task\n            except asyncio.CancelledError:\n                pass\n\n            await self._cleanup_sandbox(session)\n\n            async with self._lock:\n                if session_id in self.sessions:\n                    self.sessions[session_id].is_active = False\n\n            logger.info(f\"Session {session_id} ended\")\n\n    async def submit(self, session_id: str, operation: Operation) -> bool:\n        \"\"\"Submit an operation to a session.\"\"\"\n        
async with self._lock:\n            agent_session = self.sessions.get(session_id)\n\n        if not agent_session or not agent_session.is_active:\n            logger.warning(f\"Session {session_id} not found or inactive\")\n            return False\n\n        submission = Submission(id=f\"sub_{uuid.uuid4().hex[:8]}\", operation=operation)\n        await agent_session.submission_queue.put(submission)\n        return True\n\n    async def submit_user_input(self, session_id: str, text: str) -> bool:\n        \"\"\"Submit user input to a session.\"\"\"\n        operation = Operation(op_type=OpType.USER_INPUT, data={\"text\": text})\n        return await self.submit(session_id, operation)\n\n    async def submit_approval(\n        self, session_id: str, approvals: list[dict[str, Any]]\n    ) -> bool:\n        \"\"\"Submit tool approvals to a session.\"\"\"\n        operation = Operation(\n            op_type=OpType.EXEC_APPROVAL, data={\"approvals\": approvals}\n        )\n        return await self.submit(session_id, operation)\n\n    async def interrupt(self, session_id: str) -> bool:\n        \"\"\"Interrupt a session by signalling cancellation directly (bypasses queue).\"\"\"\n        agent_session = self.sessions.get(session_id)\n        if not agent_session or not agent_session.is_active:\n            return False\n        agent_session.session.cancel()\n        return True\n\n    async def undo(self, session_id: str) -> bool:\n        \"\"\"Undo last turn in a session.\"\"\"\n        operation = Operation(op_type=OpType.UNDO)\n        return await self.submit(session_id, operation)\n\n    async def truncate(self, session_id: str, user_message_index: int) -> bool:\n        \"\"\"Truncate conversation to before a specific user message (direct, no queue).\"\"\"\n        async with self._lock:\n            agent_session = self.sessions.get(session_id)\n        if not agent_session or not agent_session.is_active:\n            return False\n        return 
agent_session.session.context_manager.truncate_to_user_message(user_message_index)\n\n    async def compact(self, session_id: str) -> bool:\n        \"\"\"Compact context in a session.\"\"\"\n        operation = Operation(op_type=OpType.COMPACT)\n        return await self.submit(session_id, operation)\n\n    async def shutdown_session(self, session_id: str) -> bool:\n        \"\"\"Shutdown a specific session.\"\"\"\n        operation = Operation(op_type=OpType.SHUTDOWN)\n        success = await self.submit(session_id, operation)\n\n        if success:\n            async with self._lock:\n                agent_session = self.sessions.get(session_id)\n                if agent_session and agent_session.task:\n                    # Wait for task to complete\n                    try:\n                        await asyncio.wait_for(agent_session.task, timeout=5.0)\n                    except asyncio.TimeoutError:\n                        agent_session.task.cancel()\n\n        return success\n\n    async def delete_session(self, session_id: str) -> bool:\n        \"\"\"Delete a session entirely.\"\"\"\n        async with self._lock:\n            agent_session = self.sessions.pop(session_id, None)\n\n        if not agent_session:\n            return False\n\n        # Clean up sandbox Space before cancelling the task\n        await self._cleanup_sandbox(agent_session.session)\n\n        # Cancel the task if running\n        if agent_session.task and not agent_session.task.done():\n            agent_session.task.cancel()\n            try:\n                await agent_session.task\n            except asyncio.CancelledError:\n                pass\n\n        return True\n\n    def get_session_owner(self, session_id: str) -> str | None:\n        \"\"\"Get the user_id that owns a session, or None if session doesn't exist.\"\"\"\n        agent_session = self.sessions.get(session_id)\n        if not agent_session:\n            return None\n        return agent_session.user_id\n\n  
  def verify_session_access(self, session_id: str, user_id: str) -> bool:\n        \"\"\"Check if a user has access to a session.\n\n        Returns True if:\n        - The session exists AND the user owns it\n        - The user_id is \"dev\" (dev mode bypass)\n        \"\"\"\n        owner = self.get_session_owner(session_id)\n        if owner is None:\n            return False\n        if user_id == \"dev\" or owner == \"dev\":\n            return True\n        return owner == user_id\n\n    def get_session_info(self, session_id: str) -> dict[str, Any] | None:\n        \"\"\"Get information about a session.\"\"\"\n        agent_session = self.sessions.get(session_id)\n        if not agent_session:\n            return None\n\n        # Extract pending approval tools if any\n        pending_approval = None\n        pa = agent_session.session.pending_approval\n        if pa and pa.get(\"tool_calls\"):\n            pending_approval = []\n            for tc in pa[\"tool_calls\"]:\n                import json\n                try:\n                    args = json.loads(tc.function.arguments)\n                except (json.JSONDecodeError, AttributeError):\n                    args = {}\n                pending_approval.append({\n                    \"tool\": tc.function.name,\n                    \"tool_call_id\": tc.id,\n                    \"arguments\": args,\n                })\n\n        return {\n            \"session_id\": session_id,\n            \"created_at\": agent_session.created_at.isoformat(),\n            \"is_active\": agent_session.is_active,\n            \"is_processing\": agent_session.is_processing,\n            \"message_count\": len(agent_session.session.context_manager.items),\n            \"user_id\": agent_session.user_id,\n            \"pending_approval\": pending_approval,\n            \"model\": agent_session.session.config.model_name,\n        }\n\n    def list_sessions(self, user_id: str | None = None) -> list[dict[str, Any]]:\n        
\"\"\"List sessions, optionally filtered by user.\n\n        Args:\n            user_id: If provided, only return sessions owned by this user.\n                     If \"dev\", return all sessions (dev mode).\n        \"\"\"\n        results = []\n        for sid in self.sessions:\n            info = self.get_session_info(sid)\n            if not info:\n                continue\n            if user_id and user_id != \"dev\" and info.get(\"user_id\") != user_id:\n                continue\n            results.append(info)\n        return results\n\n    @property\n    def active_session_count(self) -> int:\n        \"\"\"Get count of active sessions.\"\"\"\n        return sum(1 for s in self.sessions.values() if s.is_active)\n\n\n# Global session manager instance\nsession_manager = SessionManager()\n"
  },
  {
    "path": "backend/start.sh",
    "content": "#!/bin/bash\n# Entrypoint for HF Spaces dev mode compatibility.\n# Dev mode spawns CMD multiple times simultaneously on restart.\n# Only the first instance can bind port 7860 — the rest must exit\n# with code 0 so the dev mode daemon doesn't mark the app as crashed.\n\n# Run uvicorn; if it fails due to port conflict, exit cleanly.\nuvicorn main:app --host 0.0.0.0 --port 7860\nEXIT_CODE=$?\n\nif [ $EXIT_CODE -ne 0 ]; then\n    # Check if this was a port-in-use failure (another instance already running)\n    echo \"uvicorn exited with code $EXIT_CODE, exiting gracefully.\"\n    exit 0\nfi\n"
  },
  {
    "path": "backend/user_quotas.py",
    "content": "\"\"\"In-memory daily quota for Claude session creations.\n\nTracks per-user Claude session starts against a daily cap derived from the\nuser's HF plan. Caps reset at UTC midnight; the store itself is in-process\nand wipes on restart (deliberate — the cost of occasional over-subsidy at\nrestart is much lower than running a DB).\n\nUnit: session *creations*, not messages. A user who selects Claude in a new\nsession consumes one quota point; switching an existing Claude session to\nClaude again doesn't (`AgentSession.claude_counted` guards that).\n\nCap tiers:\n  free user   → CLAUDE_FREE_DAILY (1)\n  pro / org   → CLAUDE_PRO_DAILY  (20)\n\"\"\"\n\nimport asyncio\nimport os\nfrom datetime import UTC, datetime\n\nCLAUDE_FREE_DAILY: int = int(os.environ.get(\"CLAUDE_FREE_DAILY\", \"1\"))\nCLAUDE_PRO_DAILY: int = int(os.environ.get(\"CLAUDE_PRO_DAILY\", \"20\"))\n\n# user_id -> (day_utc_iso, count_for_that_day)\n_claude_counts: dict[str, tuple[str, int]] = {}\n_lock = asyncio.Lock()\n\n\ndef _today() -> str:\n    return datetime.now(UTC).date().isoformat()\n\n\ndef daily_cap_for(plan: str | None) -> int:\n    \"\"\"Return the daily Claude-session cap for the given plan.\"\"\"\n    return CLAUDE_FREE_DAILY if (plan or \"free\") == \"free\" else CLAUDE_PRO_DAILY\n\n\nasync def get_claude_used_today(user_id: str) -> int:\n    \"\"\"Return today's Claude session count for the user (0 if none / stale day).\"\"\"\n    async with _lock:\n        entry = _claude_counts.get(user_id)\n        if entry is None:\n            return 0\n        day, count = entry\n        if day != _today():\n            # Stale day — drop the entry so the first increment starts fresh.\n            _claude_counts.pop(user_id, None)\n            return 0\n        return count\n\n\nasync def increment_claude(user_id: str) -> int:\n    \"\"\"Bump today's Claude session count for the user. 
Returns the new value.\"\"\"\n    async with _lock:\n        today = _today()\n        day, count = _claude_counts.get(user_id, (today, 0))\n        if day != today:\n            count = 0\n        count += 1\n        _claude_counts[user_id] = (today, count)\n        return count\n\n\nasync def refund_claude(user_id: str) -> None:\n    \"\"\"Decrement today's count — used when session creation fails after a successful gate.\"\"\"\n    async with _lock:\n        entry = _claude_counts.get(user_id)\n        if entry is None:\n            return\n        day, count = entry\n        if day != _today():\n            _claude_counts.pop(user_id, None)\n            return\n        new_count = max(0, count - 1)\n        if new_count == 0:\n            _claude_counts.pop(user_id, None)\n        else:\n            _claude_counts[user_id] = (day, new_count)\n\n\ndef _reset_for_tests() -> None:\n    \"\"\"Test-only: clear the in-memory store.\"\"\"\n    _claude_counts.clear()\n"
  },
  {
    "path": "configs/main_agent_config.json",
    "content": "{\n  \"model_name\": \"bedrock/us.anthropic.claude-opus-4-6-v1\",\n  \"save_sessions\": true,\n  \"session_dataset_repo\": \"akseljoonas/hf-agent-sessions\",\n  \"yolo_mode\": false,\n  \"confirm_cpu_jobs\": true,\n  \"auto_file_upload\": true,\n  \"mcpServers\": {\n    \"hf-mcp-server\": {\n      \"transport\": \"http\",\n      \"url\": \"https://huggingface.co/mcp?login\"\n    }\n  }\n}\n"
  },
  {
    "path": "frontend/eslint.config.js",
    "content": "import js from '@eslint/js'\nimport globals from 'globals'\nimport reactHooks from 'eslint-plugin-react-hooks'\nimport reactRefresh from 'eslint-plugin-react-refresh'\nimport tseslint from 'typescript-eslint'\n\nexport default tseslint.config(\n  { ignores: ['dist'] },\n  {\n    extends: [js.configs.recommended, ...tseslint.configs.recommended],\n    files: ['**/*.{ts,tsx}'],\n    languageOptions: {\n      ecmaVersion: 2020,\n      globals: globals.browser,\n    },\n    plugins: {\n      'react-hooks': reactHooks,\n      'react-refresh': reactRefresh,\n    },\n    rules: {\n      ...reactHooks.configs.recommended.rules,\n      'react-refresh/only-export-components': [\n        'warn',\n        { allowConstantExport: true },\n      ],\n    },\n  },\n)\n"
  },
  {
    "path": "frontend/index.html",
    "content": "<!DOCTYPE html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"UTF-8\" />\n    <link rel=\"icon\" type=\"image/webp\" href=\"/smolagents.webp\" />\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n    <title>ML Intern</title>\n    <link rel=\"preconnect\" href=\"https://fonts.googleapis.com\" />\n    <link rel=\"preconnect\" href=\"https://fonts.gstatic.com\" crossorigin />\n    <link href=\"https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap\" rel=\"stylesheet\" />\n  </head>\n  <body style=\"margin: 0; padding: 0; background-color: #0D1117; color: #E6EDF3;\">\n    <div id=\"root\"></div>\n    <script type=\"module\" src=\"/src/main.tsx\"></script>\n  </body>\n</html>\n"
  },
  {
    "path": "frontend/package.json",
    "content": "{\n  \"name\": \"hf-agent-frontend\",\n  \"private\": true,\n  \"version\": \"1.0.0\",\n  \"type\": \"module\",\n  \"scripts\": {\n    \"dev\": \"vite\",\n    \"build\": \"tsc -b && vite build\",\n    \"lint\": \"eslint .\",\n    \"preview\": \"vite preview\"\n  },\n  \"dependencies\": {\n    \"@ai-sdk/react\": \"^3.0.93\",\n    \"@emotion/react\": \"^11.13.0\",\n    \"@emotion/styled\": \"^11.13.0\",\n    \"@mui/icons-material\": \"^6.1.0\",\n    \"@mui/material\": \"^6.1.0\",\n    \"ai\": \"^6.0.91\",\n    \"react\": \"^18.3.1\",\n    \"react-dom\": \"^18.3.1\",\n    \"react-markdown\": \"^9.0.1\",\n    \"react-syntax-highlighter\": \"^16.1.0\",\n    \"remark-gfm\": \"^4.0.1\",\n    \"zustand\": \"^5.0.0\"\n  },\n  \"devDependencies\": {\n    \"@eslint/js\": \"^9.13.0\",\n    \"@types/react\": \"^18.3.12\",\n    \"@types/react-dom\": \"^18.3.1\",\n    \"@types/react-syntax-highlighter\": \"^15.5.13\",\n    \"@vitejs/plugin-react\": \"^4.3.3\",\n    \"eslint\": \"^9.13.0\",\n    \"eslint-plugin-react-hooks\": \"^5.0.0\",\n    \"eslint-plugin-react-refresh\": \"^0.4.13\",\n    \"globals\": \"^15.11.0\",\n    \"typescript\": \"~5.6.2\",\n    \"typescript-eslint\": \"^8.10.0\",\n    \"vite\": \"^5.4.10\"\n  }\n}\n"
  },
  {
    "path": "frontend/src/App.tsx",
    "content": "import { Box } from '@mui/material';\nimport AppLayout from '@/components/Layout/AppLayout';\nimport { useAuth } from '@/hooks/useAuth';\n\nfunction App() {\n  // Non-blocking auth check — fires in background, updates store when done.\n  // If auth fails later, apiFetch redirects to /auth/login.\n  useAuth();\n\n  return (\n    <Box sx={{ height: '100vh', display: 'flex' }}>\n      <AppLayout />\n    </Box>\n  );\n}\n\nexport default App;\n"
  },
  {
    "path": "frontend/src/components/Chat/ActivityStatusBar.tsx",
    "content": "import { Box, Typography } from '@mui/material';\nimport { keyframes } from '@mui/system';\nimport { useAgentStore, type ActivityStatus } from '@/store/agentStore';\n\nconst shimmer = keyframes`\n  0% { background-position: -100% center; }\n  50% { background-position: 200% center; }\n  100% { background-position: -100% center; }\n`;\n\nconst TOOL_LABELS: Record<string, string> = {\n  sandbox_create: 'Creating sandbox for code development, this might take 1-2 minutes',\n  bash: 'Running command in sandbox',\n  hf_jobs: 'Running a GPU job, this might take a while',\n  hf_repo_files: 'Uploading file',\n  hf_repo_git: 'Git operation',\n  hf_inspect_dataset: 'Inspecting dataset',\n  hf_search: 'Searching',\n  plan_tool: 'Planning',\n  research: 'Researching',\n};\n\n/** Format raw research log into a clean status label. */\nfunction formatResearchStatus(raw: string): string {\n  const s = raw.replace(/^▸\\s*/, '');\n  const jsonStart = s.indexOf('{');\n  const toolName = jsonStart > 0 ? s.slice(0, jsonStart).trim() : s.trim();\n  let args: Record<string, string> = {};\n  if (jsonStart > 0) {\n    const jsonStr = s.slice(jsonStart);\n    try {\n      const parsed = JSON.parse(jsonStr);\n      for (const [k, v] of Object.entries(parsed)) {\n        if (typeof v === 'string') args[k] = v;\n      }\n    } catch {\n      // JSON is likely truncated — extract complete \"key\": \"value\" pairs\n      for (const m of jsonStr.matchAll(/\"(\\w+)\":\\s*\"([^\"]*)\"/g)) {\n        args[m[1]] = m[2];\n      }\n      // Also try to extract a truncated value for known keys if not found yet\n      if (!args.query && !args.arxiv_id) {\n        const partial = jsonStr.match(/\"(query|arxiv_id)\":\\s*\"([^\"]*)/);\n        if (partial) args[partial[1]] = partial[2];\n      }\n    }\n  }\n\n  if (toolName === 'github_find_examples') {\n    const d = (args.keyword) || (args.repo);\n    return d ? 
`Finding examples: ${d}` : 'Finding examples';\n  }\n  if (toolName === 'github_read_file') {\n    const f = ((args.path) || '').split('/').pop();\n    return f ? `Reading ${f}` : 'Reading file';\n  }\n  if (toolName === 'explore_hf_docs') {\n    const d = (args.endpoint) || (args.query);\n    return d ? `Exploring docs: ${d}` : 'Exploring docs';\n  }\n  if (toolName === 'fetch_hf_docs') {\n    const p = ((args.url) || '').split('/').pop()?.replace(/\\.md$/, '');\n    return p ? `Reading docs: ${p}` : 'Fetching docs';\n  }\n  if (toolName === 'hf_inspect_dataset') {\n    const d = args.dataset as string;\n    return d ? `Inspecting dataset: ${d}` : 'Inspecting dataset';\n  }\n  if (toolName === 'hf_papers') {\n    const op = args.operation as string;\n    const detail = (args.query) || (args.arxiv_id) || (args.positive_ids);\n    const opLabels: Record<string, string> = {\n      trending: 'Browsing trending papers',\n      search: 'Searching papers',\n      paper_details: 'Reading paper details',\n      read_paper: 'Reading paper',\n      citation_graph: 'Tracing citations',\n      snippet_search: 'Searching paper passages',\n      recommend: 'Finding similar papers',\n      find_datasets: 'Finding paper datasets',\n      find_models: 'Finding paper models',\n      find_collections: 'Finding paper collections',\n      find_all_resources: 'Finding paper resources',\n    };\n    const base = (op && opLabels[op]) || 'Searching papers';\n    return detail ? `${base}: ${detail}` : base;\n  }\n  if (toolName === 'find_hf_api') {\n    const d = (args.query) || (args.tag);\n    return d ? `Finding API: ${d}` : 'Finding API endpoints';\n  }\n  if (toolName === 'hf_repo_files') {\n    const d = (args.repo_id) || (args.repo);\n    return d ? 
`Reading ${d} files` : 'Reading repo files';\n  }\n  return 'Researching';\n}\n\nfunction statusLabel(status: ActivityStatus): string {\n  switch (status.type) {\n    case 'thinking': return 'Thinking';\n    case 'streaming': return 'Writing';\n    case 'tool': {\n      if (status.toolName === 'research' && status.description) {\n        return formatResearchStatus(status.description);\n      }\n      const base = status.description || TOOL_LABELS[status.toolName] || `Running ${status.toolName}`;\n      if (status.toolName === 'bash' && status.description && /install/i.test(status.description)) {\n        return `${base} — this can take a few minutes, sit tight`;\n      }\n      return base;\n    }\n    case 'waiting-approval': return 'Waiting for approval';\n    case 'cancelled': return 'What should the agent do instead?';\n    default: return '';\n  }\n}\n\nexport default function ActivityStatusBar() {\n  const activityStatus = useAgentStore(s => s.activityStatus);\n\n  if (activityStatus.type === 'idle') return null;\n\n  const label = statusLabel(activityStatus);\n\n  return (\n    <Box sx={{ px: 2, py: 0.5, minHeight: 28, display: 'flex', alignItems: 'center' }}>\n      <Typography\n        sx={{\n          fontFamily: 'monospace',\n          fontSize: '0.72rem',\n          fontWeight: 500,\n          letterSpacing: '0.02em',\n          background: 'linear-gradient(90deg, var(--muted-text) 30%, var(--text) 50%, var(--muted-text) 70%)',\n          backgroundSize: '250% 100%',\n          backgroundClip: 'text',\n          WebkitBackgroundClip: 'text',\n          WebkitTextFillColor: 'transparent',\n          animation: `${shimmer} 4s ease-in-out infinite`,\n        }}\n      >\n        {label}{activityStatus.type !== 'cancelled' && '…'}\n      </Typography>\n    </Box>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/Chat/AssistantMessage.tsx",
    "content": "import { useMemo } from 'react';\nimport { Box, Stack, Typography } from '@mui/material';\nimport MarkdownContent from './MarkdownContent';\nimport ToolCallGroup from './ToolCallGroup';\nimport type { UIMessage } from 'ai';\nimport type { MessageMeta } from '@/types/agent';\n\ninterface AssistantMessageProps {\n  message: UIMessage;\n  isStreaming?: boolean;\n  approveTools: (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null }>) => Promise<boolean>;\n}\n\n/**\n * Groups consecutive tool parts together so they render as a single\n * ToolCallGroup (visually identical to the old segments approach).\n */\ntype DynamicToolPart = Extract<UIMessage['parts'][number], { type: 'dynamic-tool' }>;\n\nfunction groupParts(parts: UIMessage['parts']) {\n  const groups: Array<\n    | { kind: 'text'; text: string; idx: number }\n    | { kind: 'tools'; tools: DynamicToolPart[]; idx: number }\n  > = [];\n\n  for (let i = 0; i < parts.length; i++) {\n    const part = parts[i];\n\n    if (part.type === 'text') {\n      groups.push({ kind: 'text', text: part.text, idx: i });\n    } else if (part.type === 'dynamic-tool') {\n      const toolPart = part as DynamicToolPart;\n      const last = groups[groups.length - 1];\n      if (last?.kind === 'tools') {\n        last.tools.push(toolPart);\n      } else {\n        groups.push({ kind: 'tools', tools: [toolPart], idx: i });\n      }\n    }\n    // step-start, step-end, etc. 
are ignored visually\n  }\n\n  return groups;\n}\n\nexport default function AssistantMessage({ message, isStreaming = false, approveTools }: AssistantMessageProps) {\n  const groups = useMemo(() => groupParts(message.parts), [message.parts]);\n\n  // Find the last text group index for streaming cursor\n  let lastTextIdx = -1;\n  for (let i = groups.length - 1; i >= 0; i--) {\n    if (groups[i].kind === 'text') { lastTextIdx = i; break; }\n  }\n\n  const meta = message.metadata as MessageMeta | undefined;\n  const timeStr = meta?.createdAt\n    ? new Date(meta.createdAt).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })\n    : null;\n\n  if (groups.length === 0) return null;\n\n  return (\n    <Box sx={{ minWidth: 0 }}>\n      <Stack direction=\"row\" alignItems=\"baseline\" spacing={1} sx={{ mb: 0.5 }}>\n        <Typography\n          variant=\"caption\"\n          sx={{\n            fontWeight: 700,\n            fontSize: '0.72rem',\n            color: 'var(--muted-text)',\n            textTransform: 'uppercase',\n            letterSpacing: '0.04em',\n          }}\n        >\n          Assistant\n        </Typography>\n        {timeStr && (\n          <Typography variant=\"caption\" sx={{ color: 'var(--muted-text)', fontSize: '0.7rem' }}>\n            {timeStr}\n          </Typography>\n        )}\n      </Stack>\n\n      <Box\n        sx={{\n          maxWidth: { xs: '95%', md: '85%' },\n          bgcolor: 'var(--surface)',\n          borderRadius: 1.5,\n          borderTopLeftRadius: 4,\n          px: { xs: 1.5, md: 2.5 },\n          py: 1.5,\n          border: '1px solid var(--border)',\n        }}\n      >\n        {groups.map((group, i) => {\n          if (group.kind === 'text' && group.text) {\n            return (\n              <MarkdownContent\n                key={group.idx}\n                content={group.text}\n                isStreaming={isStreaming && i === lastTextIdx}\n              />\n            );\n          }\n          if 
(group.kind === 'tools' && group.tools.length > 0) {\n            return (\n              <ToolCallGroup\n                key={group.idx}\n                tools={group.tools}\n                approveTools={approveTools}\n              />\n            );\n          }\n          return null;\n        })}\n      </Box>\n    </Box>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/Chat/ChatInput.tsx",
    "content": "import { useState, useCallback, useEffect, useRef, KeyboardEvent } from 'react';\nimport { Box, TextField, IconButton, CircularProgress, Typography, Menu, MenuItem, ListItemIcon, ListItemText, Chip } from '@mui/material';\nimport ArrowUpwardIcon from '@mui/icons-material/ArrowUpward';\nimport ArrowDropDownIcon from '@mui/icons-material/ArrowDropDown';\nimport StopIcon from '@mui/icons-material/Stop';\nimport { apiFetch } from '@/utils/api';\nimport { useUserQuota } from '@/hooks/useUserQuota';\nimport ClaudeCapDialog from '@/components/ClaudeCapDialog';\nimport { useAgentStore } from '@/store/agentStore';\nimport { FIRST_FREE_MODEL_PATH } from '@/utils/model';\n\n// Model configuration\ninterface ModelOption {\n  id: string;\n  name: string;\n  description: string;\n  modelPath: string;\n  avatarUrl: string;\n  recommended?: boolean;\n}\n\nconst getHfAvatarUrl = (modelId: string) => {\n  const org = modelId.split('/')[0];\n  return `https://huggingface.co/api/avatars/${org}`;\n};\n\nconst MODEL_OPTIONS: ModelOption[] = [\n  {\n    id: 'kimi-k2.6',\n    name: 'Kimi K2.6',\n    description: 'Novita',\n    modelPath: 'moonshotai/Kimi-K2.6',\n    avatarUrl: getHfAvatarUrl('moonshotai/Kimi-K2.6'),\n    recommended: true,\n  },\n  {\n    id: 'claude-opus',\n    name: 'Claude Opus 4.6',\n    description: 'Anthropic',\n    modelPath: 'anthropic/claude-opus-4-6',\n    avatarUrl: 'https://huggingface.co/api/avatars/Anthropic',\n    recommended: true,\n  },\n  {\n    id: 'minimax-m2.7',\n    name: 'MiniMax M2.7',\n    description: 'Novita',\n    modelPath: 'MiniMaxAI/MiniMax-M2.7',\n    avatarUrl: getHfAvatarUrl('MiniMaxAI/MiniMax-M2.7'),\n  },\n  {\n    id: 'glm-5.1',\n    name: 'GLM 5.1',\n    description: 'Together',\n    modelPath: 'zai-org/GLM-5.1',\n    avatarUrl: getHfAvatarUrl('zai-org/GLM-5.1'),\n  },\n];\n\nconst findModelByPath = (path: string): ModelOption | undefined => {\n  return MODEL_OPTIONS.find(m => m.modelPath === path || 
path?.includes(m.id));\n};\n\ninterface ChatInputProps {\n  sessionId?: string;\n  onSend: (text: string) => void;\n  onStop?: () => void;\n  isProcessing?: boolean;\n  disabled?: boolean;\n  placeholder?: string;\n}\n\nconst isClaudeModel = (m: ModelOption) => m.modelPath.startsWith('anthropic/');\nconst firstFreeModel = () => MODEL_OPTIONS.find(m => !isClaudeModel(m)) ?? MODEL_OPTIONS[0];\n\nexport default function ChatInput({ sessionId, onSend, onStop, isProcessing = false, disabled = false, placeholder = 'Ask anything...' }: ChatInputProps) {\n  const [input, setInput] = useState('');\n  const inputRef = useRef<HTMLTextAreaElement>(null);\n  const [selectedModelId, setSelectedModelId] = useState<string>(MODEL_OPTIONS[0].id);\n  const [modelAnchorEl, setModelAnchorEl] = useState<null | HTMLElement>(null);\n  const { quota, refresh: refreshQuota } = useUserQuota();\n  // The daily-cap dialog is triggered from two places: (a) a 429 returned\n  // from the chat transport when the user tries to send on Opus over cap —\n  // surfaced via the agent-store flag — and (b) nothing else right now\n  // (switching models is free). Keeping the open state in the store means\n  // the hook layer can flip it without threading props through.\n  const claudeQuotaExhausted = useAgentStore((s) => s.claudeQuotaExhausted);\n  const setClaudeQuotaExhausted = useAgentStore((s) => s.setClaudeQuotaExhausted);\n  const lastSentRef = useRef<string>('');\n\n  // Model is per-session: fetch this tab's current model every time the\n  // session changes. Other tabs keep their own selections independently.\n  useEffect(() => {\n    if (!sessionId) return;\n    let cancelled = false;\n    apiFetch(`/api/session/${sessionId}`)\n      .then((res) => (res.ok ? 
res.json() : null))\n      .then((data) => {\n        if (cancelled) return;\n        if (data?.model) {\n          const model = findModelByPath(data.model);\n          if (model) setSelectedModelId(model.id);\n        }\n      })\n      .catch(() => { /* ignore */ });\n    return () => { cancelled = true; };\n  }, [sessionId]);\n\n  const selectedModel = MODEL_OPTIONS.find(m => m.id === selectedModelId) || MODEL_OPTIONS[0];\n\n  // Auto-focus the textarea when the session becomes ready\n  useEffect(() => {\n    if (!disabled && !isProcessing && inputRef.current) {\n      inputRef.current.focus();\n    }\n  }, [disabled, isProcessing]);\n\n  const handleSend = useCallback(() => {\n    if (input.trim() && !disabled) {\n      lastSentRef.current = input;\n      onSend(input);\n      setInput('');\n    }\n  }, [input, disabled, onSend]);\n\n  // When the chat transport reports a Claude-quota 429, restore the typed\n  // text so the user doesn't lose their message.\n  useEffect(() => {\n    if (claudeQuotaExhausted && lastSentRef.current) {\n      setInput(lastSentRef.current);\n    }\n  }, [claudeQuotaExhausted]);\n\n  // Refresh the quota display whenever the session changes (user might\n  // have started another tab that spent quota).\n  useEffect(() => {\n    if (sessionId) refreshQuota();\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n  }, [sessionId]);\n\n  const handleKeyDown = useCallback(\n    (e: KeyboardEvent<HTMLDivElement>) => {\n      if (e.key === 'Enter' && !e.shiftKey) {\n        e.preventDefault();\n        handleSend();\n      }\n    },\n    [handleSend]\n  );\n\n  const handleModelClick = (event: React.MouseEvent<HTMLElement>) => {\n    setModelAnchorEl(event.currentTarget);\n  };\n\n  const handleModelClose = () => {\n    setModelAnchorEl(null);\n  };\n\n  const handleSelectModel = async (model: ModelOption) => {\n    handleModelClose();\n    if (!sessionId) return;\n    try {\n      const res = await 
apiFetch(`/api/session/${sessionId}/model`, {\n        method: 'POST',\n        body: JSON.stringify({ model: model.modelPath }),\n      });\n      if (res.ok) setSelectedModelId(model.id);\n    } catch { /* ignore */ }\n  };\n\n  // Dialog close: just clear the flag. The typed text is already restored.\n  const handleCapDialogClose = useCallback(() => {\n    setClaudeQuotaExhausted(false);\n  }, [setClaudeQuotaExhausted]);\n\n  // \"Use a free model\" — switch the current session to Kimi (or the first\n  // non-Anthropic option) and auto-retry the send that tripped the cap.\n  const handleUseFreeModel = useCallback(async () => {\n    setClaudeQuotaExhausted(false);\n    if (!sessionId) return;\n    const free = MODEL_OPTIONS.find(m => m.modelPath === FIRST_FREE_MODEL_PATH)\n      ?? firstFreeModel();\n    try {\n      const res = await apiFetch(`/api/session/${sessionId}/model`, {\n        method: 'POST',\n        body: JSON.stringify({ model: free.modelPath }),\n      });\n      if (res.ok) {\n        setSelectedModelId(free.id);\n        const retryText = lastSentRef.current;\n        if (retryText) {\n          onSend(retryText);\n          setInput('');\n          lastSentRef.current = '';\n        }\n      }\n    } catch { /* ignore */ }\n  }, [sessionId, onSend, setClaudeQuotaExhausted]);\n\n  // Hide the chip until the user has actually burned quota — an unused\n  // Opus session shouldn't populate a counter.\n  const claudeChip = (() => {\n    if (!quota || quota.claudeUsedToday === 0) return null;\n    if (quota.plan === 'free') {\n      return quota.claudeRemaining > 0 ? 
'Free today' : 'Pro only';\n    }\n    return `${quota.claudeUsedToday}/${quota.claudeDailyCap} today`;\n  })();\n\n  return (\n    <Box\n      sx={{\n        pb: { xs: 2, md: 4 },\n        pt: { xs: 1, md: 2 },\n        position: 'relative',\n        zIndex: 10,\n      }}\n    >\n      <Box sx={{ maxWidth: '880px', mx: 'auto', width: '100%', px: { xs: 0, sm: 1, md: 2 } }}>\n        <Box\n          className=\"composer\"\n          sx={{\n            display: 'flex',\n            gap: '10px',\n            alignItems: 'flex-start',\n            bgcolor: 'var(--composer-bg)',\n            borderRadius: 'var(--radius-md)',\n            p: '12px',\n            border: '1px solid var(--border)',\n            transition: 'box-shadow 0.2s ease, border-color 0.2s ease',\n            '&:focus-within': {\n                borderColor: 'var(--accent-yellow)',\n                boxShadow: 'var(--focus)',\n            }\n          }}\n        >\n          <TextField\n            fullWidth\n            multiline\n            maxRows={6}\n            value={input}\n            onChange={(e) => setInput(e.target.value)}\n            onKeyDown={handleKeyDown}\n            placeholder={placeholder}\n            disabled={disabled || isProcessing}\n            variant=\"standard\"\n            inputRef={inputRef}\n            InputProps={{\n                disableUnderline: true,\n                sx: {\n                    color: 'var(--text)',\n                    fontSize: '15px',\n                    fontFamily: 'inherit',\n                    padding: 0,\n                    lineHeight: 1.5,\n                    minHeight: { xs: '44px', md: '56px' },\n                    alignItems: 'flex-start',\n                }\n            }}\n            sx={{\n                flex: 1,\n                '& .MuiInputBase-root': {\n                    p: 0,\n                    backgroundColor: 'transparent',\n                },\n                '& textarea': {\n                    resize: 
'none',\n                    padding: '0 !important',\n                }\n            }}\n          />\n          {isProcessing ? (\n            <IconButton\n              onClick={onStop}\n              sx={{\n                mt: 1,\n                p: 1.5,\n                borderRadius: '10px',\n                color: 'var(--muted-text)',\n                transition: 'all 0.2s',\n                position: 'relative',\n                '&:hover': {\n                  bgcolor: 'var(--hover-bg)',\n                  color: 'var(--accent-red)',\n                },\n              }}\n            >\n              <Box sx={{ position: 'relative', display: 'flex', alignItems: 'center', justifyContent: 'center' }}>\n                <CircularProgress size={28} thickness={3} sx={{ color: 'inherit', position: 'absolute' }} />\n                <StopIcon sx={{ fontSize: 16 }} />\n              </Box>\n            </IconButton>\n          ) : (\n            <IconButton\n              onClick={handleSend}\n              disabled={disabled || !input.trim()}\n              sx={{\n                mt: 1,\n                p: 1,\n                borderRadius: '10px',\n                color: 'var(--muted-text)',\n                transition: 'all 0.2s',\n                '&:hover': {\n                  color: 'var(--accent-yellow)',\n                  bgcolor: 'var(--hover-bg)',\n                },\n                '&.Mui-disabled': {\n                  opacity: 0.3,\n                },\n              }}\n            >\n              <ArrowUpwardIcon fontSize=\"small\" />\n            </IconButton>\n          )}\n        </Box>\n\n        {/* Powered By Badge */}\n        <Box\n          onClick={handleModelClick}\n          sx={{\n            display: 'flex',\n            alignItems: 'center',\n            justifyContent: 'center',\n            mt: 1.5,\n            gap: 0.8,\n            opacity: 0.6,\n            cursor: 'pointer',\n            transition: 'opacity 0.2s',\n            
'&:hover': {\n              opacity: 1\n            }\n          }}\n        >\n          <Typography variant=\"caption\" sx={{ fontSize: '10px', color: 'var(--muted-text)', textTransform: 'uppercase', letterSpacing: '0.05em', fontWeight: 500 }}>\n            powered by\n          </Typography>\n          <img\n            src={selectedModel.avatarUrl}\n            alt={selectedModel.name}\n            style={{ height: '14px', width: '14px', objectFit: 'contain', borderRadius: '2px' }}\n          />\n          <Typography variant=\"caption\" sx={{ fontSize: '10px', color: 'var(--text)', fontWeight: 600, letterSpacing: '0.02em' }}>\n            {selectedModel.name}\n          </Typography>\n          <ArrowDropDownIcon sx={{ fontSize: '14px', color: 'var(--muted-text)' }} />\n        </Box>\n\n        {/* Model Selection Menu */}\n        <Menu\n          anchorEl={modelAnchorEl}\n          open={Boolean(modelAnchorEl)}\n          onClose={handleModelClose}\n          anchorOrigin={{\n            vertical: 'top',\n            horizontal: 'center',\n          }}\n          transformOrigin={{\n            vertical: 'bottom',\n            horizontal: 'center',\n          }}\n          slotProps={{\n            paper: {\n              sx: {\n                bgcolor: 'var(--panel)',\n                border: '1px solid var(--divider)',\n                mb: 1,\n                maxHeight: '400px',\n              }\n            }\n          }}\n        >\n          {MODEL_OPTIONS.map((model) => (\n            <MenuItem\n              key={model.id}\n              onClick={() => handleSelectModel(model)}\n              selected={selectedModelId === model.id}\n              sx={{\n                py: 1.5,\n                '&.Mui-selected': {\n                  bgcolor: 'rgba(255,255,255,0.05)',\n                }\n              }}\n            >\n              <ListItemIcon>\n                <img\n                  src={model.avatarUrl}\n                  alt={model.name}\n    
              style={{ width: 24, height: 24, borderRadius: '4px', objectFit: 'cover' }}\n                />\n              </ListItemIcon>\n              <ListItemText\n                primary={\n                  <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>\n                    {model.name}\n                    {model.recommended && (\n                      <Chip\n                        label=\"Recommended\"\n                        size=\"small\"\n                        sx={{\n                          height: '18px',\n                          fontSize: '10px',\n                          bgcolor: 'var(--accent-yellow)',\n                          color: '#000',\n                          fontWeight: 600,\n                        }}\n                      />\n                    )}\n                    {isClaudeModel(model) && claudeChip && (\n                      <Chip\n                        label={claudeChip}\n                        size=\"small\"\n                        sx={{\n                          height: '18px',\n                          fontSize: '10px',\n                          bgcolor: 'rgba(255,255,255,0.08)',\n                          color: 'var(--muted-text)',\n                          fontWeight: 600,\n                        }}\n                      />\n                    )}\n                  </Box>\n                }\n                secondary={model.description}\n                secondaryTypographyProps={{\n                  sx: { fontSize: '12px', color: 'var(--muted-text)' }\n                }}\n              />\n            </MenuItem>\n          ))}\n        </Menu>\n\n        <ClaudeCapDialog\n          open={claudeQuotaExhausted}\n          plan={quota?.plan ?? 'free'}\n          cap={quota?.claudeDailyCap ?? 1}\n          onClose={handleCapDialogClose}\n          onUseFreeModel={handleUseFreeModel}\n        />\n      </Box>\n    </Box>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/Chat/ExpiredBanner.tsx",
    "content": "/**\n * Shown inline in a chat when the backend no longer recognizes the\n * session id (typically: Space was restarted). Lets the user catch the\n * agent up with a summary of the prior conversation, or start over.\n */\nimport { useState, useCallback } from 'react';\nimport { Box, Button, CircularProgress, Typography } from '@mui/material';\nimport { apiFetch } from '@/utils/api';\nimport { useSessionStore } from '@/store/sessionStore';\nimport { useAgentStore } from '@/store/agentStore';\nimport { loadBackendMessages } from '@/lib/backend-message-store';\nimport { loadMessages } from '@/lib/chat-message-store';\nimport { uiMessagesToLLMMessages } from '@/lib/convert-llm-messages';\nimport { logger } from '@/utils/logger';\n\ninterface Props {\n  sessionId: string;\n}\n\nexport default function ExpiredBanner({ sessionId }: Props) {\n  const { renameSession, deleteSession } = useSessionStore();\n  const [busy, setBusy] = useState<'catch-up' | 'start-over' | null>(null);\n  const [error, setError] = useState<string | null>(null);\n\n  const handleCatchUp = useCallback(async () => {\n    setBusy('catch-up');\n    setError(null);\n    try {\n      // Prefer the raw backend-message cache; fall back to reconstructing\n      // from UIMessages (for sessions that predate the backend cache).\n      let messages = loadBackendMessages(sessionId);\n      if (!messages || messages.length === 0) {\n        const uiMsgs = loadMessages(sessionId);\n        if (uiMsgs.length > 0) messages = uiMessagesToLLMMessages(uiMsgs);\n      }\n      if (!messages || messages.length === 0) {\n        setError('Nothing to summarize from this chat.');\n        setBusy(null);\n        return;\n      }\n\n      const res = await apiFetch('/api/session/restore-summary', {\n        method: 'POST',\n        body: JSON.stringify({ messages }),\n      });\n      if (!res.ok) throw new Error(`restore-summary failed: ${res.status}`);\n      const data = await res.json();\n      const 
newId = data.session_id as string | undefined;\n      if (!newId) throw new Error('no session_id in response');\n\n      useAgentStore.getState().clearSessionState(sessionId);\n      renameSession(sessionId, newId);\n    } catch (e) {\n      logger.warn('Catch-up failed:', e);\n      setError(\"Couldn't catch up — try starting over.\");\n      setBusy(null);\n    }\n  }, [sessionId, renameSession]);\n\n  const handleStartOver = useCallback(() => {\n    setBusy('start-over');\n    useAgentStore.getState().clearSessionState(sessionId);\n    deleteSession(sessionId);\n  }, [sessionId, deleteSession]);\n\n  return (\n    <Box\n      sx={{\n        mx: { xs: 2, md: 'auto' },\n        my: 2,\n        maxWidth: 720,\n        p: 2.5,\n        borderRadius: 2,\n        border: '1px solid',\n        borderColor: 'divider',\n        bgcolor: 'background.paper',\n        boxShadow: '0 1px 3px rgba(0,0,0,0.06)',\n      }}\n    >\n      <Typography variant=\"body1\" sx={{ fontWeight: 600, mb: 0.5 }}>\n        Where were we?\n      </Typography>\n      <Typography variant=\"body2\" sx={{ color: 'text.secondary', mb: 2 }}>\n        Let me skim the conversation so far and pick up right where we left\n        off — or we can start something new.\n      </Typography>\n      <Box sx={{ display: 'flex', gap: 1, flexWrap: 'wrap' }}>\n        <Button\n          variant=\"contained\"\n          onClick={handleCatchUp}\n          disabled={busy !== null}\n          startIcon={busy === 'catch-up' ? <CircularProgress size={16} color=\"inherit\" /> : null}\n          sx={{ textTransform: 'none' }}\n        >\n          {busy === 'catch-up' ? 
'Catching up…' : 'Catch me up'}\n        </Button>\n        <Button\n          variant=\"outlined\"\n          onClick={handleStartOver}\n          disabled={busy !== null}\n          sx={{ textTransform: 'none' }}\n        >\n          Start fresh\n        </Button>\n      </Box>\n      {error && (\n        <Typography variant=\"caption\" sx={{ display: 'block', mt: 1.5, color: 'error.main' }}>\n          {error}\n        </Typography>\n      )}\n    </Box>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/Chat/MarkdownContent.tsx",
    "content": "import { useMemo, useRef, useState, useEffect } from 'react';\nimport { Box } from '@mui/material';\nimport ReactMarkdown from 'react-markdown';\nimport remarkGfm from 'remark-gfm';\nimport type { SxProps, Theme } from '@mui/material/styles';\n\ninterface MarkdownContentProps {\n  content: string;\n  sx?: SxProps<Theme>;\n  /** When true, shows a blinking cursor and throttles renders. */\n  isStreaming?: boolean;\n}\n\n/** Shared markdown styles — adapts to light/dark via CSS variables. */\nconst markdownSx: SxProps<Theme> = {\n  fontSize: '0.925rem',\n  lineHeight: 1.7,\n  color: 'var(--text)',\n  wordBreak: 'break-word',\n\n  '& p': { m: 0, mb: 1.5, '&:last-child': { mb: 0 } },\n\n  '& h1, & h2, & h3, & h4': { mt: 2.5, mb: 1, fontWeight: 600, lineHeight: 1.3 },\n  '& h1': { fontSize: '1.35rem' },\n  '& h2': { fontSize: '1.15rem' },\n  '& h3': { fontSize: '1.05rem' },\n\n  '& pre': {\n    bgcolor: 'var(--code-bg)',\n    p: 2,\n    borderRadius: 2,\n    overflow: 'auto',\n    fontSize: '0.82rem',\n    lineHeight: 1.6,\n    border: '1px solid var(--tool-border)',\n    my: 2,\n  },\n  '& code': {\n    bgcolor: 'var(--hover-bg)',\n    px: 0.75,\n    py: 0.25,\n    borderRadius: 0.5,\n    fontSize: '0.84rem',\n    fontFamily: '\"JetBrains Mono\", ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',\n  },\n  '& pre code': { bgcolor: 'transparent', p: 0 },\n\n  '& a': {\n    color: 'var(--accent-yellow)',\n    textDecoration: 'none',\n    fontWeight: 500,\n    '&:hover': { textDecoration: 'underline' },\n  },\n\n  '& ul, & ol': { pl: 3, my: 1 },\n  '& li': { mb: 0.5 },\n  '& li::marker': { color: 'var(--muted-text)' },\n\n  '& blockquote': {\n    borderLeft: '3px solid var(--accent-yellow)',\n    pl: 2,\n    ml: 0,\n    my: 1.5,\n    color: 'var(--muted-text)',\n    fontStyle: 'italic',\n  },\n\n  '& table': {\n    borderCollapse: 'collapse',\n    width: '100%',\n    my: 2,\n    fontSize: '0.85rem',\n    display: 'block',\n    overflowX: 'auto',\n    
WebkitOverflowScrolling: 'touch',\n  },\n  '& thead': {\n    position: 'sticky',\n    top: 0,\n  },\n  '& th': {\n    borderBottom: '2px solid var(--border-hover)',\n    bgcolor: 'var(--hover-bg)',\n    textAlign: 'left',\n    px: 1.5,\n    py: 0.75,\n    fontWeight: 600,\n    whiteSpace: 'nowrap',\n  },\n  '& td': {\n    borderBottom: '1px solid var(--tool-border)',\n    px: 1.5,\n    py: 0.75,\n  },\n  '& tr:nth-of-type(even) td': {\n    bgcolor: 'color-mix(in srgb, var(--hover-bg) 50%, transparent)',\n  },\n\n  '& hr': {\n    border: 'none',\n    borderTop: '1px solid var(--border)',\n    my: 2,\n  },\n\n  '& img': {\n    maxWidth: '100%',\n    borderRadius: 2,\n  },\n};\n\n/**\n * Throttled content for streaming: render the full markdown through\n * ReactMarkdown but only re-parse every ~80ms to avoid layout thrashing.\n * This is the Claude approach — always render as markdown, never split\n * into raw text. The parser handles incomplete tables gracefully.\n */\nfunction useThrottledValue(value: string, isStreaming: boolean, intervalMs = 80): string {\n  const [throttled, setThrottled] = useState(value);\n  const lastUpdate = useRef(0);\n  const pending = useRef<ReturnType<typeof setTimeout> | null>(null);\n  const latestValue = useRef(value);\n  latestValue.current = value;\n\n  useEffect(() => {\n    if (!isStreaming) {\n      // Not streaming — always use latest value immediately\n      setThrottled(value);\n      return;\n    }\n\n    const now = Date.now();\n    const elapsed = now - lastUpdate.current;\n\n    if (elapsed >= intervalMs) {\n      // Enough time passed — update immediately\n      setThrottled(value);\n      lastUpdate.current = now;\n    } else {\n      // Schedule an update for the remaining time\n      if (pending.current) clearTimeout(pending.current);\n      pending.current = setTimeout(() => {\n        setThrottled(latestValue.current);\n        lastUpdate.current = Date.now();\n        pending.current = null;\n      }, intervalMs - 
elapsed);\n    }\n\n    return () => {\n      if (pending.current) clearTimeout(pending.current);\n    };\n  }, [value, isStreaming, intervalMs]);\n\n  // When streaming ends, flush immediately\n  useEffect(() => {\n    if (!isStreaming) {\n      setThrottled(latestValue.current);\n    }\n  }, [isStreaming]);\n\n  return throttled;\n}\n\nexport default function MarkdownContent({ content, sx, isStreaming = false }: MarkdownContentProps) {\n  // Throttle re-parses during streaming to ~12fps (every 80ms)\n  const displayContent = useThrottledValue(content, isStreaming);\n\n  const remarkPlugins = useMemo(() => [remarkGfm], []);\n\n  return (\n    <Box sx={[markdownSx, ...(Array.isArray(sx) ? sx : sx ? [sx] : [])]}>\n      <ReactMarkdown remarkPlugins={remarkPlugins}>{displayContent}</ReactMarkdown>\n    </Box>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/Chat/MessageBubble.tsx",
    "content": "import UserMessage from './UserMessage';\nimport AssistantMessage from './AssistantMessage';\nimport type { UIMessage } from 'ai';\n\ninterface MessageBubbleProps {\n  message: UIMessage;\n  isLastTurn?: boolean;\n  onUndoTurn?: () => void;\n  onEditAndRegenerate?: (messageId: string, newText: string) => void | Promise<void>;\n  isProcessing?: boolean;\n  isStreaming?: boolean;\n  approveTools: (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null }>) => Promise<boolean>;\n}\n\nexport default function MessageBubble({\n  message,\n  isLastTurn = false,\n  onUndoTurn,\n  onEditAndRegenerate,\n  isProcessing = false,\n  isStreaming = false,\n  approveTools,\n}: MessageBubbleProps) {\n  if (message.role === 'user') {\n    return (\n      <UserMessage\n        message={message}\n        isLastTurn={isLastTurn}\n        onUndoTurn={onUndoTurn}\n        onEditAndRegenerate={onEditAndRegenerate}\n        isProcessing={isProcessing}\n      />\n    );\n  }\n\n  if (message.role === 'assistant') {\n    return (\n      <AssistantMessage\n        message={message}\n        isStreaming={isStreaming}\n        approveTools={approveTools}\n      />\n    );\n  }\n\n  return null;\n}\n"
  },
  {
    "path": "frontend/src/components/Chat/MessageList.tsx",
    "content": "import { useCallback, useEffect, useRef, useMemo } from 'react';\nimport { Box, Stack, Typography } from '@mui/material';\nimport MessageBubble from './MessageBubble';\nimport ActivityStatusBar from './ActivityStatusBar';\nimport { useAgentStore } from '@/store/agentStore';\nimport type { UIMessage } from 'ai';\n\ninterface MessageListProps {\n  messages: UIMessage[];\n  isProcessing: boolean;\n  approveTools: (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null }>) => Promise<boolean>;\n  onUndoLastTurn: () => void | Promise<void>;\n  onEditAndRegenerate?: (messageId: string, newText: string) => void | Promise<void>;\n}\n\nfunction getGreeting(): string {\n  const h = new Date().getHours();\n  if (h < 12) return 'Morning';\n  if (h < 17) return 'Afternoon';\n  return 'Evening';\n}\n\nfunction WelcomeGreeting() {\n  const { user } = useAgentStore();\n  const firstName = user?.name?.split(' ')[0] || user?.username;\n  const greeting = firstName ? 
`${getGreeting()}, ${firstName}` : getGreeting();\n\n  return (\n    <Box\n      sx={{\n        flex: 1,\n        display: 'flex',\n        flexDirection: 'column',\n        alignItems: 'center',\n        justifyContent: 'center',\n        py: 8,\n        gap: 1.5,\n      }}\n    >\n      <Typography\n        sx={{\n          fontFamily: 'monospace',\n          fontSize: '1.6rem',\n          color: 'var(--text)',\n          fontWeight: 600,\n        }}\n      >\n        {greeting}\n      </Typography>\n      <Typography\n        color=\"text.secondary\"\n        sx={{ fontFamily: 'monospace', fontSize: '0.9rem' }}\n      >\n        Let's build something impressive?\n      </Typography>\n    </Box>\n  );\n}\n\nexport default function MessageList({ messages, isProcessing, approveTools, onUndoLastTurn, onEditAndRegenerate }: MessageListProps) {\n  const scrollContainerRef = useRef<HTMLDivElement>(null);\n  const stickToBottom = useRef(true);\n\n  const scrollToBottom = useCallback(() => {\n    const el = scrollContainerRef.current;\n    if (el) el.scrollTop = el.scrollHeight;\n  }, []);\n\n  useEffect(() => {\n    const el = scrollContainerRef.current;\n    if (!el) return;\n    const onScroll = () => {\n      const distFromBottom = el.scrollHeight - el.scrollTop - el.clientHeight;\n      stickToBottom.current = distFromBottom < 80;\n    };\n    el.addEventListener('scroll', onScroll, { passive: true });\n    return () => el.removeEventListener('scroll', onScroll);\n  }, []);\n\n  useEffect(() => {\n    if (stickToBottom.current) scrollToBottom();\n  }, [messages, isProcessing, scrollToBottom]);\n\n  useEffect(() => {\n    const el = scrollContainerRef.current;\n    if (!el) return;\n    const observer = new MutationObserver(() => {\n      if (stickToBottom.current) el.scrollTop = el.scrollHeight;\n    });\n    observer.observe(el, { childList: true, subtree: true, characterData: true });\n    return () => observer.disconnect();\n  }, []);\n\n  const lastUserMsgId = 
useMemo(() => {\n    for (let i = messages.length - 1; i >= 0; i--) {\n      if (messages[i].role === 'user') return messages[i].id;\n    }\n    return null;\n  }, [messages]);\n\n  // The last assistant message is \"streaming\" when we're processing\n  const lastAssistantId = useMemo(() => {\n    for (let i = messages.length - 1; i >= 0; i--) {\n      if (messages[i].role === 'assistant') return messages[i].id;\n    }\n    return null;\n  }, [messages]);\n\n  return (\n    <Box\n      ref={scrollContainerRef}\n      sx={{\n        flex: 1,\n        overflow: 'auto',\n        px: { xs: 0.5, sm: 1, md: 2 },\n        py: { xs: 2, md: 3 },\n        display: 'flex',\n        flexDirection: 'column',\n      }}\n    >\n      <Stack\n        spacing={3}\n        sx={{\n          maxWidth: 880,\n          mx: 'auto',\n          width: '100%',\n          flex: messages.length === 0 && !isProcessing ? 1 : undefined,\n        }}\n      >\n        {messages.length === 0 && !isProcessing ? (\n          <WelcomeGreeting />\n        ) : (\n          messages.map((msg) => (\n            <MessageBubble\n              key={msg.id}\n              message={msg}\n              isLastTurn={msg.id === lastUserMsgId}\n              onUndoTurn={onUndoLastTurn}\n              onEditAndRegenerate={onEditAndRegenerate}\n              isProcessing={isProcessing}\n              isStreaming={isProcessing && msg.id === lastAssistantId}\n              approveTools={approveTools}\n            />\n          ))\n        )}\n\n        <ActivityStatusBar />\n\n        <div />\n      </Stack>\n    </Box>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/Chat/ThinkingIndicator.tsx",
    "content": "import { Box, Typography } from '@mui/material';\n\n/** Pulsing dots shown while the agent is processing. */\nexport default function ThinkingIndicator() {\n  return (\n    <Box sx={{ pt: 0.75 }}>\n      <Typography\n        variant=\"caption\"\n        sx={{\n          fontWeight: 700,\n          fontSize: '0.72rem',\n          color: 'var(--muted-text)',\n          textTransform: 'uppercase',\n          letterSpacing: '0.04em',\n          display: 'flex',\n          alignItems: 'center',\n          gap: 0.75,\n        }}\n      >\n        Thinking\n        <Box\n          component=\"span\"\n          sx={{\n            display: 'inline-flex',\n            gap: '3px',\n            '& span': {\n              width: 4,\n              height: 4,\n              borderRadius: '50%',\n              bgcolor: 'primary.main',\n              animation: 'dotPulse 1.4s ease-in-out infinite',\n            },\n            '& span:nth-of-type(2)': { animationDelay: '0.2s' },\n            '& span:nth-of-type(3)': { animationDelay: '0.4s' },\n            '@keyframes dotPulse': {\n              '0%, 80%, 100%': { opacity: 0.25, transform: 'scale(0.8)' },\n              '40%': { opacity: 1, transform: 'scale(1)' },\n            },\n          }}\n        >\n          <span />\n          <span />\n          <span />\n        </Box>\n      </Typography>\n    </Box>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/Chat/ToolCallGroup.tsx",
    "content": "import { useCallback, useEffect, useMemo, useRef, useState } from 'react';\nimport { Box, Stack, Typography, Chip, Button, TextField, IconButton, Link, CircularProgress } from '@mui/material';\nimport CheckCircleOutlineIcon from '@mui/icons-material/CheckCircleOutline';\nimport ErrorOutlineIcon from '@mui/icons-material/ErrorOutline';\nimport OpenInNewIcon from '@mui/icons-material/OpenInNew';\nimport HourglassEmptyIcon from '@mui/icons-material/HourglassEmpty';\nimport LaunchIcon from '@mui/icons-material/Launch';\nimport SendIcon from '@mui/icons-material/Send';\nimport BlockIcon from '@mui/icons-material/Block';\nimport { useAgentStore, type ResearchAgentState } from '@/store/agentStore';\nimport { useLayoutStore } from '@/store/layoutStore';\nimport { logger } from '@/utils/logger';\nimport { RESEARCH_MAX_STEPS } from '@/lib/research-store';\nimport type { UIMessage } from 'ai';\n\n// ---------------------------------------------------------------------------\n// Type helpers — extract the dynamic-tool part type from UIMessage\n// ---------------------------------------------------------------------------\ntype DynamicToolPart = Extract<UIMessage['parts'][number], { type: 'dynamic-tool' }>;\n\ntype ToolPartState = DynamicToolPart['state'];\n\n/** Check if a tool part was cancelled (output-error with cancellation message). 
*/\nfunction isCancelledTool(tool: DynamicToolPart): boolean {\n  return tool.state === 'output-error' &&\n    typeof (tool as Record<string, unknown>).errorText === 'string' &&\n    ((tool as Record<string, unknown>).errorText as string).includes('Cancelled by user');\n}\n\ninterface ToolCallGroupProps {\n  tools: DynamicToolPart[];\n  approveTools: (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null; edited_script?: string | null }>) => Promise<boolean>;\n}\n\n// ---------------------------------------------------------------------------\n// Research sub-steps (inline under the research tool row)\n// ---------------------------------------------------------------------------\n\n/** Hook that forces a re-render every second while enabled — used so each\n * research card can compute its own elapsed seconds synchronously from\n * Date.now() without needing its own timer. */\nfunction useSecondTick(enabled: boolean): void {\n  const [, setTick] = useState(0);\n  useEffect(() => {\n    if (!enabled) return;\n    const id = setInterval(() => setTick(t => t + 1), 1000);\n    return () => clearInterval(id);\n  }, [enabled]);\n}\n\n/** Compute elapsed seconds from startedAt (or null). Call under useSecondTick. */\nfunction computeElapsed(startedAt: number | null): number | null {\n  if (startedAt === null) return null;\n  return Math.round((Date.now() - startedAt) / 1000);\n}\n\n/** Format token count like the CLI: \"12.4k\" or \"800\". */\nfunction formatTokens(tokens: number): string {\n  return tokens >= 1000 ? `${(tokens / 1000).toFixed(1)}k` : String(tokens);\n}\n\n/** Format elapsed seconds like the CLI: \"18s\" or \"2m 5s\". */\nfunction formatElapsed(seconds: number): string {\n  if (seconds < 60) return `${seconds}s`;\n  return `${Math.floor(seconds / 60)}m ${seconds % 60}s`;\n}\n\n/** Build the research stats chip label. 
*/\nfunction researchChipLabel(\n  stats: { toolCount: number; tokenCount: number; startedAt: number | null; finalElapsed: number | null },\n  liveElapsed: number | null,\n): string | null {\n  const elapsed = stats.finalElapsed ?? liveElapsed;\n  if (elapsed === null && stats.toolCount === 0) return null;\n  const parts: string[] = [];\n  if (stats.startedAt !== null) parts.push('running');\n  if (stats.toolCount > 0) parts.push(`${stats.toolCount} tools`);\n  if (stats.tokenCount > 0) parts.push(`${formatTokens(stats.tokenCount)} tokens`);\n  if (elapsed !== null) parts.push(formatElapsed(elapsed));\n  return parts.join(' \\u00B7 ');\n}\n\n/** Parse JSON args from a step string like \"tool_name  {json}\" (may be truncated at 80 chars). */\nfunction parseStepArgs(step: string): Record<string, string> {\n  const jsonStart = step.indexOf('{');\n  if (jsonStart < 0) return {};\n  const jsonStr = step.slice(jsonStart);\n  try {\n    const parsed = JSON.parse(jsonStr);\n    const result: Record<string, string> = {};\n    for (const [k, v] of Object.entries(parsed)) {\n      if (typeof v === 'string') result[k] = v;\n    }\n    return result;\n  } catch {\n    // JSON likely truncated — extract key-value pairs via regex\n    const result: Record<string, string> = {};\n    // Match complete \"key\": \"value\" pairs\n    for (const m of jsonStr.matchAll(/\"(\\w+)\":\\s*\"([^\"]*)\"/g)) {\n      result[m[1]] = m[2];\n    }\n    // Match truncated trailing value: \"key\": \"value... 
(no closing quote)\n    if (Object.keys(result).length === 0 || !result.query) {\n      const trunc = jsonStr.match(/\"(\\w+)\":\\s*\"([^\"]+)$/);\n      if (trunc && !result[trunc[1]]) {\n        result[trunc[1]] = trunc[2];\n      }\n    }\n    return result;\n  }\n}\n\n/** Pretty labels for research sub-agent tool calls */\nfunction formatResearchStep(raw: string): { label: string } {\n  // Backend sends logs like \"▸ tool_name  {args}\" — strip the prefix\n  const step = raw.replace(/^▸\\s*/, '');\n  const args = parseStepArgs(step);\n\n  if (step.startsWith('github_find_examples')) {\n    const detail = (args.keyword) || (args.repo);\n    return { label: detail ? `Finding examples: ${detail}` : 'Finding examples' };\n  }\n  if (step.startsWith('github_read_file')) {\n    const path = (args.path) || '';\n    const filename = path.split('/').pop() || path;\n    return { label: filename ? `Reading ${filename}` : 'Reading file' };\n  }\n  if (step.startsWith('explore_hf_docs')) {\n    const endpoint = (args.endpoint) || (args.query);\n    return { label: endpoint ? `Exploring docs: ${endpoint}` : 'Exploring docs' };\n  }\n  if (step.startsWith('fetch_hf_docs')) {\n    const url = (args.url) || '';\n    const page = url.split('/').pop()?.replace(/\\.md$/, '');\n    return { label: page ? `Reading docs: ${page}` : 'Fetching docs' };\n  }\n  if (step.startsWith('hf_inspect_dataset')) {\n    const dataset = (args.dataset);\n    return { label: dataset ? 
`Inspecting dataset: ${dataset}` : 'Inspecting dataset' };\n  }\n  if (step.startsWith('hf_papers')) {\n    const op = args.operation as string;\n    const detail = (args.query) || (args.arxiv_id);\n    const opLabels: Record<string, string> = {\n      trending: 'Browsing trending papers',\n      search: 'Searching papers',\n      paper_details: 'Reading paper details',\n      read_paper: 'Reading paper',\n      citation_graph: 'Tracing citations',\n      snippet_search: 'Searching paper snippets',\n      recommend: 'Finding related papers',\n      find_datasets: 'Finding paper datasets',\n      find_models: 'Finding paper models',\n      find_collections: 'Finding paper collections',\n      find_all_resources: 'Finding paper resources',\n    };\n    const base = (op && opLabels[op]) || 'Searching papers';\n    return { label: detail ? `${base}: ${detail}` : base };\n  }\n  if (step.startsWith('find_hf_api')) {\n    const detail = (args.query) || (args.tag);\n    return { label: detail ? `Finding API: ${detail}` : 'Finding API endpoints' };\n  }\n  if (step.startsWith('hf_repo_files')) {\n    const repo = (args.repo_id) || (args.repo);\n    return { label: repo ? `Reading ${repo} files` : 'Reading repo files' };\n  }\n  if (step.startsWith('read')) {\n    const path = (args.path) || '';\n    const filename = path.split('/').pop();\n    return { label: filename ? `Reading ${filename}` : 'Reading file' };\n  }\n  if (step.startsWith('bash')) {\n    const cmd = args.command as string;\n    const short = cmd && cmd.length > 40 ? cmd.slice(0, 40) + '...' : cmd;\n    return { label: short ? `Running: ${short}` : 'Running command' };\n  }\n  return { label: step.replace(/^▸\\s*/, '') };\n}\n\n/** Rolling display of research sub-tool calls for a single agent. 
*/\nfunction ResearchSteps({ steps }: { steps: string[] }) {\n  const visible = steps.slice(-RESEARCH_MAX_STEPS);\n  if (visible.length === 0) return null;\n\n  return (\n    <Box sx={{ pl: 4.5, pr: 1.5, pb: 1, pt: 0.25 }}>\n      {visible.map((step, i) => {\n        const { label } = formatResearchStep(step);\n        const isLast = i === visible.length - 1;\n        return (\n          <Stack\n            key={i}\n            direction=\"row\"\n            alignItems=\"center\"\n            spacing={0.75}\n            sx={{ py: 0.2 }}\n          >\n            {isLast ? (\n              <CircularProgress size={10} thickness={5} sx={{ color: 'var(--accent-yellow)', flexShrink: 0 }} />\n            ) : (\n              <CheckCircleOutlineIcon sx={{ fontSize: 12, color: 'var(--muted-text)', flexShrink: 0 }} />\n            )}\n            <Typography\n              sx={{\n                fontFamily: '\"JetBrains Mono\", ui-monospace, SFMono-Regular, monospace',\n                fontSize: '0.68rem',\n                color: isLast ? 
'var(--text)' : 'var(--muted-text)',\n                overflow: 'hidden',\n                textOverflow: 'ellipsis',\n                whiteSpace: 'nowrap',\n              }}\n            >\n              {label}\n            </Typography>\n          </Stack>\n        );\n      })}\n    </Box>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Hardware pricing ($/hr) — from HF Spaces & Jobs pricing\n// ---------------------------------------------------------------------------\nconst HARDWARE_PRICING: Record<string, string> = {\n  'cpu-basic': 'free',\n  'cpu-upgrade': '$0.03/hr',\n  't4-small': '$0.60/hr',\n  't4-medium': '$1.00/hr',\n  'a10g-small': '$1.05/hr',\n  'a10g-large': '$3.15/hr',\n  'a10g-largex2': '$6.30/hr',\n  'a10g-largex4': '$12.60/hr',\n  'a100-large': '$4.13/hr',\n  'a100x4': '$16.52/hr',\n  'a100x8': '$33.04/hr',\n  'l4x1': '$0.80/hr',\n  'l4x4': '$3.20/hr',\n  'l40sx1': '$1.80/hr',\n  'l40sx4': '$7.20/hr',\n  'l40sx8': '$14.40/hr',\n};\n\nfunction costLabel(hardware: string): string | null {\n  return HARDWARE_PRICING[hardware] || null;\n}\n\n// ---------------------------------------------------------------------------\n// Visual helpers\n// ---------------------------------------------------------------------------\n\nfunction StatusIcon({ state, cancelled, isRejected }: { state: ToolPartState; cancelled?: boolean; isRejected?: boolean }) {\n  if (cancelled || isRejected) {\n    return <BlockIcon sx={{ fontSize: 16, color: 'var(--muted-text)' }} />;\n  }\n  switch (state) {\n    case 'approval-requested':\n      return <HourglassEmptyIcon sx={{ fontSize: 16, color: 'var(--accent-yellow)' }} />;\n    case 'approval-responded':\n      return <CircularProgress size={14} thickness={5} sx={{ color: 'var(--accent-green)' }} />;\n    case 'output-available':\n      return <CheckCircleOutlineIcon sx={{ fontSize: 16, color: 'success.main' }} />;\n    case 'output-error':\n      return <ErrorOutlineIcon sx={{ 
fontSize: 16, color: 'error.main' }} />;\n    case 'output-denied':\n      return <BlockIcon sx={{ fontSize: 16, color: 'var(--muted-text)' }} />;\n    case 'input-streaming':\n    case 'input-available':\n    default:\n      return <CircularProgress size={14} thickness={5} sx={{ color: 'var(--accent-yellow)' }} />;\n  }\n}\n\nfunction statusLabel(state: ToolPartState): string | null {\n  switch (state) {\n    case 'approval-requested': return 'awaiting approval';\n    case 'approval-responded': return 'approved';\n    case 'input-streaming':\n    case 'input-available': return 'running';\n    case 'output-denied': return 'denied';\n    case 'output-error': return 'error';\n    default: return null;\n  }\n}\n\nfunction statusColor(state: ToolPartState): string {\n  switch (state) {\n    case 'approval-requested': return 'var(--accent-yellow)';\n    case 'approval-responded': return 'var(--accent-green)';\n    case 'output-available': return 'var(--accent-green)';\n    case 'output-error': return 'var(--accent-red)';\n    case 'output-denied': return 'var(--muted-text)';\n    default: return 'var(--accent-yellow)';\n  }\n}\n\n// ---------------------------------------------------------------------------\n// Inline approval UI (per-tool)\n// ---------------------------------------------------------------------------\n\nfunction InlineApproval({\n  toolCallId,\n  toolName,\n  input,\n  scriptLabel,\n  onResolve,\n}: {\n  toolCallId: string;\n  toolName: string;\n  input: unknown;\n  scriptLabel: string;\n  onResolve: (toolCallId: string, approved: boolean, feedback?: string) => void;\n}) {\n  const [feedback, setFeedback] = useState('');\n  const args = input as Record<string, unknown> | undefined;\n  const { setPanel, getEditedScript } = useAgentStore();\n  const { setRightPanelOpen, setLeftSidebarOpen } = useLayoutStore();\n  const hasEditedScript = !!getEditedScript(toolCallId);\n\n  const handleScriptClick = useCallback(() => {\n    if (toolName === 'hf_jobs' && 
args?.script) {\n      const scriptContent = getEditedScript(toolCallId) || String(args.script);\n      setPanel(\n        { title: scriptLabel, script: { content: scriptContent, language: 'python' }, parameters: { tool_call_id: toolCallId } },\n        'script',\n        true,\n      );\n      setRightPanelOpen(true);\n      setLeftSidebarOpen(false);\n    }\n  }, [toolCallId, toolName, args, scriptLabel, setPanel, getEditedScript, setRightPanelOpen, setLeftSidebarOpen]);\n\n  return (\n    <Box sx={{ px: 1.5, py: 1.5, borderTop: '1px solid var(--tool-border)' }}>\n      {toolName === 'sandbox_create' && args && (() => {\n        const hw = String(args.hardware || 'cpu-basic');\n        const cost = costLabel(hw);\n        return (\n          <Box sx={{ mb: 1.5 }}>\n            <Typography variant=\"body2\" sx={{ color: 'var(--muted-text)', fontSize: '0.75rem', mb: 0.5 }}>\n              Create a remote dev environment on{' '}\n              <Box component=\"span\" sx={{ fontWeight: 500, color: 'var(--text)' }}>\n                {hw}\n              </Box>\n              {cost && (\n                <Box component=\"span\" sx={{ color: cost === 'free' ? 'var(--accent-green)' : 'var(--accent-yellow)', fontWeight: 500 }}>\n                  {' '}({cost})\n                </Box>\n              )}\n              {!!args.private && (\n                <Box component=\"span\" sx={{ color: 'var(--muted-text)' }}>{' (private)'}</Box>\n              )}\n            </Typography>\n            <Typography variant=\"body2\" sx={{ color: 'var(--muted-text)', fontSize: '0.7rem', opacity: 0.7 }}>\n              Creates a temporary HF Space to develop and test scripts before running jobs. 
Takes 1-2 min to start.\n            </Typography>\n          </Box>\n        );\n      })()}\n\n      {toolName === 'hf_jobs' && args && (() => {\n        const hw = String(args.hardware_flavor || 'cpu-basic');\n        const cost = costLabel(hw);\n        return (\n        <Box sx={{ mb: 1.5 }}>\n          <Typography variant=\"body2\" sx={{ color: 'var(--muted-text)', fontSize: '0.75rem', mb: 1 }}>\n            Execute <Box component=\"span\" sx={{ color: 'var(--accent-yellow)', fontWeight: 500 }}>{scriptLabel.replace('Script', 'Job')}</Box> on{' '}\n            <Box component=\"span\" sx={{ fontWeight: 500, color: 'var(--text)' }}>\n              {hw}\n            </Box>\n            {cost && (\n              <Box component=\"span\" sx={{ color: cost === 'free' ? 'var(--accent-green)' : 'var(--accent-yellow)', fontWeight: 500 }}>\n                {' '}({cost})\n              </Box>\n            )}\n            {!!args.timeout && (\n              <> for up to <Box component=\"span\" sx={{ fontWeight: 500, color: 'var(--text)' }}>\n                {String(args.timeout)}\n              </Box></>\n            )}\n          </Typography>\n          {typeof args.script === 'string' && args.script && (\n            <Box\n              onClick={handleScriptClick}\n              sx={{\n                mt: 0.5,\n                p: 1.5,\n                bgcolor: 'var(--code-panel-bg)',\n                border: '1px solid var(--tool-border)',\n                borderRadius: '8px',\n                cursor: 'pointer',\n                transition: 'border-color 0.15s ease',\n                '&:hover': { borderColor: 'var(--accent-yellow)' },\n              }}\n            >\n              <Box\n                component=\"pre\"\n                sx={{\n                  m: 0,\n                  fontFamily: '\"JetBrains Mono\", ui-monospace, SFMono-Regular, monospace',\n                  fontSize: '0.7rem',\n                  lineHeight: 1.5,\n                  color: 
'var(--text)',\n                  overflow: 'hidden',\n                  display: '-webkit-box',\n                  WebkitLineClamp: 3,\n                  WebkitBoxOrient: 'vertical',\n                  whiteSpace: 'pre-wrap',\n                  wordBreak: 'break-all',\n                }}\n              >\n                {String(args.script).trim()}\n              </Box>\n              <Typography\n                variant=\"caption\"\n                sx={{\n                  display: 'flex',\n                  alignItems: 'center',\n                  gap: 0.5,\n                  mt: 1,\n                  fontSize: '0.65rem',\n                  color: 'var(--muted-text)',\n                  '&:hover': { color: 'var(--accent-yellow)' },\n                }}\n              >\n                Click to view & edit\n              </Typography>\n            </Box>\n          )}\n        </Box>\n        );\n      })()}\n\n      <Box sx={{ display: 'flex', gap: 1, mb: 1 }}>\n        <TextField\n          fullWidth\n          size=\"small\"\n          placeholder=\"Feedback (optional)\"\n          value={feedback}\n          onChange={(e) => setFeedback(e.target.value)}\n          variant=\"outlined\"\n          sx={{\n            '& .MuiOutlinedInput-root': {\n              bgcolor: 'var(--hover-bg)',\n              fontFamily: 'inherit',\n              fontSize: '0.8rem',\n              '& fieldset': { borderColor: 'var(--tool-border)' },\n              '&:hover fieldset': { borderColor: 'var(--border-hover)' },\n              '&.Mui-focused fieldset': { borderColor: 'var(--accent-yellow)' },\n            },\n            '& .MuiOutlinedInput-input': {\n              color: 'var(--text)',\n              '&::placeholder': { color: 'var(--muted-text)', opacity: 0.7 },\n            },\n          }}\n        />\n        <IconButton\n          onClick={() => onResolve(toolCallId, false, feedback || 'Rejected by user')}\n          disabled={!feedback}\n          size=\"small\"\n  
        sx={{\n            color: 'var(--accent-red)',\n            border: '1px solid var(--tool-border)',\n            borderRadius: '6px',\n            '&:hover': { bgcolor: 'rgba(224,90,79,0.1)', borderColor: 'var(--accent-red)' },\n            '&.Mui-disabled': { color: 'var(--muted-text)', opacity: 0.3 },\n          }}\n        >\n          <SendIcon sx={{ fontSize: 14 }} />\n        </IconButton>\n      </Box>\n\n      <Box sx={{ display: 'flex', gap: 1 }}>\n        <Button\n          size=\"small\"\n          onClick={() => onResolve(toolCallId, false, feedback || 'Rejected by user')}\n          sx={{\n            flex: 1,\n            textTransform: 'none',\n            border: '1px solid rgba(255,255,255,0.05)',\n            color: 'var(--accent-red)',\n            fontSize: '0.75rem',\n            py: 0.75,\n            borderRadius: '8px',\n            '&:hover': { bgcolor: 'rgba(224,90,79,0.05)', borderColor: 'var(--accent-red)' },\n          }}\n        >\n          Reject\n        </Button>\n        <Button\n          size=\"small\"\n          onClick={() => onResolve(toolCallId, true)}\n          sx={{\n            flex: 1,\n            textTransform: 'none',\n            border: hasEditedScript ? '1px solid var(--accent-green)' : '1px solid rgba(255,255,255,0.05)',\n            color: 'var(--accent-green)',\n            fontSize: '0.75rem',\n            py: 0.75,\n            borderRadius: '8px',\n            bgcolor: hasEditedScript ? 'rgba(47,204,113,0.08)' : 'transparent',\n            '&:hover': { bgcolor: 'rgba(47,204,113,0.05)', borderColor: 'var(--accent-green)' },\n          }}\n        >\n          {hasEditedScript ? 
'Approve (edited)' : 'Approve'}\n        </Button>\n      </Box>\n    </Box>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// Main component\n// ---------------------------------------------------------------------------\n\nconst EMPTY_AGENTS: Record<string, ResearchAgentState> = {};\n\nexport default function ToolCallGroup({ tools, approveTools }: ToolCallGroupProps) {\n  const { setPanel, lockPanel, getJobUrl, getEditedScript, setJobStatus, getJobStatus, setToolError, getToolError, setToolRejected, getToolRejected } = useAgentStore();\n  const researchAgents = useAgentStore(s => {\n    const activeId = s.activeSessionId;\n    return (activeId && s.sessionStates[activeId]?.researchAgents) || EMPTY_AGENTS;\n  });\n  // Tick once per second while any research agent is running so each card's\n  // elapsed seconds update in real time.\n  const anyResearchRunning = useMemo(\n    () => Object.values(researchAgents).some(a => a.stats.startedAt !== null),\n    [researchAgents],\n  );\n  useSecondTick(anyResearchRunning);\n\n  const isProcessing = useAgentStore(s => s.isProcessing);\n  const { setRightPanelOpen, setLeftSidebarOpen } = useLayoutStore();\n\n  // ── Batch approval state ──────────────────────────────────────────\n  const pendingTools = useMemo(\n    () => tools.filter(t => t.state === 'approval-requested'),\n    [tools],\n  );\n\n  const [decisions, setDecisions] = useState<Record<string, { approved: boolean; feedback?: string }>>({});\n  const [isSubmitting, setIsSubmitting] = useState(false);\n  const submittingRef = useRef(false);\n\n  // Track which toolCallIds we've already submitted so we can detect new approval rounds\n  const submittedIdsRef = useRef<Set<string>>(new Set());\n\n  // ── Panel lock state (for auto-follow vs user-selected) ───────────\n  const [lockedToolId, setLockedToolId] = useState<string | null>(null);\n\n  // Reset submission state when new (unseen) pending tools arrive — e.g. 
second approval round\n  useEffect(() => {\n    if (!isSubmitting || pendingTools.length === 0) return;\n    const hasNewPending = pendingTools.some(t => !submittedIdsRef.current.has(t.toolCallId));\n    if (hasNewPending) {\n      submittingRef.current = false;\n      setIsSubmitting(false);\n      setDecisions({});\n    }\n  }, [pendingTools, isSubmitting]);\n\n  // Clean up stale decisions for tools that are no longer pending\n  useEffect(() => {\n    const pendingIds = new Set(pendingTools.map(t => t.toolCallId));\n    const decisionIds = Object.keys(decisions);\n    const hasStale = decisionIds.some(id => !pendingIds.has(id));\n    if (hasStale) {\n      setDecisions(prev => {\n        const cleaned = { ...prev };\n        for (const id of decisionIds) {\n          if (!pendingIds.has(id)) delete cleaned[id];\n        }\n        return cleaned;\n      });\n    }\n  }, [pendingTools, decisions]);\n\n  // Persist error states when tools error\n  useEffect(() => {\n    for (const tool of tools) {\n      const currentlyHasError = tool.state === 'output-error';\n      const persistedError = getToolError(tool.toolCallId);\n\n      // Persist error state if we detect it and haven't already\n      if (currentlyHasError && !persistedError) {\n        setToolError(tool.toolCallId, true);\n      }\n    }\n  }, [tools, setToolError, getToolError]);\n\n  const { scriptLabelMap, toolDisplayMap } = useMemo(() => {\n    const hfJobs = tools.filter(t => t.toolName === 'hf_jobs' && (t.input as Record<string, unknown>)?.script);\n    const scriptMap: Record<string, string> = {};\n    const displayMap: Record<string, string> = {};\n    for (let i = 0; i < hfJobs.length; i++) {\n      const id = hfJobs[i].toolCallId;\n      if (hfJobs.length > 1) {\n        scriptMap[id] = `Script ${i + 1}`;\n        displayMap[id] = `hf_jobs #${i + 1}`;\n      } else {\n        scriptMap[id] = 'Script';\n        displayMap[id] = 'hf_jobs';\n      }\n    }\n    // Pretty name for research tool\n   
 for (const t of tools) {\n      if (t.toolName === 'research') {\n        displayMap[t.toolCallId] = 'research';\n      }\n    }\n    return { scriptLabelMap: scriptMap, toolDisplayMap: displayMap };\n  }, [tools]);\n\n  // ── Send all decisions as a single batch ──────────────────────────\n  const sendBatch = useCallback(\n    async (batch: Record<string, { approved: boolean; feedback?: string }>) => {\n      if (submittingRef.current) return;\n      submittingRef.current = true;\n      setIsSubmitting(true);\n\n      const approvals = Object.entries(batch).map(([toolCallId, d]) => {\n        const editedScript = d.approved ? (getEditedScript(toolCallId) ?? null) : null;\n        if (editedScript) {\n          logger.log(`Sending edited script for ${toolCallId} (${editedScript.length} chars)`);\n        }\n        // Mark tool as rejected if not approved\n        if (!d.approved) {\n          setToolRejected(toolCallId, true);\n        }\n        return {\n          tool_call_id: toolCallId,\n          approved: d.approved,\n          feedback: d.approved ? 
null : (d.feedback || 'Rejected by user'),\n          edited_script: editedScript,\n        };\n      });\n\n      const ok = await approveTools(approvals);\n      if (ok) {\n        // Track which tool IDs were submitted so we can detect new approval rounds\n        for (const a of approvals) submittedIdsRef.current.add(a.tool_call_id);\n        lockPanel();\n      } else {\n        logger.error('Batch approval failed');\n        submittingRef.current = false;\n        setIsSubmitting(false);\n      }\n    },\n    [approveTools, lockPanel, getEditedScript, setToolRejected],\n  );\n\n  const handleApproveAll = useCallback(() => {\n    const batch: Record<string, { approved: boolean }> = {};\n    for (const t of pendingTools) batch[t.toolCallId] = { approved: true };\n    sendBatch(batch);\n  }, [pendingTools, sendBatch]);\n\n  const handleRejectAll = useCallback(() => {\n    const batch: Record<string, { approved: boolean }> = {};\n    for (const t of pendingTools) batch[t.toolCallId] = { approved: false };\n    sendBatch(batch);\n  }, [pendingTools, sendBatch]);\n\n  const handleIndividualDecision = useCallback(\n    (toolCallId: string, approved: boolean, feedback?: string) => {\n      setDecisions(prev => {\n        const next = { ...prev, [toolCallId]: { approved, feedback } };\n        if (pendingTools.every(t => next[t.toolCallId])) {\n          queueMicrotask(() => sendBatch(next));\n        }\n        return next;\n      });\n    },\n    [pendingTools, sendBatch],\n  );\n\n  const undoDecision = useCallback((toolCallId: string) => {\n    setDecisions(prev => {\n      const next = { ...prev };\n      delete next[toolCallId];\n      return next;\n    });\n  }, []);\n\n  // ── Show tool panel (shared logic) ────────────────────────────────\n  const showToolPanel = useCallback(\n    (tool: DynamicToolPart) => {\n      const args = tool.input as Record<string, unknown> | undefined;\n      const displayName = toolDisplayMap[tool.toolCallId] || tool.toolName;\n\n  
    if (tool.toolName === 'hf_jobs' && args?.script) {\n        const jobOutput = tool.output ?? (tool.state === 'output-error' ? (tool as Record<string, unknown>).errorText : undefined);\n        const hasOutput = (tool.state === 'output-available' || tool.state === 'output-error') && jobOutput;\n        const scriptContent = getEditedScript(tool.toolCallId) || String(args.script);\n        setPanel(\n          {\n            title: displayName,\n            script: { content: scriptContent, language: 'python' },\n            ...(hasOutput ? { output: { content: String(jobOutput), language: 'markdown' } } : {}),\n            parameters: { tool_call_id: tool.toolCallId },\n          },\n          hasOutput ? 'output' : 'script',\n        );\n        setRightPanelOpen(true);\n        setLeftSidebarOpen(false);\n        return;\n      }\n\n      const inputSection = args ? { content: JSON.stringify(args, null, 2), language: 'json' } : undefined;\n\n      const outputText = tool.output ?? (tool.state === 'output-error' ? 
(tool as Record<string, unknown>).errorText : undefined);\n\n      const hasCompleted = tool.state === 'output-available' || tool.state === 'output-error' || tool.state === 'output-denied';\n\n      if (outputText) {\n        // Tool has output - show it (regardless of state)\n        let language = 'text';\n        const content = String(outputText);\n        if (content.trim().startsWith('{') || content.trim().startsWith('[')) language = 'json';\n        else if (content.includes('```')) language = 'markdown';\n\n        setPanel({ title: displayName, output: { content, language }, input: inputSection }, 'output');\n        setRightPanelOpen(true);\n      } else if (tool.state === 'output-error') {\n        const content = `Tool \\`${tool.toolName}\\` returned an error with no output message.`;\n        setPanel({ title: displayName, output: { content, language: 'markdown' }, input: inputSection }, 'output');\n        setRightPanelOpen(true);\n      } else if (hasCompleted && args) {\n        // Tool completed but has no output - show input as fallback\n        setPanel({ title: displayName, output: { content: JSON.stringify(args, null, 2), language: 'json' }, input: inputSection }, 'output');\n        setRightPanelOpen(true);\n      } else if (args) {\n        const runningMessages = [\n          'Crunching numbers and herding tensors...',\n          'Teaching the model some new tricks...',\n          'Consulting the GPU oracle...',\n          'Wrangling data into submission...',\n          'Brewing a fresh batch of predictions...',\n          'Negotiating with the transformer heads...',\n          'Polishing the attention weights...',\n          'Aligning the embedding stars...',\n        ];\n        const funMsg = runningMessages[Math.floor(Math.random() * runningMessages.length)];\n        setPanel({ title: displayName, output: { content: funMsg, language: 'text' }, input: inputSection }, 'output');\n        setRightPanelOpen(true);\n      }\n    },\n    
[toolDisplayMap, setPanel, getEditedScript, setRightPanelOpen, setLeftSidebarOpen],\n  );\n\n  // ── Panel click handler ───────────────────────────────────────────\n  const handleClick = useCallback(\n    (tool: DynamicToolPart) => {\n      // Toggle lock: if clicking the same tool that's already locked, unlock it\n      if (lockedToolId === tool.toolCallId) {\n        setLockedToolId(null);\n        return;\n      }\n\n      // Lock this tool\n      setLockedToolId(tool.toolCallId);\n\n      // Show the panel\n      showToolPanel(tool);\n    },\n    [lockedToolId, showToolPanel],\n  );\n\n  // ── Auto-follow currently active tool when not locked ─────────────\n  const activeToolIdRef = useRef<string | null>(null);\n\n  useEffect(() => {\n    if (lockedToolId !== null) return; // User has locked a tool, don't auto-follow\n\n    // Find the currently running tool (latest tool that's in progress)\n    const runningTool = tools.slice().reverse().find(t =>\n      t.state === 'input-available' ||\n      t.state === 'input-streaming' ||\n      t.state === 'approval-responded'\n    );\n\n    if (runningTool) {\n      // Track this as the active tool and show its panel\n      activeToolIdRef.current = runningTool.toolCallId;\n      showToolPanel(runningTool);\n    } else if (activeToolIdRef.current) {\n      // No running tool, but we were following one - check if it completed\n      const completedTool = tools.find(t => t.toolCallId === activeToolIdRef.current);\n      if (completedTool && (completedTool.state === 'output-available' || completedTool.state === 'output-error')) {\n        // The tool we were following has completed - update its panel\n        showToolPanel(completedTool);\n      }\n    }\n  }, [tools, lockedToolId, showToolPanel]);\n\n  // ── Parse hf_jobs metadata from output ────────────────────────────\n  function parseJobMeta(output: unknown): { jobUrl?: string; jobStatus?: string } {\n    if (typeof output !== 'string') return {};\n    const urlMatch 
= output.match(/\\*\\*View at:\\*\\*\\s*(https:\\/\\/[^\\s\\n]+)/);\n    const statusMatch = output.match(/\\*\\*Final Status:\\*\\*\\s*([^\\n]+)/);\n    return {\n      jobUrl: urlMatch?.[1],\n      jobStatus: statusMatch?.[1]?.trim(),\n    };\n  }\n\n  // ── Render ────────────────────────────────────────────────────────\n  const decidedCount = pendingTools.filter(t => decisions[t.toolCallId]).length;\n\n  return (\n    <Box\n      sx={{\n        borderRadius: 2,\n        border: '1px solid var(--tool-border)',\n        bgcolor: 'var(--tool-bg)',\n        overflow: 'hidden',\n        my: 1,\n      }}\n    >\n      {/* Batch approval header — hidden once user starts deciding individually */}\n      {pendingTools.length > 1 && !isSubmitting && decidedCount === 0 && (\n        <Box\n          sx={{\n            display: 'flex',\n            alignItems: 'center',\n            gap: 1,\n            px: 1.5,\n            py: 1,\n            borderBottom: '1px solid var(--tool-border)',\n          }}\n        >\n          <Typography\n            variant=\"body2\"\n            sx={{ fontSize: '0.72rem', color: 'var(--muted-text)', mr: 'auto', whiteSpace: 'nowrap' }}\n          >\n            {`${pendingTools.length} tool${pendingTools.length > 1 ? 
's' : ''} pending`}\n          </Typography>\n          <Button\n            size=\"small\"\n            onClick={handleRejectAll}\n            sx={{\n              textTransform: 'none',\n              color: 'var(--accent-red)',\n              border: '1px solid rgba(255,255,255,0.05)',\n              fontSize: '0.72rem',\n              py: 0.5,\n              px: 1.5,\n              borderRadius: '8px',\n              '&:hover': { bgcolor: 'rgba(224,90,79,0.05)', borderColor: 'var(--accent-red)' },\n            }}\n          >\n            Reject all\n          </Button>\n          <Button\n            size=\"small\"\n            onClick={handleApproveAll}\n            sx={{\n              textTransform: 'none',\n              color: 'var(--accent-green)',\n              border: '1px solid var(--accent-green)',\n              fontSize: '0.72rem',\n              fontWeight: 600,\n              py: 0.5,\n              px: 1.5,\n              borderRadius: '8px',\n              '&:hover': { bgcolor: 'rgba(47,204,113,0.1)' },\n            }}\n          >\n            Approve all{pendingTools.length > 1 ? 
` (${pendingTools.length})` : ''}\n          </Button>\n        </Box>\n      )}\n\n      {/* Tool list */}\n      <Stack divider={<Box sx={{ borderBottom: '1px solid var(--tool-border)' }} />}>\n        {tools.map((tool) => {\n          const state = tool.state;\n          const isPending = state === 'approval-requested';\n          const clickable =\n            state === 'output-available' ||\n            state === 'output-error' ||\n            !!tool.input ||\n            (!isProcessing && (state === 'input-available' || state === 'input-streaming'));\n          const localDecision = decisions[tool.toolCallId];\n\n          const cancelled = isCancelledTool(tool);\n          const currentlyHasError = state === 'output-error';\n          const persistedError = getToolError(tool.toolCallId);\n          const persistedRejection = getToolRejected(tool.toolCallId);\n\n          // Stale in-progress tools after page reload: treat as completed\n          const stale = !isProcessing && (state === 'input-available' || state === 'input-streaming');\n          const displayState = stale ? 'output-available'\n            : isPending && localDecision\n              ? (localDecision.approved ? 'input-available' : 'output-denied')\n              : state;\n          const isRejected = displayState === 'output-denied' || persistedRejection;\n          const hasError = (persistedError || currentlyHasError) && !isRejected;\n          const label = cancelled ? 'cancelled'\n            : isRejected ? 'rejected'\n            : hasError ? 'error'\n            : statusLabel(displayState as ToolPartState);\n\n          // Parse job metadata from hf_jobs output and store\n          const jobUrlFromStore = tool.toolName === 'hf_jobs' ? getJobUrl(tool.toolCallId) : undefined;\n          const jobStatusFromStore = tool.toolName === 'hf_jobs' ? 
getJobStatus(tool.toolCallId) : undefined;\n\n          const jobMetaFromOutput = tool.toolName === 'hf_jobs' && (tool.output || (tool as Record<string, unknown>).errorText)\n            ? parseJobMeta(tool.output ?? (tool as Record<string, unknown>).errorText)\n            : {};\n\n          // Store job status if we just parsed it and don't have it stored yet\n          if (tool.toolName === 'hf_jobs' && jobMetaFromOutput.jobStatus && !jobStatusFromStore) {\n            setJobStatus(tool.toolCallId, jobMetaFromOutput.jobStatus);\n          }\n\n          // Combine job URL and status from store (persisted) with output metadata (freshly parsed)\n          // Prefer stored values to ensure they persist across renders\n          const jobMeta = {\n            jobUrl: jobUrlFromStore || jobMetaFromOutput.jobUrl,\n            jobStatus: jobStatusFromStore || jobMetaFromOutput.jobStatus,\n          };\n\n          return (\n            <Box key={tool.toolCallId}>\n              {/* Main tool row */}\n              <Stack\n                direction=\"row\"\n                alignItems=\"center\"\n                spacing={1}\n                onClick={() => !isPending && handleClick(tool)}\n                sx={{\n                  px: 1.5,\n                  py: 1,\n                  cursor: isPending ? 'default' : clickable ? 'pointer' : 'default',\n                  transition: 'background-color 0.15s',\n                  bgcolor: lockedToolId === tool.toolCallId ? 'var(--hover-bg)' : 'transparent',\n                  borderLeft: lockedToolId === tool.toolCallId ? '3px solid var(--accent-yellow)' : '3px solid transparent',\n                  '&:hover': clickable && !isPending ? { bgcolor: 'var(--hover-bg)' } : {},\n                }}\n              >\n                <StatusIcon\n                  cancelled={cancelled}\n                  isRejected={isRejected}\n                  state={\n                    hasError\n                      ? 
'output-error'\n                      : ((tool.toolName === 'hf_jobs' && jobMeta.jobStatus && ['ERROR', 'FAILED', 'CANCELLED'].includes(jobMeta.jobStatus))\n                        ? 'output-error'\n                        : displayState as ToolPartState)\n                  }\n                />\n\n                <Typography\n                  variant=\"body2\"\n                  sx={{\n                    fontFamily: '\"JetBrains Mono\", ui-monospace, SFMono-Regular, monospace',\n                    fontWeight: 600,\n                    fontSize: '0.78rem',\n                    color: 'var(--text)',\n                    flex: 1,\n                    minWidth: 0,\n                    overflow: 'hidden',\n                    textOverflow: 'ellipsis',\n                    whiteSpace: 'nowrap',\n                  }}\n                >\n                  {toolDisplayMap[tool.toolCallId] || tool.toolName}\n                </Typography>\n\n                {/* Status chip (non hf_jobs, or hf_jobs without final status) */}\n                {(() => {\n                  // Research tool: override chip label with this card's agent stats\n                  const agentState: ResearchAgentState | undefined = tool.toolName === 'research'\n                    ? researchAgents[tool.toolCallId]\n                    : undefined;\n                  const researchDone = cancelled || state === 'output-available' || state === 'output-error' || state === 'output-denied';\n                  const liveElapsed = agentState ? computeElapsed(agentState.stats.startedAt) : null;\n                  const researchLabel = tool.toolName === 'research' && agentState\n                    ? (researchDone && agentState.stats.finalElapsed !== null\n                        ? 
researchChipLabel({ ...agentState.stats, startedAt: null }, null)\n                        : researchChipLabel(agentState.stats, liveElapsed))\n                    : null;\n                  const chipLabel = researchLabel || label;\n                  if (!chipLabel || (tool.toolName === 'hf_jobs' && jobMeta.jobStatus)) return null;\n\n                  return (\n                    <Chip\n                      label={chipLabel}\n                      size=\"small\"\n                      sx={{\n                        height: 20,\n                        fontSize: '0.65rem',\n                        fontWeight: 600,\n                        bgcolor: (cancelled || isRejected) ? 'rgba(255,255,255,0.05)'\n                          : hasError ? 'rgba(224,90,79,0.12)'\n                          : (researchLabel && displayState === 'output-available') ? 'rgba(47,204,113,0.12)'\n                          : 'var(--accent-yellow-weak)',\n                        color: (cancelled || isRejected) ? 'var(--muted-text)'\n                          : hasError ? 'var(--accent-red)'\n                          : statusColor(displayState as ToolPartState),\n                        letterSpacing: '0.03em',\n                      }}\n                    />\n                  );\n                })()}\n\n                {/* HF Jobs: final status chip from job metadata */}\n                {tool.toolName === 'hf_jobs' && jobMeta.jobStatus && (\n                  <Chip\n                    label={jobMeta.jobStatus}\n                    size=\"small\"\n                    sx={{\n                      height: 20,\n                      fontSize: '0.65rem',\n                      fontWeight: 600,\n                      bgcolor: jobMeta.jobStatus === 'COMPLETED'\n                        ? 'rgba(47,204,113,0.12)'\n                        : ['ERROR', 'FAILED', 'CANCELLED'].includes(jobMeta.jobStatus!)\n                          ? 
'rgba(224,90,79,0.12)'\n                          : 'rgba(255,193,59,0.12)',\n                      color: jobMeta.jobStatus === 'COMPLETED'\n                        ? 'var(--accent-green)'\n                        : ['ERROR', 'FAILED', 'CANCELLED'].includes(jobMeta.jobStatus!)\n                          ? 'var(--accent-red)'\n                          : 'var(--accent-yellow)',\n                      letterSpacing: '0.03em',\n                    }}\n                  />\n                )}\n\n                {/* View on HF link — single place, shown whenever URL is available */}\n                {tool.toolName === 'hf_jobs' && jobMeta.jobUrl && (\n                  <Link\n                    href={jobMeta.jobUrl}\n                    target=\"_blank\"\n                    rel=\"noopener noreferrer\"\n                    onClick={(e) => e.stopPropagation()}\n                    sx={{\n                      display: 'inline-flex',\n                      alignItems: 'center',\n                      gap: 0.5,\n                      color: 'var(--accent-yellow)',\n                      fontSize: '0.68rem',\n                      textDecoration: 'none',\n                      ml: 0.5,\n                      '&:hover': { textDecoration: 'underline' },\n                    }}\n                  >\n                    <LaunchIcon sx={{ fontSize: 12 }} />\n                    View on HF\n                  </Link>\n                )}\n\n                {clickable && !isPending && (\n                  <OpenInNewIcon sx={{ fontSize: 14, color: 'var(--muted-text)', opacity: 0.6 }} />\n                )}\n              </Stack>\n\n              {/* Research sub-agent rolling steps (visible only while running) */}\n              {tool.toolName === 'research' && !cancelled && state !== 'output-available' && state !== 'output-error' && state !== 'output-denied' && researchAgents[tool.toolCallId] && (\n                <ResearchSteps steps={researchAgents[tool.toolCallId].steps} />\n  
            )}\n\n              {/* Per-tool approval: undecided */}\n              {isPending && !localDecision && !isSubmitting && (\n                <InlineApproval\n                  toolCallId={tool.toolCallId}\n                  toolName={tool.toolName}\n                  input={tool.input}\n                  scriptLabel={scriptLabelMap[tool.toolCallId] || 'Script'}\n                  onResolve={handleIndividualDecision}\n                />\n              )}\n\n              {/* Per-tool approval: locally decided (undo available) */}\n              {isPending && localDecision && !isSubmitting && (\n                <Box\n                  sx={{\n                    display: 'flex',\n                    alignItems: 'center',\n                    justifyContent: 'space-between',\n                    px: 1.5,\n                    py: 0.75,\n                    borderTop: '1px solid var(--tool-border)',\n                  }}\n                >\n                  <Typography variant=\"body2\" sx={{ fontSize: '0.72rem', color: 'var(--muted-text)' }}>\n                    {localDecision.approved\n                      ? 'Marked for approval'\n                      : `Marked for rejection${localDecision.feedback ? `: ${localDecision.feedback}` : ''}`}\n                  </Typography>\n                  <Button\n                    size=\"small\"\n                    onClick={() => undoDecision(tool.toolCallId)}\n                    sx={{\n                      textTransform: 'none',\n                      fontSize: '0.7rem',\n                      color: 'var(--muted-text)',\n                      minWidth: 'auto',\n                      px: 1,\n                      '&:hover': { color: 'var(--text)' },\n                    }}\n                  >\n                    Undo\n                  </Button>\n                </Box>\n              )}\n            </Box>\n          );\n        })}\n      </Stack>\n    </Box>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/Chat/UserMessage.tsx",
    "content": "import { useState, useRef, useEffect } from 'react';\nimport { Box, Stack, Typography, IconButton, Tooltip, TextField } from '@mui/material';\nimport CloseIcon from '@mui/icons-material/Close';\nimport EditIcon from '@mui/icons-material/Edit';\nimport CheckIcon from '@mui/icons-material/Check';\nimport type { UIMessage } from 'ai';\nimport type { MessageMeta } from '@/types/agent';\n\ninterface UserMessageProps {\n  message: UIMessage;\n  isLastTurn?: boolean;\n  onUndoTurn?: () => void;\n  onEditAndRegenerate?: (messageId: string, newText: string) => void | Promise<void>;\n  isProcessing?: boolean;\n}\n\nfunction extractText(message: UIMessage): string {\n  return message.parts\n    .filter((p): p is Extract<typeof p, { type: 'text' }> => p.type === 'text')\n    .map(p => p.text)\n    .join('');\n}\n\nexport default function UserMessage({\n  message,\n  isLastTurn = false,\n  onUndoTurn,\n  onEditAndRegenerate,\n  isProcessing = false,\n}: UserMessageProps) {\n  const showUndo = isLastTurn && !isProcessing && !!onUndoTurn;\n  const showEdit = !isProcessing && !!onEditAndRegenerate;\n  const text = extractText(message);\n  const meta = message.metadata as MessageMeta | undefined;\n  const timeStr = meta?.createdAt\n    ? 
new Date(meta.createdAt).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })\n    : null;\n\n  const [isEditing, setIsEditing] = useState(false);\n  const [editText, setEditText] = useState(text);\n  const inputRef = useRef<HTMLTextAreaElement>(null);\n\n  useEffect(() => {\n    if (isEditing && inputRef.current) {\n      inputRef.current.focus();\n      inputRef.current.selectionStart = inputRef.current.value.length;\n    }\n  }, [isEditing]);\n\n  const handleStartEdit = () => {\n    setEditText(text);\n    setIsEditing(true);\n  };\n\n  const handleConfirmEdit = () => {\n    const trimmed = editText.trim();\n    if (!trimmed || trimmed === text) {\n      setIsEditing(false);\n      return;\n    }\n    setIsEditing(false);\n    onEditAndRegenerate?.(message.id, trimmed);\n  };\n\n  const handleCancelEdit = () => {\n    setIsEditing(false);\n    setEditText(text);\n  };\n\n  const handleKeyDown = (e: React.KeyboardEvent) => {\n    if (e.key === 'Enter' && !e.shiftKey) {\n      e.preventDefault();\n      handleConfirmEdit();\n    } else if (e.key === 'Escape') {\n      handleCancelEdit();\n    }\n  };\n\n  return (\n    <Stack\n      direction=\"row\"\n      spacing={1.5}\n      justifyContent=\"flex-end\"\n      alignItems=\"flex-start\"\n      sx={{\n        '& .action-btn': {\n          opacity: 0,\n          transition: 'opacity 0.15s ease',\n        },\n        '&:hover .action-btn': {\n          opacity: 1,\n        },\n      }}\n    >\n      {!isEditing && (showUndo || showEdit) && (\n        <Stack className=\"action-btn\" direction=\"row\" spacing={0.25} sx={{ mt: 0.75 }}>\n          {showEdit && (\n            <Tooltip title=\"Edit & regenerate\" placement=\"left\">\n              <IconButton\n                onClick={handleStartEdit}\n                size=\"small\"\n                sx={{\n                  width: 24,\n                  height: 24,\n                  color: 'var(--muted-text)',\n                  '&:hover': {\n                  
  color: 'var(--accent-yellow)',\n                    bgcolor: 'rgba(255,157,0,0.08)',\n                  },\n                }}\n              >\n                <EditIcon sx={{ fontSize: 14 }} />\n              </IconButton>\n            </Tooltip>\n          )}\n          {showUndo && (\n            <Tooltip title=\"Remove this turn\" placement=\"left\">\n              <IconButton\n                onClick={onUndoTurn}\n                size=\"small\"\n                sx={{\n                  width: 24,\n                  height: 24,\n                  color: 'var(--muted-text)',\n                  '&:hover': {\n                    color: 'var(--accent-red)',\n                    bgcolor: 'rgba(244,67,54,0.08)',\n                  },\n                }}\n              >\n                <CloseIcon sx={{ fontSize: 14 }} />\n              </IconButton>\n            </Tooltip>\n          )}\n        </Stack>\n      )}\n\n      <Box\n        sx={{\n          maxWidth: { xs: '88%', md: '72%' },\n          bgcolor: 'var(--surface)',\n          borderRadius: 1.5,\n          borderTopRightRadius: 4,\n          px: { xs: 1.5, md: 2.5 },\n          py: 1.5,\n          border: '1px solid var(--border)',\n        }}\n      >\n        {isEditing ? 
(\n          <Stack spacing={1}>\n            <TextField\n              inputRef={inputRef}\n              multiline\n              fullWidth\n              value={editText}\n              onChange={(e) => setEditText(e.target.value)}\n              onKeyDown={handleKeyDown}\n              variant=\"outlined\"\n              size=\"small\"\n              sx={{\n                '& .MuiOutlinedInput-root': {\n                  fontFamily: 'inherit',\n                  fontSize: '0.925rem',\n                  lineHeight: 1.65,\n                  color: 'var(--text)',\n                  '& fieldset': { borderColor: 'var(--accent-yellow)', borderWidth: 1.5 },\n                  '&:hover fieldset': { borderColor: 'var(--accent-yellow)' },\n                  '&.Mui-focused fieldset': { borderColor: 'var(--accent-yellow)' },\n                },\n              }}\n            />\n            <Stack direction=\"row\" spacing={0.5} justifyContent=\"flex-end\">\n              <Tooltip title=\"Cancel (Esc)\">\n                <IconButton\n                  onClick={handleCancelEdit}\n                  size=\"small\"\n                  sx={{ color: 'var(--muted-text)', '&:hover': { color: 'var(--accent-red)' } }}\n                >\n                  <CloseIcon sx={{ fontSize: 16 }} />\n                </IconButton>\n              </Tooltip>\n              <Tooltip title=\"Confirm (Enter)\">\n                <IconButton\n                  onClick={handleConfirmEdit}\n                  size=\"small\"\n                  sx={{ color: 'var(--accent-green)', '&:hover': { bgcolor: 'rgba(47,204,113,0.1)' } }}\n                >\n                  <CheckIcon sx={{ fontSize: 16 }} />\n                </IconButton>\n              </Tooltip>\n            </Stack>\n          </Stack>\n        ) : (\n          <Typography\n            variant=\"body1\"\n            sx={{\n              fontSize: '0.925rem',\n              lineHeight: 1.65,\n              color: 'var(--text)',\n              
whiteSpace: 'pre-wrap',\n              wordBreak: 'break-word',\n            }}\n          >\n            {text}\n          </Typography>\n        )}\n\n        {timeStr && !isEditing && (\n          <Typography\n            variant=\"caption\"\n            sx={{ color: 'var(--muted-text)', mt: 0.5, display: 'block', textAlign: 'right', fontSize: '0.7rem' }}\n          >\n            {timeStr}\n          </Typography>\n        )}\n      </Box>\n    </Stack>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/ClaudeCapDialog.tsx",
    "content": "import {\n  Box,\n  Button,\n  Dialog,\n  DialogActions,\n  DialogContent,\n  DialogContentText,\n  DialogTitle,\n  Typography,\n} from '@mui/material';\nimport type { PlanTier } from '@/hooks/useUserQuota';\n\nconst HF_PRICING_URL = 'https://huggingface.co/pricing';\nconst PRO_CAP = 20;\n\ninterface ClaudeCapDialogProps {\n  open: boolean;\n  plan: PlanTier;\n  cap: number;\n  onClose: () => void;\n  onUseFreeModel: () => void;\n}\n\nexport default function ClaudeCapDialog({\n  open,\n  plan,\n  cap,\n  onClose,\n  onUseFreeModel,\n}: ClaudeCapDialogProps) {\n  // plan not surfaced in copy right now — Pro users see the same dialog and\n  // can upgrade their org if they're also capped.\n  void plan;\n\n  return (\n    <Dialog\n      open={open}\n      onClose={onClose}\n      slotProps={{\n        backdrop: { sx: { backgroundColor: 'rgba(0,0,0,0.5)', backdropFilter: 'blur(4px)' } },\n      }}\n      PaperProps={{\n        sx: {\n          bgcolor: 'var(--panel)',\n          border: '1px solid var(--border)',\n          borderRadius: 'var(--radius-md)',\n          boxShadow: 'var(--shadow-1)',\n          maxWidth: 460,\n          mx: 2,\n        },\n      }}\n    >\n      <DialogTitle\n        sx={{ color: 'var(--text)', fontWeight: 700, fontSize: '1rem', pt: 2.5, pb: 0, px: 3 }}\n      >\n        You've hit your Opus limit\n      </DialogTitle>\n      <DialogContent sx={{ px: 3, pt: 1.25, pb: 0 }}>\n        <DialogContentText\n          sx={{ color: 'var(--muted-text)', fontSize: '0.85rem', lineHeight: 1.6 }}\n        >\n          Opus costs an arm and a leg, so we unfortunately have to cap you at {cap}{' '}\n          {cap === 1 ? 'session' : 'sessions'} a day. 
Give Kimi, MiniMax, or GLM a spin —\n          they are genuinely good and we use them all the time.\n        </DialogContentText>\n        <Box\n          sx={{\n            mt: 2,\n            p: 1.5,\n            borderRadius: '8px',\n            bgcolor: 'var(--accent-yellow-weak)',\n            border: '1px solid var(--border)',\n          }}\n        >\n          <Typography\n            variant=\"caption\"\n            sx={{\n              display: 'block',\n              fontWeight: 700,\n              color: 'var(--text)',\n              fontSize: '0.78rem',\n              mb: 0.5,\n              letterSpacing: '0.02em',\n            }}\n          >\n            HF Pro ($9/mo) — more Opus, more everything\n          </Typography>\n          <Typography\n            variant=\"caption\"\n            sx={{ display: 'block', color: 'var(--muted-text)', fontSize: '0.78rem', lineHeight: 1.55 }}\n          >\n            {PRO_CAP} Opus sessions/day here, 20× HF Inference credits, ZeroGPU access,\n            and priority on Spaces hardware.\n          </Typography>\n        </Box>\n      </DialogContent>\n      <DialogActions sx={{ px: 3, pb: 2.5, pt: 2, gap: 1 }}>\n        <Button\n          component=\"a\"\n          href={HF_PRICING_URL}\n          target=\"_blank\"\n          rel=\"noopener noreferrer\"\n          variant=\"contained\"\n          size=\"small\"\n          sx={{\n            fontSize: '0.82rem',\n            px: 2.5,\n            bgcolor: 'var(--accent-yellow)',\n            color: '#000',\n            textTransform: 'none',\n            fontWeight: 700,\n            boxShadow: 'none',\n            '&:hover': { bgcolor: '#FFB340', boxShadow: 'none' },\n          }}\n        >\n          Upgrade to Pro\n        </Button>\n        <Button\n          onClick={onUseFreeModel}\n          size=\"small\"\n          sx={{\n            color: 'var(--muted-text)',\n            fontSize: '0.82rem',\n            px: 2,\n            textTransform: 
'none',\n            '&:hover': { bgcolor: 'var(--hover-bg)' },\n          }}\n        >\n          Use a free model\n        </Button>\n      </DialogActions>\n    </Dialog>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/CodePanel/CodePanel.tsx",
    "content": "import { useRef, useEffect, useMemo, useState, useCallback } from 'react';\nimport { Box, Stack, Typography, IconButton, Button, Tooltip } from '@mui/material';\nimport CloseIcon from '@mui/icons-material/Close';\nimport RadioButtonUncheckedIcon from '@mui/icons-material/RadioButtonUnchecked';\nimport CheckCircleIcon from '@mui/icons-material/CheckCircle';\nimport PlayCircleOutlineIcon from '@mui/icons-material/PlayCircleOutline';\nimport CodeIcon from '@mui/icons-material/Code';\nimport ArticleIcon from '@mui/icons-material/Article';\nimport EditIcon from '@mui/icons-material/Edit';\nimport UndoIcon from '@mui/icons-material/Undo';\nimport ContentCopyIcon from '@mui/icons-material/ContentCopy';\nimport CheckIcon from '@mui/icons-material/Check';\nimport { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';\nimport { vscDarkPlus, vs } from 'react-syntax-highlighter/dist/esm/styles/prism';\nimport ReactMarkdown from 'react-markdown';\nimport remarkGfm from 'remark-gfm';\nimport { useAgentStore } from '@/store/agentStore';\nimport { useLayoutStore } from '@/store/layoutStore';\nimport { processLogs } from '@/utils/logProcessor';\nimport type { PanelView } from '@/store/agentStore';\n\n// ── Helpers ──────────────────────────────────────────────────────\n\nfunction PlanStatusIcon({ status }: { status: string }) {\n  if (status === 'completed') return <CheckCircleIcon sx={{ fontSize: 16, color: 'var(--accent-green)' }} />;\n  if (status === 'in_progress') return <PlayCircleOutlineIcon sx={{ fontSize: 16, color: 'var(--accent-yellow)' }} />;\n  return <RadioButtonUncheckedIcon sx={{ fontSize: 16, color: 'var(--muted-text)', opacity: 0.5 }} />;\n}\n\n// ── Markdown styles (adapts via CSS vars) ────────────────────────\nconst markdownSx = {\n  color: 'var(--text)',\n  fontSize: '13px',\n  lineHeight: 1.6,\n  '& p': { m: 0, mb: 1.5, '&:last-child': { mb: 0 } },\n  '& pre': {\n    bgcolor: 'var(--code-bg)',\n    p: 1.5,\n    borderRadius: 1,\n    
overflow: 'auto',\n    fontSize: '12px',\n    border: '1px solid var(--tool-border)',\n  },\n  '& code': {\n    bgcolor: 'var(--hover-bg)',\n    px: 0.5,\n    py: 0.25,\n    borderRadius: 0.5,\n    fontSize: '12px',\n    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',\n  },\n  '& pre code': { bgcolor: 'transparent', p: 0 },\n  '& a': {\n    color: 'var(--accent-yellow)',\n    textDecoration: 'none',\n    '&:hover': { textDecoration: 'underline' },\n  },\n  '& ul, & ol': { pl: 2.5, my: 1 },\n  '& li': { mb: 0.5 },\n  '& table': {\n    borderCollapse: 'collapse',\n    width: '100%',\n    my: 2,\n    fontSize: '12px',\n    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',\n  },\n  '& th': {\n    borderBottom: '2px solid var(--border-hover)',\n    textAlign: 'left',\n    p: 1,\n    fontWeight: 600,\n  },\n  '& td': {\n    borderBottom: '1px solid var(--tool-border)',\n    p: 1,\n  },\n  '& h1, & h2, & h3, & h4': { mt: 2, mb: 1, fontWeight: 600 },\n  '& h1': { fontSize: '1.25rem' },\n  '& h2': { fontSize: '1.1rem' },\n  '& h3': { fontSize: '1rem' },\n  '& blockquote': {\n    borderLeft: '3px solid var(--accent-yellow)',\n    pl: 2,\n    ml: 0,\n    color: 'var(--muted-text)',\n  },\n} as const;\n\n// ── View toggle button ──────────────────────────────────────────\n\nfunction ViewToggle({ view, icon, label, isActive, onClick }: {\n  view: PanelView;\n  icon: React.ReactNode;\n  label: string;\n  isActive: boolean;\n  onClick: (v: PanelView) => void;\n}) {\n  return (\n    <Box\n      onClick={() => onClick(view)}\n      sx={{\n        display: 'flex',\n        alignItems: 'center',\n        gap: 0.5,\n        px: 1.5,\n        py: 0.75,\n        borderRadius: 1,\n        cursor: 'pointer',\n        fontSize: '0.7rem',\n        fontWeight: 600,\n        textTransform: 'uppercase',\n        letterSpacing: '0.05em',\n        whiteSpace: 'nowrap',\n        color: isActive ? 
'var(--text)' : 'var(--muted-text)',\n        bgcolor: isActive ? 'var(--tab-active-bg)' : 'transparent',\n        border: '1px solid',\n        borderColor: isActive ? 'var(--tab-active-border)' : 'transparent',\n        transition: 'all 0.15s ease',\n        '&:hover': { bgcolor: 'var(--tab-hover-bg)' },\n      }}\n    >\n      {icon}\n      <span>{label}</span>\n    </Box>\n  );\n}\n\n// ── Component ────────────────────────────────────────────────────\n\nexport default function CodePanel() {\n  const { panelData, panelView, panelEditable, setPanelView, updatePanelScript, setEditedScript, plan } =\n    useAgentStore();\n  const { setRightPanelOpen, themeMode } = useLayoutStore();\n  const scrollRef = useRef<HTMLDivElement>(null);\n  const textareaRef = useRef<HTMLTextAreaElement>(null);\n  const [isEditing, setIsEditing] = useState(false);\n  const [editedContent, setEditedContent] = useState('');\n  const [originalContent, setOriginalContent] = useState('');\n  const [copied, setCopied] = useState(false);\n  const [showInput, setShowInput] = useState(false);\n\n  const isDark = themeMode === 'dark';\n  const syntaxTheme = isDark ? vscDarkPlus : vs;\n\n  const activeSection = panelView === 'script' ? 
panelData?.script : panelData?.output;\n  const hasScript = !!panelData?.script;\n  const hasOutput = !!panelData?.output;\n  const hasBothViews = hasScript && hasOutput;\n\n  const isEditableScript = panelView === 'script' && panelEditable;\n  const hasUnsavedChanges = isEditing && editedContent !== originalContent;\n\n  // Reset input toggle when panel data changes\n  useEffect(() => {\n    setShowInput(false);\n  }, [panelData]);\n\n  // Sync edited content when panel data changes\n  useEffect(() => {\n    if (panelData?.script?.content && panelView === 'script' && panelEditable) {\n      setOriginalContent(panelData.script.content);\n      if (!isEditing) {\n        setEditedContent(panelData.script.content);\n      }\n    }\n  }, [panelData?.script?.content, panelView, panelEditable, isEditing]);\n\n  // Exit editing when switching away from script view or losing editable\n  useEffect(() => {\n    if (!isEditableScript && isEditing) {\n      setIsEditing(false);\n    }\n  }, [isEditableScript, isEditing]);\n\n  const handleStartEdit = useCallback(() => {\n    if (panelData?.script?.content) {\n      setEditedContent(panelData.script.content);\n      setOriginalContent(panelData.script.content);\n      setIsEditing(true);\n      setTimeout(() => textareaRef.current?.focus(), 0);\n    }\n  }, [panelData?.script?.content]);\n\n  const handleCancelEdit = useCallback(() => {\n    setEditedContent(originalContent);\n    setIsEditing(false);\n  }, [originalContent]);\n\n  const handleSaveEdit = useCallback(() => {\n    if (editedContent !== originalContent) {\n      updatePanelScript(editedContent);\n      const toolCallId = panelData?.parameters?.tool_call_id as string | undefined;\n      if (toolCallId) {\n        setEditedScript(toolCallId, editedContent);\n      }\n      setOriginalContent(editedContent);\n    }\n    setIsEditing(false);\n  }, [panelData?.parameters?.tool_call_id, editedContent, originalContent, updatePanelScript, setEditedScript]);\n\n  const 
handleCopy = useCallback(async () => {\n    const contentToCopy = isEditing ? editedContent : (activeSection?.content || '');\n    if (contentToCopy) {\n      try {\n        await navigator.clipboard.writeText(contentToCopy);\n        setCopied(true);\n        setTimeout(() => setCopied(false), 2000);\n      } catch (err) {\n        console.error('Failed to copy:', err);\n      }\n    }\n  }, [isEditing, editedContent, activeSection?.content]);\n\n  const visibleSection = (showInput && panelData?.input) ? panelData.input : activeSection;\n\n  const displayContent = useMemo(() => {\n    if (!visibleSection?.content) return '';\n    if (!visibleSection.language || visibleSection.language === 'text') {\n      return processLogs(visibleSection.content);\n    }\n    return visibleSection.content;\n  }, [visibleSection?.content, visibleSection?.language]);\n\n  // Auto-scroll only for live log streaming, not when opening panel\n  const hasAutoScrolled = useRef(false);\n  useEffect(() => {\n    hasAutoScrolled.current = false;\n  }, [panelData]);\n  useEffect(() => {\n    if (scrollRef.current && panelView === 'output' && hasAutoScrolled.current) {\n      scrollRef.current.scrollTop = scrollRef.current.scrollHeight;\n    }\n    hasAutoScrolled.current = true;\n  }, [displayContent, panelView]);\n\n  // ── Syntax-highlighted code block (DRY) ────────────────────────\n  const renderSyntaxBlock = (language: string) => (\n    <SyntaxHighlighter\n      language={language}\n      style={syntaxTheme}\n      customStyle={{\n        margin: 0,\n        padding: 0,\n        background: 'transparent',\n        fontSize: '13px',\n        fontFamily: 'inherit',\n      }}\n      wrapLines\n      wrapLongLines\n    >\n      {displayContent}\n    </SyntaxHighlighter>\n  );\n\n  // ── Content renderer ───────────────────────────────────────────\n  const renderContent = () => {\n    if (!visibleSection?.content) {\n      return (\n        <Box sx={{ display: 'flex', alignItems: 'center', 
justifyContent: 'center', height: '100%', opacity: 0.5 }}>\n          <Typography variant=\"caption\">NO CONTENT TO DISPLAY</Typography>\n        </Box>\n      );\n    }\n\n    if (!showInput && isEditing && isEditableScript) {\n      return (\n        <Box sx={{ position: 'relative', width: '100%', height: '100%' }}>\n          <SyntaxHighlighter\n            language={activeSection?.language === 'python' ? 'python' : activeSection?.language === 'json' ? 'json' : 'text'}\n            style={syntaxTheme}\n            customStyle={{\n              margin: 0,\n              padding: 0,\n              background: 'transparent',\n              fontSize: '13px',\n              fontFamily: '\"JetBrains Mono\", ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',\n              lineHeight: 1.55,\n              pointerEvents: 'none',\n            }}\n            wrapLines\n            wrapLongLines\n          >\n            {editedContent || ' '}\n          </SyntaxHighlighter>\n          <textarea\n            ref={textareaRef}\n            value={editedContent}\n            onChange={(e) => setEditedContent(e.target.value)}\n            spellCheck={false}\n            style={{\n              position: 'absolute',\n              top: 0,\n              left: 0,\n              width: '100%',\n              height: '100%',\n              background: 'transparent',\n              border: 'none',\n              outline: 'none',\n              resize: 'none',\n              color: 'transparent',\n              caretColor: 'var(--text)',\n              fontFamily: '\"JetBrains Mono\", ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',\n              fontSize: '13px',\n              lineHeight: 1.55,\n              overflow: 'hidden',\n            }}\n          />\n        </Box>\n      );\n    }\n\n    const lang = visibleSection.language;\n    if (lang === 'python') return renderSyntaxBlock('python');\n    if (lang === 'json') return renderSyntaxBlock('json');\n\n    
if (lang === 'markdown') {\n      return (\n        <Box sx={markdownSx}>\n          <ReactMarkdown remarkPlugins={[remarkGfm]}>{displayContent}</ReactMarkdown>\n        </Box>\n      );\n    }\n\n    return (\n      <Box\n        component=\"pre\"\n        sx={{ m: 0, fontFamily: 'inherit', color: 'var(--text)', whiteSpace: 'pre-wrap', wordBreak: 'break-all' }}\n      >\n        <code>{displayContent}</code>\n      </Box>\n    );\n  };\n\n  return (\n    <Box sx={{ height: '100%', display: 'flex', flexDirection: 'column', bgcolor: 'var(--panel)' }}>\n      {/* ── Header ─────────────────────────────────────────────── */}\n      <Box\n        sx={{\n          height: 60,\n          display: 'flex',\n          alignItems: 'center',\n          justifyContent: 'space-between',\n          px: 2,\n          borderBottom: '1px solid var(--border)',\n          flexShrink: 0,\n        }}\n      >\n        <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, flex: 1, minWidth: 0 }}>\n          {panelData ? 
(\n            <>\n              <Typography\n                variant=\"caption\"\n                sx={{\n                  fontWeight: 600,\n                  color: 'var(--muted-text)',\n                  textTransform: 'uppercase',\n                  letterSpacing: '0.05em',\n                  fontSize: '0.7rem',\n                  flexShrink: 0,\n                }}\n              >\n                {panelData.title}\n              </Typography>\n              {hasBothViews && (\n                <Box sx={{ display: 'flex', gap: 0.5, ml: 1 }}>\n                  <ViewToggle\n                    view=\"script\"\n                    icon={<CodeIcon sx={{ fontSize: 14 }} />}\n                    label=\"Script\"\n                    isActive={panelView === 'script'}\n                    onClick={setPanelView}\n                  />\n                  <ViewToggle\n                    view=\"output\"\n                    icon={<ArticleIcon sx={{ fontSize: 14 }} />}\n                    label=\"Result\"\n                    isActive={panelView === 'output'}\n                    onClick={setPanelView}\n                  />\n                </Box>\n              )}\n            </>\n          ) : (\n            <Typography\n              variant=\"caption\"\n              sx={{ fontWeight: 600, color: 'var(--muted-text)', textTransform: 'uppercase', letterSpacing: '0.05em' }}\n            >\n              Code Panel\n            </Typography>\n          )}\n        </Box>\n\n        <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>\n          {activeSection?.content && (\n            <Tooltip title={copied ? 'Copied!' : 'Copy'} placement=\"top\">\n              <IconButton\n                size=\"small\"\n                onClick={handleCopy}\n                sx={{\n                  color: copied ? 
'var(--accent-green)' : 'var(--muted-text)',\n                  '&:hover': { color: 'var(--accent-yellow)', bgcolor: 'var(--hover-bg)' },\n                }}\n              >\n                {copied ? <CheckIcon sx={{ fontSize: 18 }} /> : <ContentCopyIcon sx={{ fontSize: 18 }} />}\n              </IconButton>\n            </Tooltip>\n          )}\n          {isEditableScript && !isEditing && (\n            <Button\n              size=\"small\"\n              startIcon={<EditIcon sx={{ fontSize: 14 }} />}\n              onClick={handleStartEdit}\n              sx={{\n                textTransform: 'none',\n                color: 'var(--muted-text)',\n                fontSize: '0.75rem',\n                py: 0.5,\n                '&:hover': { color: 'var(--accent-yellow)', bgcolor: 'var(--hover-bg)' },\n              }}\n            >\n              Edit\n            </Button>\n          )}\n          {isEditing && (\n            <>\n              <Button\n                size=\"small\"\n                startIcon={<UndoIcon sx={{ fontSize: 14 }} />}\n                onClick={handleCancelEdit}\n                sx={{\n                  textTransform: 'none',\n                  color: 'var(--muted-text)',\n                  fontSize: '0.75rem',\n                  py: 0.5,\n                  '&:hover': { color: 'var(--accent-red)', bgcolor: 'var(--hover-bg)' },\n                }}\n              >\n                Cancel\n              </Button>\n              <Button\n                size=\"small\"\n                variant=\"contained\"\n                onClick={handleSaveEdit}\n                disabled={!hasUnsavedChanges}\n                sx={{\n                  textTransform: 'none',\n                  fontSize: '0.75rem',\n                  py: 0.5,\n                  bgcolor: hasUnsavedChanges ? 'var(--accent-yellow)' : 'var(--hover-bg)',\n                  color: hasUnsavedChanges ? 
'#000' : 'var(--muted-text)',\n                  '&:hover': {\n                    bgcolor: hasUnsavedChanges ? 'var(--accent-yellow)' : 'var(--hover-bg)',\n                    opacity: 0.9,\n                  },\n                  '&.Mui-disabled': {\n                    bgcolor: 'var(--hover-bg)',\n                    color: 'var(--muted-text)',\n                    opacity: 0.5,\n                  },\n                }}\n              >\n                Save\n              </Button>\n            </>\n          )}\n          <IconButton size=\"small\" onClick={() => setRightPanelOpen(false)} sx={{ color: 'var(--muted-text)' }}>\n            <CloseIcon fontSize=\"small\" />\n          </IconButton>\n        </Box>\n      </Box>\n\n      {/* ── Main content area ─────────────────────────────────── */}\n      <Box sx={{ flex: 1, overflow: 'hidden', display: 'flex', flexDirection: 'column' }}>\n        {!panelData ? (\n          <Box sx={{ flex: 1, display: 'flex', alignItems: 'center', justifyContent: 'center', p: 4 }}>\n            <Typography variant=\"body2\" color=\"text.secondary\" sx={{ opacity: 0.5 }}>\n              NO DATA LOADED\n            </Typography>\n          </Box>\n        ) : (\n          <Box sx={{ flex: 1, overflow: 'hidden', p: 2 }}>\n            <Box\n              ref={scrollRef}\n              className=\"code-panel\"\n              sx={{\n                bgcolor: 'var(--code-panel-bg)',\n                borderRadius: 'var(--radius-md)',\n                p: '18px',\n                border: '1px solid var(--border)',\n                fontFamily: '\"JetBrains Mono\", ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',\n                fontSize: '13px',\n                lineHeight: 1.55,\n                height: '100%',\n                overflow: 'auto',\n              }}\n            >\n              {/* Input / Output toggle */}\n              {panelData?.input && panelView === 'output' && (\n                <Box sx={{ display: 'flex', 
gap: 0.5, mb: 1.5 }}>\n                  {['input', 'output'].map((tab) => (\n                    <Typography\n                      key={tab}\n                      onClick={() => setShowInput(tab === 'input')}\n                      variant=\"caption\"\n                      sx={{\n                        fontSize: '0.65rem',\n                        fontWeight: 600,\n                        textTransform: 'uppercase',\n                        letterSpacing: '0.05em',\n                        cursor: 'pointer',\n                        px: 1,\n                        py: 0.25,\n                        borderRadius: 0.5,\n                        color: (tab === 'input') === showInput ? 'var(--text)' : 'var(--muted-text)',\n                        bgcolor: (tab === 'input') === showInput ? 'var(--hover-bg)' : 'transparent',\n                        transition: 'all 0.12s ease',\n                        '&:hover': { color: 'var(--text)' },\n                      }}\n                    >\n                      {tab}\n                    </Typography>\n                  ))}\n                </Box>\n              )}\n              {renderContent()}\n            </Box>\n          </Box>\n        )}\n      </Box>\n\n      {/* ── Plan display (bottom) ─────────────────────────────── */}\n      {plan && plan.length > 0 && (\n        <Box\n          sx={{\n            borderTop: '1px solid var(--border)',\n            bgcolor: 'var(--plan-bg)',\n            maxHeight: '30%',\n            display: 'flex',\n            flexDirection: 'column',\n          }}\n        >\n          <Box\n            sx={{\n              p: 1.5,\n              borderBottom: '1px solid var(--border)',\n              display: 'flex',\n              alignItems: 'center',\n              gap: 1,\n            }}\n          >\n            <Typography\n              variant=\"caption\"\n              sx={{ fontWeight: 600, color: 'var(--muted-text)', textTransform: 'uppercase', letterSpacing: '0.05em' 
}}\n            >\n              CURRENT PLAN\n            </Typography>\n          </Box>\n\n          <Stack spacing={1} sx={{ p: 2, overflow: 'auto' }}>\n            {plan.map((item) => (\n              <Stack key={item.id} direction=\"row\" alignItems=\"flex-start\" spacing={1.5}>\n                <Box sx={{ mt: 0.2 }}>\n                  <PlanStatusIcon status={item.status} />\n                </Box>\n                <Typography\n                  variant=\"body2\"\n                  sx={{\n                    fontSize: '13px',\n                    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, monospace',\n                    color: item.status === 'completed' ? 'var(--muted-text)' : 'var(--text)',\n                    textDecoration: item.status === 'completed' ? 'line-through' : 'none',\n                    opacity: item.status === 'pending' ? 0.7 : 1,\n                  }}\n                >\n                  {item.content}\n                </Typography>\n              </Stack>\n            ))}\n          </Stack>\n        </Box>\n      )}\n    </Box>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/Layout/AppLayout.tsx",
    "content": "import { useCallback, useRef, useEffect, useState } from 'react';\nimport {\n  Avatar,\n  Box,\n  Drawer,\n  Typography,\n  IconButton,\n  Alert,\n  AlertTitle,\n  Snackbar,\n  useMediaQuery,\n  useTheme,\n} from '@mui/material';\nimport MenuIcon from '@mui/icons-material/Menu';\nimport ChevronLeftIcon from '@mui/icons-material/ChevronLeft';\nimport DragIndicatorIcon from '@mui/icons-material/DragIndicator';\nimport DarkModeOutlinedIcon from '@mui/icons-material/DarkModeOutlined';\nimport LightModeOutlinedIcon from '@mui/icons-material/LightModeOutlined';\n\nimport { useSessionStore } from '@/store/sessionStore';\nimport { useAgentStore } from '@/store/agentStore';\nimport { useLayoutStore } from '@/store/layoutStore';\nimport SessionSidebar from '@/components/SessionSidebar/SessionSidebar';\nimport SessionChat from '@/components/SessionChat';\nimport CodePanel from '@/components/CodePanel/CodePanel';\nimport WelcomeScreen from '@/components/WelcomeScreen/WelcomeScreen';\nimport { apiFetch } from '@/utils/api';\n\nconst DRAWER_WIDTH = 260;\n\nexport default function AppLayout() {\n  const { sessions, activeSessionId, markExpired } = useSessionStore();\n  const { isConnected, llmHealthError, setLlmHealthError, user } = useAgentStore();\n  const {\n    isLeftSidebarOpen,\n    isRightPanelOpen,\n    rightPanelWidth,\n    themeMode,\n    setRightPanelWidth,\n    setLeftSidebarOpen,\n    toggleLeftSidebar,\n    toggleTheme,\n  } = useLayoutStore();\n\n  const theme = useTheme();\n  const isMobile = useMediaQuery(theme.breakpoints.down('md'));\n\n  const [showExpiredToast, setShowExpiredToast] = useState(false);\n  const disconnectTimer = useRef<ReturnType<typeof setTimeout> | null>(null);\n\n  const isResizing = useRef(false);\n\n  const handleMouseMove = useCallback((e: MouseEvent) => {\n    if (!isResizing.current) return;\n    const newWidth = window.innerWidth - e.clientX;\n    const maxWidth = window.innerWidth * 0.6;\n    const minWidth = 300;\n    
if (newWidth > minWidth && newWidth < maxWidth) {\n      setRightPanelWidth(newWidth);\n    }\n  }, [setRightPanelWidth]);\n\n  const stopResizing = useCallback(() => {\n    isResizing.current = false;\n    document.removeEventListener('mousemove', handleMouseMove);\n    document.removeEventListener('mouseup', stopResizing);\n    document.body.style.cursor = 'default';\n  }, [handleMouseMove]);\n\n  const startResizing = useCallback((e: React.MouseEvent) => {\n    e.preventDefault();\n    isResizing.current = true;\n    document.addEventListener('mousemove', handleMouseMove);\n    document.addEventListener('mouseup', stopResizing);\n    document.body.style.cursor = 'col-resize';\n  }, [handleMouseMove, stopResizing]);\n\n  useEffect(() => {\n    return () => {\n      document.removeEventListener('mousemove', handleMouseMove);\n      document.removeEventListener('mouseup', stopResizing);\n    };\n  }, [handleMouseMove, stopResizing]);\n\n  // -- LLM health check on mount -----------------------------------------\n  useEffect(() => {\n    let cancelled = false;\n    (async () => {\n      try {\n        const res = await apiFetch('/api/health/llm');\n        const data = await res.json();\n        if (!cancelled && data.status === 'error') {\n          setLlmHealthError({\n            error: data.error || 'Unknown LLM error',\n            errorType: data.error_type || 'unknown',\n            model: data.model,\n          });\n        } else if (!cancelled) {\n          setLlmHealthError(null);\n        }\n      } catch {\n        // Backend unreachable -- not an LLM issue, ignore\n      }\n    })();\n    return () => { cancelled = true; };\n  }, []); // eslint-disable-line react-hooks/exhaustive-deps\n\n  const hasAnySessions = sessions.length > 0;\n\n  // Debounced \"session expired\" toast\n  useEffect(() => {\n    if (!isConnected && activeSessionId) {\n      disconnectTimer.current = setTimeout(() => setShowExpiredToast(true), 2000);\n    } else {\n      if 
(disconnectTimer.current) clearTimeout(disconnectTimer.current);\n      disconnectTimer.current = null;\n      setShowExpiredToast(false);\n    }\n    return () => {\n      if (disconnectTimer.current) clearTimeout(disconnectTimer.current);\n    };\n  }, [isConnected, activeSessionId]);\n\n  const handleSessionDead = useCallback(\n    (deadSessionId: string) => {\n      // Backend lost this session — mark it expired so the chat shows a\n      // recovery banner instead of either silently failing or eagerly\n      // creating a new backend session (which would pay a summary-call\n      // cost for sessions the user may never revisit).\n      markExpired(deadSessionId);\n    },\n    [markExpired],\n  );\n\n  // Close sidebar on mobile after selecting a session\n  const handleSidebarClose = useCallback(() => {\n    if (isMobile) setLeftSidebarOpen(false);\n  }, [isMobile, setLeftSidebarOpen]);\n\n  // -- LLM error toast helper --------------------------------------------\n  const llmErrorTitle = llmHealthError\n    ? llmHealthError.errorType === 'credits'\n      ? 'API Credits Exhausted'\n      : llmHealthError.errorType === 'auth'\n      ? 'Invalid API Key'\n      : llmHealthError.errorType === 'rate_limit'\n      ? 'Rate Limited'\n      : llmHealthError.errorType === 'network'\n      ? 'LLM Provider Unreachable'\n      : 'LLM Error'\n    : '';\n\n  // -- Welcome screen: no sessions at all ---------------------------------\n  if (!hasAnySessions) {\n    return (\n      <Box sx={{ width: '100%', height: '100%', display: 'flex', flexDirection: 'column' }}>\n        <WelcomeScreen />\n      </Box>\n    );\n  }\n\n  // -- Sidebar drawer -----------------------------------------------------\n  const sidebarDrawer = (\n    <Drawer\n      variant={isMobile ? 
'temporary' : 'persistent'}\n      anchor=\"left\"\n      open={isLeftSidebarOpen}\n      onClose={() => setLeftSidebarOpen(false)}\n      ModalProps={{ keepMounted: true }}\n      sx={{\n        '& .MuiDrawer-paper': {\n          boxSizing: 'border-box',\n          width: DRAWER_WIDTH,\n          borderRight: '1px solid',\n          borderColor: 'divider',\n          top: 0,\n          height: '100%',\n          bgcolor: 'var(--panel)',\n        },\n      }}\n    >\n      <SessionSidebar onClose={handleSidebarClose} />\n    </Drawer>\n  );\n\n  // -- Main chat interface ------------------------------------------------\n  return (\n    <Box sx={{ display: 'flex', width: '100%', height: '100%' }}>\n      {/* -- Left Sidebar ------------------------------------------------- */}\n      {isMobile ? (\n        sidebarDrawer\n      ) : (\n        <Box\n          component=\"nav\"\n          sx={{\n            width: isLeftSidebarOpen ? DRAWER_WIDTH : 0,\n            flexShrink: 0,\n            transition: isResizing.current ? 'none' : 'width 0.2s',\n            overflow: 'hidden',\n          }}\n        >\n          {sidebarDrawer}\n        </Box>\n      )}\n\n      {/* -- Main Content (header + chat + code panel) -------------------- */}\n      <Box\n        sx={{\n          flexGrow: 1,\n          height: '100%',\n          display: 'flex',\n          flexDirection: 'column',\n          transition: isResizing.current ? 
'none' : 'width 0.2s',\n          overflow: 'hidden',\n          minWidth: 0,\n        }}\n      >\n        {/* -- Top Header Bar --------------------------------------------- */}\n        <Box sx={{\n          height: { xs: 52, md: 60 },\n          px: { xs: 1, md: 2 },\n          display: 'flex',\n          alignItems: 'center',\n          borderBottom: 1,\n          borderColor: 'divider',\n          bgcolor: 'background.default',\n          zIndex: 1200,\n          flexShrink: 0,\n        }}>\n          <IconButton onClick={toggleLeftSidebar} size=\"small\">\n            {isLeftSidebarOpen && !isMobile ? <ChevronLeftIcon /> : <MenuIcon />}\n          </IconButton>\n\n          <Box sx={{ flex: 1, display: 'flex', justifyContent: 'center', alignItems: 'center', gap: 0.75 }}>\n            <Box\n              component=\"img\"\n              src=\"/smolagents.webp\"\n              alt=\"smolagents\"\n              sx={{ width: { xs: 20, md: 22 }, height: { xs: 20, md: 22 } }}\n            />\n            <Typography\n              variant=\"subtitle1\"\n              sx={{\n                fontWeight: 700,\n                color: 'var(--text)',\n                letterSpacing: '-0.01em',\n                fontSize: { xs: '0.88rem', md: '0.95rem' },\n              }}\n            >\n              ML Intern\n            </Typography>\n          </Box>\n\n          <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>\n            <IconButton\n              onClick={toggleTheme}\n              size=\"small\"\n              sx={{\n                color: 'text.secondary',\n                '&:hover': { color: 'primary.main' },\n              }}\n            >\n              {themeMode === 'dark' ? <LightModeOutlinedIcon fontSize=\"small\" /> : <DarkModeOutlinedIcon fontSize=\"small\" />}\n            </IconButton>\n\n            {user?.picture ? 
(\n              <Avatar\n                src={user.picture}\n                alt={user.username || 'User'}\n                sx={{ width: 28, height: 28, ml: 0.5 }}\n              />\n            ) : user?.username ? (\n              <Avatar\n                sx={{\n                  width: 28,\n                  height: 28,\n                  ml: 0.5,\n                  bgcolor: 'primary.main',\n                  fontSize: '0.75rem',\n                  fontWeight: 700,\n                }}\n              >\n                {user.username[0].toUpperCase()}\n              </Avatar>\n            ) : null}\n          </Box>\n        </Box>\n\n        {/* -- Chat + Code Panel ------------------------------------------ */}\n        <Box\n          sx={{\n            flexGrow: 1,\n            display: 'flex',\n            overflow: 'hidden',\n          }}\n        >\n          {/* Chat area */}\n          <Box\n            component=\"main\"\n            className=\"chat-pane\"\n            sx={{\n              flexGrow: 1,\n              display: 'flex',\n              flexDirection: 'column',\n              overflow: 'hidden',\n              background: 'var(--body-gradient)',\n              p: { xs: 1.5, sm: 2, md: 3 },\n              minWidth: 0,\n            }}\n          >\n            {activeSessionId ? 
(\n              // Render ALL sessions — each owns its own useAgentChat.\n              // Only the active one renders visible UI (others return null).\n              sessions.map((s) => (\n                <SessionChat\n                  key={s.id}\n                  sessionId={s.id}\n                  isActive={s.id === activeSessionId}\n                  onSessionDead={handleSessionDead}\n                />\n              ))\n            ) : (\n              <Box\n                sx={{\n                  flex: 1,\n                  display: 'flex',\n                  alignItems: 'center',\n                  justifyContent: 'center',\n                  flexDirection: 'column',\n                  gap: 2,\n                  px: 2,\n                }}\n              >\n                <Typography variant=\"h5\" color=\"text.secondary\" sx={{ fontFamily: 'monospace', fontSize: { xs: '1rem', md: '1.5rem' } }}>\n                  NO SESSION SELECTED\n                </Typography>\n                <Typography variant=\"body2\" color=\"text.secondary\" sx={{ fontFamily: 'monospace', fontSize: { xs: '0.75rem', md: '0.875rem' } }}>\n                  Initialize a session via the sidebar\n                </Typography>\n              </Box>\n            )}\n          </Box>\n\n          {/* Code panel -- inline on desktop, overlay drawer on mobile */}\n          {isRightPanelOpen && !isMobile && (\n            <>\n              <Box\n                onMouseDown={startResizing}\n                sx={{\n                  width: '4px',\n                  cursor: 'col-resize',\n                  bgcolor: 'divider',\n                  display: 'flex',\n                  alignItems: 'center',\n                  justifyContent: 'center',\n                  transition: 'background-color 0.2s',\n                  flexShrink: 0,\n                  '&:hover': { bgcolor: 'primary.main' },\n                }}\n              >\n                <DragIndicatorIcon\n                  sx={{ 
fontSize: '0.8rem', color: 'text.secondary', pointerEvents: 'none' }}\n                />\n              </Box>\n              <Box\n                sx={{\n                  width: rightPanelWidth,\n                  flexShrink: 0,\n                  height: '100%',\n                  overflow: 'hidden',\n                  borderLeft: '1px solid',\n                  borderColor: 'divider',\n                  bgcolor: 'var(--panel)',\n                }}\n              >\n                <CodePanel />\n              </Box>\n            </>\n          )}\n        </Box>\n      </Box>\n\n      {/* Code panel -- drawer overlay on mobile */}\n      {isMobile && (\n        <Drawer\n          anchor=\"bottom\"\n          open={isRightPanelOpen}\n          onClose={() => useLayoutStore.getState().setRightPanelOpen(false)}\n          sx={{\n            '& .MuiDrawer-paper': {\n              height: '75vh',\n              borderTopLeftRadius: 16,\n              borderTopRightRadius: 16,\n              bgcolor: 'var(--panel)',\n            },\n          }}\n        >\n          <CodePanel />\n        </Drawer>\n      )}\n      <Snackbar\n        open={showExpiredToast}\n        anchorOrigin={{ vertical: 'bottom', horizontal: 'center' }}\n        onClose={() => setShowExpiredToast(false)}\n      >\n        <Alert\n          severity=\"warning\"\n          variant=\"filled\"\n          onClose={() => setShowExpiredToast(false)}\n          sx={{ fontFamily: 'monospace', fontSize: '0.8rem' }}\n        >\n          Task expired — create a new task to continue.\n        </Alert>\n      </Snackbar>\n      <Snackbar\n        open={!!llmHealthError}\n        anchorOrigin={{ vertical: 'top', horizontal: 'center' }}\n        onClose={() => setLlmHealthError(null)}\n      >\n        <Alert\n          severity=\"error\"\n          variant=\"filled\"\n          onClose={() => setLlmHealthError(null)}\n          sx={{ fontSize: '0.8rem', maxWidth: 480 }}\n        >\n          <AlertTitle 
sx={{ fontWeight: 700, fontSize: '0.85rem' }}>\n            {llmErrorTitle}\n          </AlertTitle>\n          {llmHealthError && (\n            <Typography variant=\"body2\" sx={{ fontSize: '0.78rem', opacity: 0.9 }}>\n              {llmHealthError.model} — {llmHealthError.error.slice(0, 150)}\n            </Typography>\n          )}\n        </Alert>\n      </Snackbar>\n    </Box>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/SessionChat.tsx",
    "content": "/**\n * Per-session chat component.\n *\n * Each session renders its own SessionChat. The hook (useAgentChat) always\n * runs — processing events — but only the active session renders visible\n * UI (MessageList + ChatInput).\n */\nimport { useCallback, useEffect } from 'react';\nimport { useAgentChat } from '@/hooks/useAgentChat';\nimport { useAgentStore } from '@/store/agentStore';\nimport { useSessionStore } from '@/store/sessionStore';\nimport MessageList from '@/components/Chat/MessageList';\nimport ChatInput from '@/components/Chat/ChatInput';\nimport ExpiredBanner from '@/components/Chat/ExpiredBanner';\nimport { apiFetch } from '@/utils/api';\nimport { logger } from '@/utils/logger';\n\ninterface SessionChatProps {\n  sessionId: string;\n  isActive: boolean;\n  onSessionDead: (sessionId: string) => void;\n}\n\nexport default function SessionChat({ sessionId, isActive, onSessionDead }: SessionChatProps) {\n  const { isConnected, isProcessing, activityStatus, updateSession } = useAgentStore();\n  const { updateSessionTitle, sessions } = useSessionStore();\n  const isExpired = sessions.find((s) => s.id === sessionId)?.expired === true;\n\n  const { messages, sendMessage, stop, status, undoLastTurn, editAndRegenerate, approveTools } = useAgentChat({\n    sessionId,\n    isActive,\n    onReady: () => logger.log(`Session ${sessionId} ready`),\n    onError: (error) => logger.error(`Session ${sessionId} error:`, error),\n    onSessionDead,\n  });\n\n  // When this session becomes active, restore its per-session state to the\n  // global flat fields. 
The per-session state map is kept up-to-date by\n  // side-channel callbacks even while the session is in the background.\n  useEffect(() => {\n    if (isActive) {\n      useAgentStore.getState().switchActiveSession(sessionId);\n      useAgentStore.getState().setConnected(true);\n    }\n  }, [isActive, sessionId]);\n\n  // Re-sync state when the browser tab regains focus (Chrome throttles\n  // timers in background tabs which can stall the AI SDK's update flushing).\n  // Fires for ALL sessions so background sessions also recover after sleep.\n  useEffect(() => {\n    const onVisible = () => {\n      if (document.visibilityState === 'visible' && isActive) {\n        useAgentStore.getState().switchActiveSession(sessionId);\n      }\n    };\n    document.addEventListener('visibilitychange', onVisible);\n    return () => document.removeEventListener('visibilitychange', onVisible);\n  }, [isActive, sessionId]);\n\n  // Wrap stop to show cancelled shimmer\n  const handleStop = useCallback(() => {\n    stop();\n    updateSession(sessionId, { activityStatus: { type: 'cancelled' } });\n  }, [stop, updateSession, sessionId]);\n\n  // SDK status is the ground truth — if it's streaming/submitted, agent is busy\n  const sdkBusy = status === 'streaming' || status === 'submitted';\n  const busy = isProcessing || sdkBusy;\n\n  const handleSendMessage = useCallback(\n    async (text: string) => {\n      if (!text.trim() || busy) return;\n\n      updateSession(sessionId, { isProcessing: true, activityStatus: { type: 'thinking' } });\n      sendMessage({ text: text.trim(), metadata: { createdAt: new Date().toISOString() } });\n\n      // Auto-title the session from the first user message\n      const isFirstMessage = messages.filter((m) => m.role === 'user').length === 0;\n      if (isFirstMessage) {\n        apiFetch('/api/title', {\n          method: 'POST',\n          body: JSON.stringify({ session_id: sessionId, text: text.trim() }),\n        })\n          .then((res) => 
res.json())\n          .then((data) => {\n            if (data.title) updateSessionTitle(sessionId, data.title);\n          })\n          .catch(() => {\n            const raw = text.trim();\n            updateSessionTitle(sessionId, raw.length > 40 ? raw.slice(0, 40) + '\\u2026' : raw);\n          });\n      }\n    },\n    [sessionId, sendMessage, messages, updateSessionTitle, busy, updateSession],\n  );\n\n  // Don't render UI for background sessions — hooks still run\n  if (!isActive) return null;\n\n  return (\n    <>\n      <MessageList\n        messages={messages}\n        isProcessing={busy}\n        approveTools={approveTools}\n        onUndoLastTurn={undoLastTurn}\n        onEditAndRegenerate={editAndRegenerate}\n      />\n      {isExpired ? (\n        <ExpiredBanner sessionId={sessionId} />\n      ) : (\n        <ChatInput\n          sessionId={sessionId}\n          onSend={handleSendMessage}\n          onStop={handleStop}\n          isProcessing={busy}\n          disabled={!isConnected || activityStatus.type === 'waiting-approval'}\n          placeholder={\n            activityStatus.type === 'waiting-approval'\n              ? 'Approve or reject pending tools first...'\n              : undefined\n          }\n        />\n      )}\n    </>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/SessionSidebar/SessionSidebar.tsx",
    "content": "import { useCallback, useState } from 'react';\nimport {\n  Alert,\n  Box,\n  Button,\n  Dialog,\n  DialogActions,\n  DialogContent,\n  DialogContentText,\n  DialogTitle,\n  IconButton,\n  Typography,\n  CircularProgress,\n  Divider,\n} from '@mui/material';\nimport AddIcon from '@mui/icons-material/Add';\nimport DeleteOutlineIcon from '@mui/icons-material/DeleteOutline';\nimport ChatBubbleOutlineIcon from '@mui/icons-material/ChatBubbleOutline';\nimport { useSessionStore } from '@/store/sessionStore';\nimport { useAgentStore } from '@/store/agentStore';\nimport { apiFetch } from '@/utils/api';\n\ninterface SessionSidebarProps {\n  onClose?: () => void;\n}\n\nexport default function SessionSidebar({ onClose }: SessionSidebarProps) {\n  const { sessions, activeSessionId, createSession, deleteSession, switchSession } =\n    useSessionStore();\n  const { setPlan, clearPanel } =\n    useAgentStore();\n  const [isCreatingSession, setIsCreatingSession] = useState(false);\n  const [capacityError, setCapacityError] = useState<string | null>(null);\n\n  // -- Handlers -----------------------------------------------------------\n\n  const handleNewSession = useCallback(async () => {\n    if (isCreatingSession) return;\n    setIsCreatingSession(true);\n    setCapacityError(null);\n    try {\n      const response = await apiFetch('/api/session', { method: 'POST' });\n      if (response.status === 503) {\n        const data = await response.json();\n        setCapacityError(data.detail || 'Server is at capacity.');\n        return;\n      }\n      const data = await response.json();\n      createSession(data.session_id);\n      setPlan([]);\n      clearPanel();\n      onClose?.();\n    } catch {\n      setCapacityError('Failed to create session.');\n    } finally {\n      setIsCreatingSession(false);\n    }\n  }, [isCreatingSession, createSession, setPlan, clearPanel, onClose]);\n\n  // -- Delete with dialog confirmation ------------------------------------\n  
const [confirmDeleteId, setConfirmDeleteId] = useState<string | null>(null);\n  const [isDeleting, setIsDeleting] = useState(false);\n\n  const handleDeleteClick = useCallback(\n    (sessionId: string, e: React.MouseEvent) => {\n      e.stopPropagation();\n      setConfirmDeleteId(sessionId);\n    },\n    [],\n  );\n\n  const handleDeleteConfirm = useCallback(async () => {\n    if (!confirmDeleteId || isDeleting) return;\n    const sessionId = confirmDeleteId;\n    setIsDeleting(true);\n\n    const isLastSession = sessions.length === 1;\n\n    useAgentStore.getState().clearSessionState(sessionId);\n    try {\n      await apiFetch(`/api/session/${sessionId}`, { method: 'DELETE' });\n      deleteSession(sessionId);\n    } catch {\n      deleteSession(sessionId);\n    }\n\n    // If this was the last session, create a new one\n    if (isLastSession) {\n      try {\n        const response = await apiFetch('/api/session', { method: 'POST' });\n        if (response.ok) {\n          const data = await response.json();\n          createSession(data.session_id);\n          setPlan([]);\n          clearPanel();\n        }\n      } catch (error) {\n        console.error('Failed to create new session after deleting last one:', error);\n      }\n    }\n\n    setIsDeleting(false);\n    setConfirmDeleteId(null);\n  }, [deleteSession, confirmDeleteId, isDeleting, sessions, createSession, setPlan, clearPanel]);\n\n  const handleSelect = useCallback(\n    (sessionId: string) => {\n      switchSession(sessionId);\n      // Per-session state (plan, panel, activity) is restored automatically\n      // by SessionChat's useEffect when isActive flips to true.\n      onClose?.();\n    },\n    [switchSession, onClose],\n  );\n\n  const formatTime = (d: string) =>\n    new Date(d).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });\n\n  // -- Render -------------------------------------------------------------\n\n  return (\n    <Box\n      sx={{\n        height: '100%',\n        
display: 'flex',\n        flexDirection: 'column',\n        bgcolor: 'var(--panel)',\n      }}\n    >\n      {/* -- Header -------------------------------------------------------- */}\n      <Box sx={{ px: 1.75, pt: 2, pb: 0 }}>\n        <Typography\n          variant=\"caption\"\n          sx={{\n            color: 'var(--muted-text)',\n            fontSize: '0.65rem',\n            fontWeight: 600,\n            textTransform: 'uppercase',\n            letterSpacing: '0.08em',\n          }}\n        >\n          Recent chats\n        </Typography>\n      </Box>\n\n      {/* -- Capacity error ------------------------------------------------ */}\n      {capacityError && (\n        <Alert\n          severity=\"warning\"\n          variant=\"outlined\"\n          onClose={() => setCapacityError(null)}\n          sx={{\n            m: 1,\n            fontSize: '0.7rem',\n            py: 0.25,\n            '& .MuiAlert-message': { py: 0 },\n            borderColor: '#FF9D00',\n            color: 'var(--text)',\n          }}\n        >\n          {capacityError}\n        </Alert>\n      )}\n\n      {/* -- Session list -------------------------------------------------- */}\n      <Box\n        sx={{\n          flex: 1,\n          overflow: 'auto',\n          py: 1,\n          '&::-webkit-scrollbar': { width: 4 },\n          '&::-webkit-scrollbar-thumb': {\n            bgcolor: 'var(--scrollbar-thumb)',\n            borderRadius: 2,\n          },\n        }}\n      >\n        {sessions.length === 0 ? 
(\n          <Box\n            sx={{\n              display: 'flex',\n              flexDirection: 'column',\n              alignItems: 'center',\n              justifyContent: 'center',\n              py: 8,\n              px: 3,\n              gap: 1.5,\n            }}\n          >\n            <ChatBubbleOutlineIcon\n              sx={{ fontSize: 28, color: 'var(--muted-text)', opacity: 0.25 }}\n            />\n            <Typography\n              variant=\"caption\"\n              sx={{\n                color: 'var(--muted-text)',\n                opacity: 0.5,\n                textAlign: 'center',\n                lineHeight: 1.5,\n                fontSize: '0.72rem',\n              }}\n            >\n              No sessions yet\n            </Typography>\n          </Box>\n        ) : (\n          [...sessions].reverse().map((session, index) => {\n            const num = sessions.length - index;\n            const isSelected = session.id === activeSessionId;\n\n            return (\n              <Box\n                key={session.id}\n                onClick={() => handleSelect(session.id)}\n                sx={{\n                  display: 'flex',\n                  alignItems: 'center',\n                  gap: 1,\n                  px: 1.5,\n                  py: 0.875,\n                  mx: 0.75,\n                  mb: 0.2,\n                  borderRadius: '10px',\n                  cursor: 'pointer',\n                  transition: 'background-color 0.12s ease',\n                  bgcolor: isSelected\n                    ? 
'var(--hover-bg)'\n                    : 'transparent',\n                  '&:hover': {\n                    bgcolor: 'var(--hover-bg)',\n                  },\n                  '& .delete-btn': {\n                    opacity: 0,\n                    transition: 'opacity 0.12s',\n                  },\n                  '&:hover .delete-btn': {\n                    opacity: 1,\n                  },\n                }}\n              >\n                <ChatBubbleOutlineIcon\n                  sx={{\n                    fontSize: 15,\n                    color: isSelected ? 'var(--text)' : 'var(--muted-text)',\n                    opacity: isSelected ? 0.8 : 0.4,\n                    flexShrink: 0,\n                  }}\n                />\n\n                <Box sx={{ flex: 1, minWidth: 0 }}>\n                  <Typography\n                    variant=\"body2\"\n                    sx={{\n                      fontWeight: isSelected ? 600 : 400,\n                      color: 'var(--text)',\n                      fontSize: '0.84rem',\n                      lineHeight: 1.4,\n                      whiteSpace: 'nowrap',\n                      overflow: 'hidden',\n                      textOverflow: 'ellipsis',\n                    }}\n                  >\n                    {session.title.startsWith('Chat ') ? `Session ${String(num).padStart(2, '0')}` : session.title}\n                  </Typography>\n                  <Typography\n                    variant=\"caption\"\n                    sx={{\n                      color: 'var(--muted-text)',\n                      fontSize: '0.65rem',\n                      lineHeight: 1.2,\n                    }}\n                  >\n                    {session.expired ? 
'needs a catch-up' : formatTime(session.createdAt)}\n                  </Typography>\n                </Box>\n\n                {/* Attention badge — pulsing dot when background session needs approval */}\n                {session.needsAttention && !isSelected && (\n                  <Box\n                    sx={{\n                      width: 8,\n                      height: 8,\n                      borderRadius: '50%',\n                      bgcolor: 'var(--accent-yellow)',\n                      flexShrink: 0,\n                      animation: 'pulse 2s ease-in-out infinite',\n                      '@keyframes pulse': {\n                        '0%, 100%': { opacity: 1, transform: 'scale(1)' },\n                        '50%': { opacity: 0.5, transform: 'scale(0.8)' },\n                      },\n                    }}\n                  />\n                )}\n\n                <IconButton\n                  className=\"delete-btn\"\n                  size=\"small\"\n                  onClick={(e) => handleDeleteClick(session.id, e)}\n                  sx={{\n                    color: 'var(--muted-text)',\n                    width: 26,\n                    height: 26,\n                    flexShrink: 0,\n                    '&:hover': { color: 'var(--accent-red)', bgcolor: 'rgba(244,67,54,0.08)' },\n                  }}\n                >\n                  <DeleteOutlineIcon sx={{ fontSize: 15 }} />\n                </IconButton>\n              </Box>\n            );\n          })\n        )}\n      </Box>\n\n      {/* -- Footer: New Task + status ------------------------------------- */}\n      <Divider sx={{ opacity: 0.5 }} />\n      <Box\n        sx={{\n          px: 1.5,\n          py: 1.5,\n          display: 'flex',\n          flexDirection: 'column',\n          gap: 1,\n          flexShrink: 0,\n        }}\n      >\n        <Box\n          component=\"button\"\n          onClick={handleNewSession}\n          disabled={isCreatingSession}\n          
sx={{\n            display: 'inline-flex',\n            alignItems: 'center',\n            justifyContent: 'center',\n            gap: 0.75,\n            width: '100%',\n            px: 1.5,\n            py: 1.25,\n            border: 'none',\n            borderRadius: '10px',\n            bgcolor: '#FF9D00',\n            color: '#000',\n            fontSize: '0.85rem',\n            fontWeight: 700,\n            cursor: 'pointer',\n            transition: 'all 0.12s ease',\n            '&:hover': {\n              bgcolor: '#FFB340',\n            },\n            '&:disabled': {\n              opacity: 0.5,\n              cursor: 'not-allowed',\n            },\n          }}\n        >\n          {isCreatingSession ? (\n            <>\n              <CircularProgress size={12} sx={{ color: '#000' }} />\n              Creating...\n            </>\n          ) : (\n            <>\n              <AddIcon sx={{ fontSize: 16 }} />\n              New Task\n            </>\n          )}\n        </Box>\n\n      </Box>\n      {/* Delete confirmation dialog */}\n      <Dialog\n        open={!!confirmDeleteId}\n        onClose={() => !isDeleting && setConfirmDeleteId(null)}\n        slotProps={{\n          backdrop: { sx: { backgroundColor: 'rgba(0,0,0,0.5)', backdropFilter: 'blur(4px)' } },\n        }}\n        PaperProps={{\n          sx: {\n            bgcolor: 'var(--panel)',\n            border: '1px solid var(--border)',\n            borderRadius: 'var(--radius-md)',\n            boxShadow: 'var(--shadow-1)',\n            maxWidth: 340,\n            mx: 2,\n          },\n        }}\n      >\n        <DialogTitle\n          sx={{\n            color: 'var(--text)',\n            fontWeight: 700,\n            fontSize: '0.95rem',\n            pb: 0,\n            pt: 2.5,\n            px: 3,\n          }}\n        >\n          Delete conversation?\n        </DialogTitle>\n        <DialogContent sx={{ px: 3, pt: 1 }}>\n          <DialogContentText\n            sx={{\n           
   color: 'var(--muted-text)',\n              fontSize: '0.82rem',\n              lineHeight: 1.6,\n            }}\n          >\n            This will permanently remove this conversation and its history.\n          </DialogContentText>\n        </DialogContent>\n        <DialogActions sx={{ px: 3, pb: 2.5, gap: 1 }}>\n          <Button\n            onClick={() => setConfirmDeleteId(null)}\n            size=\"small\"\n            disabled={isDeleting}\n            sx={{\n              color: 'var(--muted-text)',\n              fontSize: '0.82rem',\n              px: 2,\n              '&:hover': { bgcolor: 'var(--hover-bg)' },\n            }}\n          >\n            Cancel\n          </Button>\n          <Button\n            onClick={handleDeleteConfirm}\n            variant=\"contained\"\n            size=\"small\"\n            disabled={isDeleting}\n            startIcon={isDeleting ? <CircularProgress size={16} sx={{ color: '#fff' }} /> : undefined}\n            sx={{\n              fontSize: '0.82rem',\n              px: 2.5,\n              bgcolor: 'var(--accent-red)',\n              color: '#fff',\n              boxShadow: 'none',\n              '&:hover': {\n                bgcolor: 'var(--accent-red)',\n                filter: 'brightness(1.15)',\n                boxShadow: 'none',\n              },\n              '&.Mui-disabled': {\n                bgcolor: 'var(--accent-red)',\n                color: '#fff',\n                opacity: 0.7,\n              },\n            }}\n          >\n            {isDeleting ? 'Deleting...' : 'Delete'}\n          </Button>\n        </DialogActions>\n      </Dialog>\n    </Box>\n  );\n}\n"
  },
  {
    "path": "frontend/src/components/WelcomeScreen/WelcomeScreen.tsx",
    "content": "import { useState, useCallback, useEffect, useRef, type ReactNode } from 'react';\nimport {\n  Box,\n  Typography,\n  Button,\n  CircularProgress,\n  Alert,\n} from '@mui/material';\nimport CheckCircleIcon from '@mui/icons-material/CheckCircle';\nimport OpenInNewIcon from '@mui/icons-material/OpenInNew';\nimport GroupAddIcon from '@mui/icons-material/GroupAdd';\nimport LoginIcon from '@mui/icons-material/Login';\nimport RocketLaunchIcon from '@mui/icons-material/RocketLaunch';\nimport { useSessionStore } from '@/store/sessionStore';\nimport { useAgentStore } from '@/store/agentStore';\nimport { apiFetch } from '@/utils/api';\nimport { isInIframe, triggerLogin } from '@/hooks/useAuth';\nimport { useOrgMembership } from '@/hooks/useOrgMembership';\n\nconst HF_ORANGE = '#FF9D00';\nconst ORG_JOIN_URL =\n  'https://huggingface.co/organizations/ml-agent-explorers/share/GzPMJUivoFPlfkvFtIqEouZKSytatKQSZT';\n\n// ---------------------------------------------------------------------------\n// ChecklistStep sub-component\n// ---------------------------------------------------------------------------\n\ntype StepStatus = 'completed' | 'active' | 'locked';\n\ninterface ChecklistStepProps {\n  stepNumber: number;\n  title: string;\n  description: string;\n  status: StepStatus;\n  lockedReason?: string;\n  actionLabel?: string;\n  onAction?: () => void;\n  actionIcon?: ReactNode;\n  actionHref?: string;\n  loading?: boolean;\n  isLast?: boolean;\n}\n\nfunction StepIndicator({ status, stepNumber }: { status: StepStatus; stepNumber: number }) {\n  if (status === 'completed') {\n    return <CheckCircleIcon sx={{ fontSize: 28, color: 'var(--accent-green)' }} />;\n  }\n  return (\n    <Box\n      sx={{\n        width: 28,\n        height: 28,\n        borderRadius: '50%',\n        display: 'flex',\n        alignItems: 'center',\n        justifyContent: 'center',\n        fontSize: '0.8rem',\n        fontWeight: 700,\n        ...(status === 'active'\n          ? 
{ bgcolor: HF_ORANGE, color: '#000' }\n          : { bgcolor: 'transparent', border: '2px solid var(--border)', color: 'var(--muted-text)' }),\n      }}\n    >\n      {stepNumber}\n    </Box>\n  );\n}\n\nfunction ChecklistStep({\n  stepNumber,\n  title,\n  description,\n  status,\n  lockedReason,\n  actionLabel,\n  onAction,\n  actionIcon,\n  actionHref,\n  loading = false,\n  isLast = false,\n}: ChecklistStepProps) {\n  const btnSx = {\n    px: 3,\n    py: 0.75,\n    fontSize: '0.85rem',\n    fontWeight: 700,\n    textTransform: 'none' as const,\n    borderRadius: '10px',\n    whiteSpace: 'nowrap' as const,\n    textDecoration: 'none',\n    ...(status === 'active'\n      ? {\n          bgcolor: HF_ORANGE,\n          color: '#000',\n          boxShadow: '0 2px 12px rgba(255, 157, 0, 0.25)',\n          '&:hover': { bgcolor: '#FFB340', boxShadow: '0 4px 20px rgba(255, 157, 0, 0.4)' },\n        }\n      : {\n          bgcolor: 'rgba(255,255,255,0.04)',\n          color: 'var(--muted-text)',\n          '&.Mui-disabled': { bgcolor: 'rgba(255,255,255,0.04)', color: 'var(--muted-text)' },\n        }),\n  };\n\n  return (\n    <Box\n      sx={{\n        display: 'flex',\n        alignItems: 'center',\n        gap: 2,\n        px: 3,\n        py: 2.5,\n        borderLeft: '3px solid',\n        borderLeftColor:\n          status === 'completed'\n            ? 'var(--accent-green)'\n            : status === 'active'\n              ? HF_ORANGE\n              : 'transparent',\n        ...(!isLast && { borderBottom: '1px solid var(--border)' }),\n        opacity: status === 'locked' ? 0.55 : 1,\n        transition: 'opacity 0.2s, border-color 0.2s',\n      }}\n    >\n      <StepIndicator status={status} stepNumber={stepNumber} />\n\n      <Box sx={{ flex: 1, minWidth: 0 }}>\n        <Typography\n          variant=\"subtitle2\"\n          sx={{\n            fontWeight: 600,\n            fontSize: '0.92rem',\n            color: status === 'completed' ? 
'var(--muted-text)' : 'var(--text)',\n            ...(status === 'completed' && { textDecoration: 'line-through', textDecorationColor: 'var(--muted-text)' }),\n          }}\n        >\n          {title}\n        </Typography>\n        <Typography variant=\"body2\" sx={{ color: 'var(--muted-text)', fontSize: '0.8rem', mt: 0.25, lineHeight: 1.5 }}>\n          {status === 'locked' && lockedReason ? lockedReason : description}\n        </Typography>\n      </Box>\n\n      {status === 'completed' ? (\n        <Typography variant=\"caption\" sx={{ color: 'var(--accent-green)', fontWeight: 600, fontSize: '0.78rem', whiteSpace: 'nowrap' }}>\n          Done\n        </Typography>\n      ) : actionLabel ? (\n        actionHref ? (\n          <Button\n            variant=\"contained\"\n            size=\"small\"\n            component=\"a\"\n            href={actionHref}\n            target=\"_blank\"\n            rel=\"noopener noreferrer\"\n            disabled={status === 'locked'}\n            startIcon={actionIcon}\n            sx={btnSx}\n            onClick={onAction}\n          >\n            {actionLabel}\n          </Button>\n        ) : (\n          <Button\n            variant=\"contained\"\n            size=\"small\"\n            disabled={status === 'locked' || loading}\n            startIcon={loading ? <CircularProgress size={16} color=\"inherit\" /> : actionIcon}\n            onClick={onAction}\n            sx={btnSx}\n          >\n            {loading ? 'Loading...' 
: actionLabel}\n          </Button>\n        )\n      ) : null}\n    </Box>\n  );\n}\n\n// ---------------------------------------------------------------------------\n// WelcomeScreen\n// ---------------------------------------------------------------------------\n\nexport default function WelcomeScreen() {\n  const { createSession } = useSessionStore();\n  const { setPlan, clearPanel, user } = useAgentStore();\n  const [isCreating, setIsCreating] = useState(false);\n  const [error, setError] = useState<string | null>(null);\n\n  const inIframe = isInIframe();\n  const isAuthenticated = !!user?.authenticated;\n  const isDevUser = user?.username === 'dev';\n\n  // Iframe: localStorage-based org tracking (no auth token available)\n  const [iframeOrgJoined, setIframeOrgJoined] = useState(() => {\n    try { return localStorage.getItem('hf-agent-org-joined') === '1'; } catch { return false; }\n  });\n  const joinLinkOpened = useRef(false);\n\n  // Auto-advance when user returns from org join link (iframe only)\n  useEffect(() => {\n    if (!inIframe) return;\n    const handleVisibility = () => {\n      if (document.visibilityState !== 'visible' || !joinLinkOpened.current) return;\n      joinLinkOpened.current = false;\n      try { localStorage.setItem('hf-agent-org-joined', '1'); } catch { /* ignore */ }\n      setIframeOrgJoined(true);\n    };\n    document.addEventListener('visibilitychange', handleVisibility);\n    return () => document.removeEventListener('visibilitychange', handleVisibility);\n  }, [inIframe]);\n\n  const isOrgMember = inIframe ? 
iframeOrgJoined : !!user?.orgMember;\n\n  // Poll for org membership once authenticated (skipped in dev mode and iframe)\n  const popupRef = useOrgMembership(isAuthenticated && !isDevUser && !inIframe && !isOrgMember);\n\n  // ---- Actions ----\n\n  const handleJoinOrg = useCallback(() => {\n    if (inIframe) {\n      // Iframe: open link, track via visibilitychange + localStorage\n      joinLinkOpened.current = true;\n      window.open(ORG_JOIN_URL, '_blank', 'noopener,noreferrer');\n      return;\n    }\n    // Direct: open as popup, auto-close via polling\n    const popup = window.open(ORG_JOIN_URL, 'hf-org-join', 'noopener');\n    if (popup) {\n      popupRef.current = popup;\n    } else {\n      window.open(ORG_JOIN_URL, '_blank', 'noopener,noreferrer');\n    }\n  }, [popupRef, inIframe]);\n\n  const handleStartSession = useCallback(async () => {\n    if (isCreating) return;\n    setIsCreating(true);\n    setError(null);\n\n    try {\n      const response = await apiFetch('/api/session', { method: 'POST' });\n      if (response.status === 503) {\n        const data = await response.json();\n        setError(data.detail || 'Server is at capacity. Please try again later.');\n        return;\n      }\n      if (response.status === 401) {\n        triggerLogin();\n        return;\n      }\n      if (!response.ok) {\n        setError('Failed to create session. Please try again.');\n        return;\n      }\n      const data = await response.json();\n      createSession(data.session_id);\n      setPlan([]);\n      clearPanel();\n    } catch {\n      // Redirect may throw — ignore\n    } finally {\n      setIsCreating(false);\n    }\n  }, [isCreating, createSession, setPlan, clearPanel]);\n\n  // ---- Step status helpers ----\n\n  const signInStatus: StepStatus = isAuthenticated ? 'completed' : 'active';\n  const joinOrgStatus: StepStatus = isOrgMember ? 'completed' : isAuthenticated ? 
'active' : 'locked';\n  const startStatus: StepStatus = isAuthenticated && isOrgMember ? 'active' : 'locked';\n\n  // Space URL for iframe \"Open ML Intern\" step\n  const spaceHost =\n    typeof window !== 'undefined'\n      ? window.location.hostname.includes('.hf.space')\n        ? window.location.origin\n        : 'https://smolagents-ml-intern.hf.space'\n      : '';\n\n  return (\n    <Box\n      sx={{\n        width: '100%',\n        height: '100%',\n        display: 'flex',\n        flexDirection: 'column',\n        alignItems: 'center',\n        justifyContent: 'center',\n        background: 'var(--body-gradient)',\n        py: 8,\n      }}\n    >\n      {/* Logo */}\n      <Box\n        component=\"img\"\n        src=\"/smolagents.webp\"\n        alt=\"smolagents\"\n        sx={{ width: 80, height: 80, mb: 2.5, display: 'block' }}\n      />\n\n      {/* Title */}\n      <Typography\n        variant=\"h2\"\n        sx={{\n          fontWeight: 800,\n          color: 'var(--text)',\n          mb: 1,\n          letterSpacing: '-0.02em',\n          fontSize: { xs: '1.8rem', md: '2.4rem' },\n        }}\n      >\n        ML Intern\n      </Typography>\n\n      {/* Description */}\n      <Typography\n        variant=\"body1\"\n        sx={{\n          color: 'var(--muted-text)',\n          maxWidth: 480,\n          mb: 4,\n          lineHeight: 1.7,\n          fontSize: '0.9rem',\n          textAlign: 'center',\n          px: 2,\n          '& strong': { color: 'var(--text)', fontWeight: 600 },\n        }}\n      >\n        Your personal <strong>ML agent</strong>. It reads <strong>papers</strong>, finds <strong>datasets</strong>, trains <strong>models</strong>, and iterates until the numbers go up. Instructions in. 
Trained model out.\n      </Typography>\n\n      {/* ── Checklist ──────────────────────────────────────────── */}\n      <Box\n        sx={{\n          width: '100%',\n          maxWidth: 520,\n          bgcolor: 'var(--surface)',\n          border: '1px solid var(--border)',\n          borderRadius: '12px',\n          overflow: 'hidden',\n          mx: 2,\n        }}\n      >\n        {isDevUser ? (\n          /* Dev mode: single step */\n          <ChecklistStep\n            stepNumber={1}\n            title=\"Start Session\"\n            description=\"Launch an AI agent session for ML engineering.\"\n            status=\"active\"\n            actionLabel=\"Start Session\"\n            actionIcon={<RocketLaunchIcon sx={{ fontSize: 16 }} />}\n            onAction={handleStartSession}\n            loading={isCreating}\n            isLast\n          />\n        ) : inIframe ? (\n          /* Iframe: 2 steps */\n          <>\n            <ChecklistStep\n              stepNumber={1}\n              title=\"Join ML Agent Explorers\"\n              description=\"Get free access to GPUs, inference APIs, and Hub resources.\"\n              status={isOrgMember ? 'completed' : 'active'}\n              actionLabel=\"Join Organization\"\n              actionIcon={<GroupAddIcon sx={{ fontSize: 16 }} />}\n              onAction={handleJoinOrg}\n            />\n            <ChecklistStep\n              stepNumber={2}\n              title=\"Open ML Intern\"\n              description=\"Open the agent in a full browser tab to get started.\"\n              status={isOrgMember ? 
'active' : 'locked'}\n              lockedReason=\"Join the organization first.\"\n              actionLabel=\"Open ML Intern\"\n              actionIcon={<OpenInNewIcon sx={{ fontSize: 16 }} />}\n              actionHref={spaceHost}\n              isLast\n            />\n          </>\n        ) : (\n          /* Direct access: 3 steps */\n          <>\n            <ChecklistStep\n              stepNumber={1}\n              title=\"Sign in with Hugging Face\"\n              description=\"Authenticate to access GPU resources and model APIs.\"\n              status={signInStatus}\n              actionLabel=\"Sign in\"\n              actionIcon={<LoginIcon sx={{ fontSize: 16 }} />}\n              onAction={() => triggerLogin()}\n            />\n            <ChecklistStep\n              stepNumber={2}\n              title=\"Join ML Agent Explorers\"\n              description=\"Get free access to GPUs, inference APIs, and Hub resources.\"\n              status={joinOrgStatus}\n              lockedReason=\"Sign in first to continue.\"\n              actionLabel=\"Join Organization\"\n              actionIcon={<GroupAddIcon sx={{ fontSize: 16 }} />}\n              onAction={handleJoinOrg}\n            />\n            <ChecklistStep\n              stepNumber={3}\n              title=\"Start Session\"\n              description=\"Launch an AI agent session for ML engineering.\"\n              status={startStatus}\n              lockedReason=\"Complete the steps above to continue.\"\n              actionLabel=\"Start Session\"\n              actionIcon={<RocketLaunchIcon sx={{ fontSize: 16 }} />}\n              onAction={handleStartSession}\n              loading={isCreating}\n              isLast\n            />\n          </>\n        )}\n      </Box>\n\n      {/* Polling hint when waiting for org join */}\n      {isAuthenticated && !isOrgMember && !isDevUser && !inIframe && (\n        <Typography\n          variant=\"caption\"\n          sx={{ mt: 2, color: 
'var(--muted-text)', fontSize: '0.75rem', textAlign: 'center' }}\n        >\n          This page updates automatically when you join the organization.\n        </Typography>\n      )}\n\n      {/* Error */}\n      {error && (\n        <Alert\n          severity=\"warning\"\n          variant=\"outlined\"\n          onClose={() => setError(null)}\n          sx={{\n            mt: 3,\n            maxWidth: 400,\n            fontSize: '0.8rem',\n            borderColor: HF_ORANGE,\n            color: 'var(--text)',\n          }}\n        >\n          {error}\n        </Alert>\n      )}\n\n      {/* Footnote */}\n      <Typography\n        variant=\"caption\"\n        sx={{ mt: 4, color: 'var(--muted-text)', opacity: 0.5, fontSize: '0.7rem' }}\n      >\n        Conversations are stored locally in your browser.\n      </Typography>\n    </Box>\n  );\n}\n"
  },
  {
    "path": "frontend/src/hooks/useAgentChat.ts",
    "content": "/**\n * Central hook wiring the Vercel AI SDK's useChat with our SSE-based\n * ChatTransport.\n *\n * In the per-session architecture, each session mounts its own instance\n * of this hook. Side-channel callbacks always update the session's own\n * state via `updateSession()`. If the session is currently active, the\n * store automatically mirrors updates to the flat global fields.\n */\nimport { useCallback, useEffect, useMemo, useRef } from 'react';\nimport { useChat } from '@ai-sdk/react';\nimport { type UIMessage, lastAssistantMessageIsCompleteWithApprovalResponses } from 'ai';\nimport { SSEChatTransport, type SideChannelCallbacks } from '@/lib/sse-chat-transport';\nimport { loadMessages, saveMessages } from '@/lib/chat-message-store';\nimport { saveBackendMessages } from '@/lib/backend-message-store';\nimport { saveResearch, loadResearch, clearResearch, RESEARCH_MAX_STEPS } from '@/lib/research-store';\nimport { llmMessagesToUIMessages } from '@/lib/convert-llm-messages';\nimport { apiFetch } from '@/utils/api';\nimport { useAgentStore } from '@/store/agentStore';\nimport { useSessionStore } from '@/store/sessionStore';\nimport { useLayoutStore } from '@/store/layoutStore';\nimport { logger } from '@/utils/logger';\n\ninterface UseAgentChatOptions {\n  sessionId: string;\n  isActive: boolean;\n  onReady?: () => void;\n  onError?: (error: string) => void;\n  onSessionDead?: (sessionId: string) => void;\n}\n\nexport function useAgentChat({ sessionId, isActive, onReady, onError, onSessionDead }: UseAgentChatOptions) {\n  const callbacksRef = useRef({ onReady, onError, onSessionDead });\n  callbacksRef.current = { onReady, onError, onSessionDead };\n\n  const isActiveRef = useRef(isActive);\n  isActiveRef.current = isActive;\n\n  const { setNeedsAttention } = useSessionStore();\n\n  // Helper: update this session's state (mirrors to globals if active)\n  const updateSession = useAgentStore.getState().updateSession;\n\n  // -- Build side-channel 
callbacks (stable ref) --------------------------\n  const sideChannel = useMemo<SideChannelCallbacks>(\n    () => ({\n      onReady: () => {\n        updateSession(sessionId, { isProcessing: false });\n        if (isActiveRef.current) {\n          useAgentStore.getState().setConnected(true);\n        }\n        useSessionStore.getState().setSessionActive(sessionId, true);\n        callbacksRef.current.onReady?.();\n      },\n      onShutdown: () => {\n        updateSession(sessionId, { isProcessing: false });\n        if (isActiveRef.current) {\n          useAgentStore.getState().setConnected(false);\n        }\n      },\n      onError: (error: string) => {\n        updateSession(sessionId, { isProcessing: false });\n        if (isActiveRef.current) {\n          useAgentStore.getState().setError(error);\n        }\n        callbacksRef.current.onError?.(error);\n      },\n      onProcessing: () => {\n        updateSession(sessionId, {\n          isProcessing: true,\n          activityStatus: { type: 'thinking' },\n        });\n      },\n      onProcessingDone: () => {\n        updateSession(sessionId, { isProcessing: false });\n      },\n      onUndoComplete: () => {\n        updateSession(sessionId, { isProcessing: false });\n      },\n      onCompacted: (oldTokens: number, newTokens: number) => {\n        logger.log(`Context compacted: ${oldTokens} -> ${newTokens} tokens`);\n      },\n      onPlanUpdate: (plan) => {\n        const typed = plan as Array<{ id: string; content: string; status: 'pending' | 'in_progress' | 'completed' }>;\n        updateSession(sessionId, { plan: typed });\n        if (isActiveRef.current && !useLayoutStore.getState().isRightPanelOpen) {\n          useLayoutStore.getState().setRightPanelOpen(true);\n        }\n      },\n      onToolLog: (tool: string, log: string, agentId?: string, label?: string) => {\n        // Research sub-agent: parse stats vs step logs (per-agent)\n        if (tool === 'research') {\n          const aid = 
agentId || 'research';\n          const sessState = useAgentStore.getState().getSessionState(sessionId);\n          const agents = { ...sessState.researchAgents };\n          const agent = agents[aid] || { label: label || 'research', steps: [], stats: { toolCount: 0, tokenCount: 0, startedAt: null, finalElapsed: null } };\n\n          if (log === 'Starting research sub-agent...') {\n            agents[aid] = {\n              label: label || 'research',\n              steps: [],\n              stats: { toolCount: 0, tokenCount: 0, startedAt: Date.now(), finalElapsed: null },\n            };\n            // Also update legacy flat fields (aggregate of all agents)\n            const allSteps = Object.values(agents).flatMap(a => a.steps);\n            const anyRunning = Object.values(agents).some(a => a.stats.startedAt !== null);\n            updateSession(sessionId, {\n              researchAgents: agents,\n              researchSteps: allSteps.slice(-RESEARCH_MAX_STEPS),\n              researchStats: anyRunning ? agents[aid].stats : sessState.researchStats,\n              activityStatus: { type: 'tool', toolName: 'research', description: label || log },\n            });\n            saveResearch(sessionId, allSteps.slice(-RESEARCH_MAX_STEPS), agents[aid].stats);\n          } else if (log.startsWith('tokens:')) {\n            agent.stats = { ...agent.stats, tokenCount: parseInt(log.slice(7), 10) };\n            agents[aid] = agent;\n            updateSession(sessionId, { researchAgents: agents });\n          } else if (log.startsWith('tools:')) {\n            agent.stats = { ...agent.stats, toolCount: parseInt(log.slice(6), 10) };\n            agents[aid] = agent;\n            updateSession(sessionId, { researchAgents: agents });\n          } else if (log === 'Research complete.') {\n            const elapsed = agent.stats.startedAt\n              ? 
Math.round((Date.now() - agent.stats.startedAt) / 1000)\n              : null;\n            agent.stats = { ...agent.stats, startedAt: null, finalElapsed: elapsed };\n            agents[aid] = agent;\n            const anyRunning = Object.values(agents).some(a => a.stats.startedAt !== null);\n            updateSession(sessionId, {\n              researchAgents: agents,\n              researchStats: anyRunning ? sessState.researchStats : agent.stats,\n              activityStatus: { type: 'tool', toolName: 'research', description: log },\n            });\n            // Clear persistence only when ALL agents are done\n            if (!anyRunning) clearResearch(sessionId);\n          } else {\n            // Regular tool call step — append to this agent\n            agent.steps = [...agent.steps, log].slice(-RESEARCH_MAX_STEPS);\n            agents[aid] = agent;\n            const allSteps = Object.values(agents).flatMap(a => a.steps);\n            updateSession(sessionId, {\n              researchAgents: agents,\n              researchSteps: allSteps.slice(-RESEARCH_MAX_STEPS),\n              activityStatus: { type: 'tool', toolName: 'research', description: log },\n            });\n            saveResearch(sessionId, allSteps.slice(-RESEARCH_MAX_STEPS), agent.stats);\n          }\n          return;\n        }\n\n        const STREAMABLE_TOOLS = new Set(['hf_jobs', 'sandbox', 'bash']);\n        if (!STREAMABLE_TOOLS.has(tool)) return;\n\n        const sessState = useAgentStore.getState().getSessionState(sessionId);\n        const existingOutput = sessState.panelData?.output?.content || '';\n\n        const newContent = existingOutput\n          ? existingOutput + '\\n' + log\n          : log;\n\n        if (!sessState.panelData) {\n          const title = tool === 'bash' ? 'Sandbox' : tool === 'sandbox' ? 
'Sandbox' : 'Job Output';\n          updateSession(sessionId, {\n            panelData: { title, output: { content: newContent, language: 'text' } },\n            panelView: 'output',\n          });\n        } else {\n          updateSession(sessionId, {\n            panelData: { ...sessState.panelData, output: { content: newContent, language: 'text' } },\n            panelView: 'output',\n          });\n        }\n\n        if (isActiveRef.current && !useLayoutStore.getState().isRightPanelOpen) {\n          useLayoutStore.getState().setRightPanelOpen(true);\n        }\n      },\n      onConnectionChange: (connected: boolean) => {\n        if (isActiveRef.current) useAgentStore.getState().setConnected(connected);\n      },\n      onSessionDead: (deadSessionId: string) => {\n        logger.warn(`Session ${deadSessionId} dead, removing`);\n        callbacksRef.current.onSessionDead?.(deadSessionId);\n      },\n      onApprovalRequired: (tools) => {\n        if (!tools.length) return;\n        setNeedsAttention(sessionId, true);\n\n        updateSession(sessionId, { activityStatus: { type: 'waiting-approval' } });\n\n        // Build panel data for this session's pending approval\n        const firstTool = tools[0];\n        const args = firstTool.arguments as Record<string, string | undefined>;\n\n        let panelUpdate: Partial<import('@/store/agentStore').PerSessionState> | undefined;\n        if (firstTool.tool === 'hf_jobs' && args.script) {\n          panelUpdate = {\n            panelData: {\n              title: 'Script',\n              script: { content: args.script, language: 'python' },\n              parameters: firstTool.arguments as Record<string, unknown>,\n            },\n            panelView: 'script' as const,\n            panelEditable: true,\n          };\n        } else if (firstTool.tool === 'hf_repo_files' && args.content) {\n          const filename = args.path || 'file';\n          panelUpdate = {\n            panelData: {\n              
title: filename.split('/').pop() || 'Content',\n              script: { content: args.content, language: filename.endsWith('.py') ? 'python' : 'text' },\n              parameters: firstTool.arguments as Record<string, unknown>,\n            },\n          };\n        } else {\n          panelUpdate = {\n            panelData: {\n              title: firstTool.tool,\n              output: { content: JSON.stringify(firstTool.arguments, null, 2), language: 'json' },\n            },\n            panelView: 'output' as const,\n          };\n        }\n        if (panelUpdate) updateSession(sessionId, panelUpdate);\n\n        if (isActiveRef.current) {\n          useLayoutStore.getState().setRightPanelOpen(true);\n          useLayoutStore.getState().setLeftSidebarOpen(false);\n        }\n      },\n      onToolCallPanel: (toolName: string, args: Record<string, unknown>) => {\n        if (toolName === 'hf_jobs' && args.operation && args.script) {\n          updateSession(sessionId, {\n            panelData: {\n              title: 'Script',\n              script: { content: String(args.script), language: 'python' },\n              parameters: args,\n            },\n            panelView: 'script',\n          });\n          if (isActiveRef.current) {\n            useLayoutStore.getState().setRightPanelOpen(true);\n            useLayoutStore.getState().setLeftSidebarOpen(false);\n          }\n        } else if (toolName === 'hf_repo_files' && args.operation === 'upload' && args.content) {\n          updateSession(sessionId, {\n            panelData: {\n              title: `File Upload: ${String(args.path || 'unnamed')}`,\n              script: { content: String(args.content), language: String(args.path || '').endsWith('.py') ? 
'python' : 'text' },\n              parameters: args,\n            },\n          });\n          if (isActiveRef.current) {\n            useLayoutStore.getState().setRightPanelOpen(true);\n            useLayoutStore.getState().setLeftSidebarOpen(false);\n          }\n        } else if (toolName === 'bash' && args.command) {\n          updateSession(sessionId, {\n            panelData: {\n              title: 'Sandbox',\n              script: { content: String(args.command), language: 'bash' },\n            },\n            panelView: 'output',\n          });\n        }\n      },\n      onToolOutputPanel: (toolName: string, _toolCallId: string, output: string, success: boolean) => {\n        const sessState = useAgentStore.getState().getSessionState(sessionId);\n        if (toolName === 'hf_jobs' && output) {\n          updateSession(sessionId, {\n            panelData: sessState.panelData\n              ? { ...sessState.panelData, output: { content: output, language: 'markdown' } }\n              : { title: 'Output', output: { content: output, language: 'markdown' } },\n            panelView: !success ? 
'output' : sessState.panelView,\n          });\n        } else if (toolName === 'bash') {\n          if (!success) {\n            updateSession(sessionId, { panelView: 'output' });\n          }\n        }\n      },\n      onStreaming: () => {\n        updateSession(sessionId, { activityStatus: { type: 'streaming' } });\n      },\n      onToolRunning: (toolName: string, description?: string) => {\n        const updates: Partial<import('@/store/agentStore').PerSessionState> = {\n          activityStatus: { type: 'tool', toolName, description },\n        };\n        // Clear research steps + stats when a new research call starts\n        if (toolName === 'research') {\n          updates.researchSteps = [];\n          updates.researchStats = { toolCount: 0, tokenCount: 0, startedAt: null, finalElapsed: null };\n        }\n        updateSession(sessionId, updates);\n      },\n      onInterrupted: () => { /* no-op — handled by stop() caller */ },\n    }),\n    // eslint-disable-next-line react-hooks/exhaustive-deps\n    [sessionId],\n  );\n\n  // -- Create transport (one per session, stable for lifetime) ------------\n  const transportRef = useRef<SSEChatTransport | null>(null);\n  if (!transportRef.current) {\n    transportRef.current = new SSEChatTransport(sessionId, sideChannel);\n  }\n\n  // Keep side-channel callbacks in sync\n  useEffect(() => {\n    transportRef.current?.updateSideChannel(sideChannel);\n  }, [sideChannel]);\n\n  // Destroy transport on unmount\n  useEffect(() => {\n    return () => {\n      transportRef.current?.destroy();\n      transportRef.current = null;\n    };\n  }, []);\n\n  // -- Restore persisted messages for this session ------------------------\n  const initialMessages = useMemo(\n    () => loadMessages(sessionId),\n    [sessionId],\n  );\n\n  // -- Ref for chat actions (used by sideChannel callbacks) ---------------\n  const chatActionsRef = useRef<{\n    setMessages: ((msgs: UIMessage[]) => void) | null;\n    messages: UIMessage[];\n  
}>({ setMessages: null, messages: [] });\n\n  // -- useChat from Vercel AI SDK -----------------------------------------\n  const chat = useChat({\n    id: sessionId,\n    messages: initialMessages,\n    transport: transportRef.current!,\n    experimental_throttle: 80,\n    // On mount, the SDK calls transport.reconnectToStream() which checks\n    // is_processing and subscribes to the live event stream if the agent\n    // is mid-turn.  Without this, page refresh kills live updates.\n    resume: true,\n    // After all approval responses are set, auto-send to continue the agent loop.\n    // Without this, addToolApprovalResponse only updates the UI — it won't trigger\n    // sendMessages on the transport.\n    sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,\n    onError: (error) => {\n      updateSession(sessionId, { isProcessing: false });\n      // Claude daily-cap: open the cap dialog instead of the generic error\n      // banner. Transport marks the error with this sentinel.\n      if (error.message === 'CLAUDE_QUOTA_EXHAUSTED') {\n        if (isActiveRef.current) {\n          useAgentStore.getState().setClaudeQuotaExhausted(true);\n        }\n        return;\n      }\n      logger.error('useChat error:', error);\n      if (isActiveRef.current) {\n        useAgentStore.getState().setError(error.message);\n      }\n    },\n  });\n\n  // Keep chatActionsRef in sync every render\n  chatActionsRef.current.setMessages = chat.setMessages;\n  chatActionsRef.current.messages = chat.messages;\n\n  // -- Hydrate from backend on mount (page refresh recovery) --------------\n  useEffect(() => {\n    let cancelled = false;\n    (async () => {\n      try {\n        const [msgsRes, infoRes] = await Promise.all([\n          apiFetch(`/api/session/${sessionId}/messages`),\n          apiFetch(`/api/session/${sessionId}`),\n        ]);\n        if (cancelled) return;\n\n        // If both endpoints say \"not found\", the backend lost this session\n    
    // (typically: Space restarted). Fire onSessionDead so AppLayout\n        // can flag it for the catch-up banner.\n        if (infoRes.status === 404 && msgsRes.status === 404) {\n          callbacksRef.current.onSessionDead?.(sessionId);\n          return;\n        }\n\n        let pendingIds: Set<string> | undefined;\n        let backendIsProcessing = false;\n        if (infoRes.ok) {\n          const info = await infoRes.json();\n          backendIsProcessing = !!info.is_processing;\n          if (info.pending_approval && Array.isArray(info.pending_approval)) {\n            pendingIds = new Set(\n              info.pending_approval.map((t: { tool_call_id: string }) => t.tool_call_id)\n            );\n            if (pendingIds.size > 0) {\n              setNeedsAttention(sessionId, true);\n            }\n          }\n        }\n\n        if (msgsRes.ok) {\n          const data = await msgsRes.json();\n          if (cancelled || !Array.isArray(data) || data.length === 0) return;\n          // Cache the raw backend messages so we can restore this session\n          // into a fresh backend if the Space restarts.\n          saveBackendMessages(sessionId, data);\n          const uiMsgs = llmMessagesToUIMessages(data, pendingIds, chatActionsRef.current.messages);\n          if (uiMsgs.length > 0) {\n            chat.setMessages(uiMsgs);\n            saveMessages(sessionId, uiMsgs);\n          }\n        }\n\n        // Use the backend's is_processing flag as the source of truth.\n        // Message-based inference doesn't work because completed tool\n        // results make tools look \"done\" even when the agent is still\n        // mid-turn and about to call more tools.\n        if (backendIsProcessing) {\n          // Restore research sub-agent state alongside isProcessing in one\n          // atomic update so the UI never sees isProcessing=false with stale\n          // tool states (which would coerce them to 'output-available').\n          const savedResearch 
= loadResearch(sessionId);\n          updateSession(sessionId, {\n            isProcessing: true,\n            activityStatus: savedResearch?.stats.startedAt\n              ? { type: 'tool', toolName: 'research', description: 'Resuming research...' }\n              : { type: 'thinking' },\n            ...(savedResearch && {\n              researchSteps: savedResearch.steps,\n              researchStats: savedResearch.stats,\n            }),\n          });\n        } else if (pendingIds && pendingIds.size > 0) {\n          updateSession(sessionId, { activityStatus: { type: 'waiting-approval' } });\n          clearResearch(sessionId);\n        } else {\n          clearResearch(sessionId);\n        }\n      } catch {\n        /* backend unreachable -- localStorage fallback is fine */\n      }\n    })();\n    return () => { cancelled = true; };\n  }, [sessionId]); // eslint-disable-line react-hooks/exhaustive-deps\n\n  // -- Re-hydrate + reconnect on wake from sleep ----------------------------\n  // The Vercel AI SDK only calls reconnectToStream() on mount, NOT on\n  // visibility change.  So when the browser wakes from sleep and the SSE\n  // stream is dead, we must manually:\n  //   1. Re-hydrate messages (one-shot fetch from backend)\n  //   2. Subscribe to live events via GET /api/events/{sessionId}\n  //   3. Pipe those events through the side-channel callbacks for real-time UI\n  //   4. Poll messages every few seconds so chat.setMessages stays in sync\n  const reconnectAbortRef = useRef<AbortController | null>(null);\n  const pollTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);\n\n  useEffect(() => {\n    /** Fetch latest messages from backend and push into the SDK. 
*/\n    const hydrateMessages = async () => {\n      try {\n        const [msgsRes, infoRes] = await Promise.all([\n          apiFetch(`/api/session/${sessionId}/messages`),\n          apiFetch(`/api/session/${sessionId}`),\n        ]);\n        if (!msgsRes.ok) return null;\n        const data = await msgsRes.json();\n        if (!Array.isArray(data) || data.length === 0) return null;\n\n        // Cache the raw backend messages so we can restore this session\n        // into a fresh backend if the Space restarts.\n        saveBackendMessages(sessionId, data);\n\n        let pendingIds: Set<string> | undefined;\n        if (infoRes.ok) {\n          const info = await infoRes.json();\n          if (info.pending_approval && Array.isArray(info.pending_approval)) {\n            pendingIds = new Set(\n              info.pending_approval.map((t: { tool_call_id: string }) => t.tool_call_id)\n            );\n            if (pendingIds.size > 0) setNeedsAttention(sessionId, true);\n          }\n          return { data, pendingIds, info };\n        }\n        return { data, pendingIds, info: null };\n      } catch {\n        return null;\n      }\n    };\n\n    /** Stop any running reconnection (event stream + poll). */\n    const stopReconnect = () => {\n      reconnectAbortRef.current?.abort();\n      reconnectAbortRef.current = null;\n      if (pollTimerRef.current) {\n        clearInterval(pollTimerRef.current);\n        pollTimerRef.current = null;\n      }\n    };\n\n    /** Read the event stream from GET /api/events and forward to side-channel. 
*/\n    const consumeEventStream = async (signal: AbortSignal) => {\n      try {\n        const res = await apiFetch(`/api/events/${sessionId}`, {\n          headers: { 'Accept': 'text/event-stream' },\n          signal,\n        });\n        if (!res.ok || !res.body) return;\n\n        const reader = res.body.pipeThrough(new TextDecoderStream()).getReader();\n        let buf = '';\n        while (true) {\n          const { value, done } = await reader.read();\n          if (done || signal.aborted) break;\n          buf += value;\n          const lines = buf.split('\\n');\n          buf = lines.pop() || '';\n          for (const line of lines) {\n            const trimmed = line.trim();\n            if (!trimmed.startsWith('data: ')) continue;\n            try {\n              const event = JSON.parse(trimmed.slice(6));\n              // Forward to side-channel for real-time UI updates\n              const et = event.event_type as string;\n              if (et === 'processing') sideChannel.onProcessing();\n              else if (et === 'assistant_chunk') sideChannel.onStreaming();\n              else if (et === 'tool_call') {\n                const t = event.data?.tool as string;\n                const d = event.data?.arguments?.description as string | undefined;\n                sideChannel.onToolRunning(t, d);\n                sideChannel.onToolCallPanel(t, (event.data?.arguments || {}) as Record<string, unknown>);\n              } else if (et === 'tool_output') {\n                sideChannel.onToolOutputPanel(\n                  event.data?.tool as string,\n                  event.data?.tool_call_id as string,\n                  event.data?.output as string,\n                  event.data?.success as boolean,\n                );\n              } else if (et === 'tool_state_change') {\n                const state = event.data?.state as string;\n                const toolName = event.data?.tool as string;\n                if (state === 'running' && toolName) 
sideChannel.onToolRunning(toolName);\n              } else if (et === 'turn_complete' || et === 'error' || et === 'interrupted') {\n                sideChannel.onProcessingDone();\n                stopReconnect();\n                // Final hydration to get the complete message state\n                const result = await hydrateMessages();\n                if (result) {\n                  const uiMsgs = llmMessagesToUIMessages(result.data, result.pendingIds, chatActionsRef.current.messages);\n                  if (uiMsgs.length > 0) {\n                    chat.setMessages(uiMsgs);\n                    saveMessages(sessionId, uiMsgs);\n                  }\n                }\n                return;\n              } else if (et === 'approval_required') {\n                sideChannel.onApprovalRequired(\n                  (event.data?.tools || []) as Array<{ tool: string; arguments: Record<string, unknown>; tool_call_id: string }>,\n                );\n                stopReconnect();\n                const result = await hydrateMessages();\n                if (result) {\n                  const uiMsgs = llmMessagesToUIMessages(result.data, result.pendingIds, chatActionsRef.current.messages);\n                  if (uiMsgs.length > 0) {\n                    chat.setMessages(uiMsgs);\n                    saveMessages(sessionId, uiMsgs);\n                  }\n                }\n                return;\n              }\n            } catch { /* ignore parse errors */ }\n          }\n        }\n      } catch {\n        /* stream ended or aborted */\n      }\n    };\n\n    const onVisible = async () => {\n      if (document.visibilityState !== 'visible') return;\n\n      // Always re-hydrate messages on wake\n      const result = await hydrateMessages();\n      if (!result) return;\n\n      const { data, pendingIds, info } = result;\n      const uiMsgs = llmMessagesToUIMessages(data, pendingIds, chatActionsRef.current.messages);\n      if (uiMsgs.length > 0) {\n        
chat.setMessages(uiMsgs);\n        saveMessages(sessionId, uiMsgs);\n      }\n\n      // If the backend is still processing, reconnect to the live event stream\n      if (info?.is_processing) {\n        updateSession(sessionId, { isProcessing: true, activityStatus: { type: 'thinking' } });\n\n        // Stop any previous reconnection\n        stopReconnect();\n\n        // Start live event subscription\n        const abort = new AbortController();\n        reconnectAbortRef.current = abort;\n        consumeEventStream(abort.signal);\n\n        // Poll messages every 3 s so the chat message list stays up-to-date\n        // (the event stream gives us real-time status but not full message diffs)\n        pollTimerRef.current = setInterval(async () => {\n          const fresh = await hydrateMessages();\n          if (!fresh) return;\n          const msgs = llmMessagesToUIMessages(fresh.data, fresh.pendingIds, chatActionsRef.current.messages);\n\n          const currentCount = chatActionsRef.current.messages.length;\n          if (msgs.length > currentCount || currentCount === 0) {\n            chat.setMessages(msgs);\n            saveMessages(sessionId, msgs);\n          } \n\n          // If backend stopped processing, clean up\n          if (fresh.info && !fresh.info.is_processing) {\n            updateSession(sessionId, { isProcessing: false });\n            stopReconnect();\n          }\n        }, 3000);\n      }\n    };\n\n    document.addEventListener('visibilitychange', onVisible);\n    return () => {\n      document.removeEventListener('visibilitychange', onVisible);\n      stopReconnect();\n    };\n  }, [sessionId]); // eslint-disable-line react-hooks/exhaustive-deps\n\n  // -- Persist messages ---------------------------------------------------\n  const prevLenRef = useRef(initialMessages.length);\n  useEffect(() => {\n    if (chat.messages.length === 0) return;\n    if (chat.messages.length !== prevLenRef.current) {\n      prevLenRef.current = 
chat.messages.length;\n      saveMessages(sessionId, chat.messages);\n    } \n  }, [sessionId, chat.messages]);\n\n  // -- Undo last turn (REST call + client-side message removal) -----------\n  // With SSE there's no persistent connection to receive the undo_complete\n  // event, so we handle message removal on the frontend after a successful\n  // REST call to the backend.\n  const undoLastTurn = useCallback(async () => {\n    try {\n      const res = await apiFetch(`/api/undo/${sessionId}`, { method: 'POST' });\n      if (!res.ok) {\n        logger.error('Undo API returned', res.status);\n        return;\n      }\n      // Remove the last user turn + assistant response from the UI\n      const msgs = chatActionsRef.current.messages;\n      const setMsgs = chatActionsRef.current.setMessages;\n      if (setMsgs && msgs.length > 0) {\n        let lastUserIdx = -1;\n        for (let i = msgs.length - 1; i >= 0; i--) {\n          if (msgs[i].role === 'user') { lastUserIdx = i; break; }\n        }\n        const updated = lastUserIdx > 0 ? 
msgs.slice(0, lastUserIdx) : [];\n        setMsgs(updated);\n        saveMessages(sessionId, updated);\n      }\n      updateSession(sessionId, { isProcessing: false });\n    } catch (e) {\n      logger.error('Undo failed:', e);\n    }\n  }, [sessionId, updateSession]);\n\n  // -- Approve tools ------------------------------------------------------\n  const approveTools = useCallback(\n    async (approvals: Array<{ tool_call_id: string; approved: boolean; feedback?: string | null; edited_script?: string | null }>) => {\n      // Store edited scripts so the transport can read them when sendMessages is called\n      for (const a of approvals) {\n        if (a.edited_script) {\n          useAgentStore.getState().setEditedScript(a.tool_call_id, a.edited_script);\n        }\n      }\n\n      // Update SDK tool state — this triggers sendMessages() via the transport\n      for (const a of approvals) {\n        chat.addToolApprovalResponse({\n          id: `approval-${a.tool_call_id}`,\n          approved: a.approved,\n          reason: a.approved ? 
undefined : (a.feedback || 'Rejected by user'),\n        });\n      }\n\n      setNeedsAttention(sessionId, false);\n      const hasApproved = approvals.some(a => a.approved);\n      if (hasApproved) {\n        updateSession(sessionId, {\n          isProcessing: true,\n          activityStatus: { type: 'thinking' },\n        });\n      }\n\n      // Persist updated tool states so a page refresh during execution\n      // won't restore stale approval-requested state from localStorage.\n      saveMessages(sessionId, chatActionsRef.current.messages);\n\n      return true;\n    },\n    [sessionId, chat, updateSession, setNeedsAttention],\n  );\n\n  // -- Stop (interrupt backend agent loop, keep SSE open for events) --------\n  const stop = useCallback(() => {\n    // Don't call chat.stop() — keep the SSE stream open so the backend's\n    // tool_state_change(cancelled) and interrupted events reach the frontend.\n    // The stream closes naturally when the backend sends finish events.\n    updateSession(sessionId, { isProcessing: false });\n    apiFetch(`/api/interrupt/${sessionId}`, { method: 'POST' }).catch(() => {});\n  }, [sessionId, updateSession]);\n\n  // -- Edit message + regenerate from that point ----------------------------\n  const editAndRegenerate = useCallback(async (messageId: string, newText: string) => {\n    try {\n      const msgs = chatActionsRef.current.messages;\n      const setMsgs = chatActionsRef.current.setMessages;\n      if (!setMsgs) return;\n\n      // Find the target message and compute user message index (0-indexed, skipping system)\n      const msgIndex = msgs.findIndex(m => m.id === messageId);\n      if (msgIndex < 0) return;\n\n      let userMsgIndex = 0;\n      for (let i = 0; i < msgIndex; i++) {\n        if (msgs[i].role === 'user') userMsgIndex++;\n      }\n\n      // 1. 
Truncate backend history\n      const res = await apiFetch(`/api/truncate/${sessionId}`, {\n        method: 'POST',\n        body: JSON.stringify({ user_message_index: userMsgIndex }),\n        headers: { 'Content-Type': 'application/json' },\n      });\n      if (!res.ok) {\n        logger.error('Truncate API returned', res.status);\n        return;\n      }\n\n      // 2. Truncate frontend messages\n      const truncated = msgs.slice(0, msgIndex);\n      setMsgs(truncated);\n      saveMessages(sessionId, truncated);\n\n      // 3. Send the edited message (reuses existing transport + /api/chat)\n      chat.sendMessage({ text: newText, metadata: { createdAt: new Date().toISOString() } });\n    } catch (e) {\n      logger.error('Edit and regenerate failed:', e);\n    }\n  }, [sessionId, chat]);\n\n  return {\n    messages: chat.messages,\n    sendMessage: chat.sendMessage,\n    stop,\n    status: chat.status,\n    undoLastTurn,\n    editAndRegenerate,\n    approveTools,\n  };\n}\n"
  },
  {
    "path": "frontend/src/hooks/useAuth.ts",
    "content": "/**\n * Authentication hook — simple server-side OAuth.\n *\n * - Outside an iframe: /auth/login redirect (cookies work fine)\n * - Inside an iframe: show \"Open in full page\" link\n *\n * Token is stored via HttpOnly cookie by the backend.\n * In dev mode (no OAUTH_CLIENT_ID), auth is bypassed.\n */\n\nimport { useEffect } from 'react';\nimport { useAgentStore } from '@/store/agentStore';\nimport { logger } from '@/utils/logger';\n\n/** Check if we're running inside an iframe. */\nexport function isInIframe(): boolean {\n  try {\n    return window.top !== window.self;\n  } catch {\n    return true; // SecurityError = cross-origin iframe\n  }\n}\n\n/** Redirect to the server-side OAuth login. */\nexport function triggerLogin(): void {\n  window.location.href = '/auth/login';\n}\n\n/**\n * Hook: on mount, check if user is authenticated.\n * Sets user in the agent store.\n */\nexport function useAuth() {\n  const setUser = useAgentStore((s) => s.setUser);\n\n  useEffect(() => {\n    let cancelled = false;\n\n    async function checkAuth() {\n      try {\n        // Check if user is already authenticated (cookie-based)\n        const response = await fetch('/auth/me', { credentials: 'include' });\n        if (response.ok) {\n          const data = await response.json();\n          if (!cancelled && data.authenticated) {\n            setUser({\n              authenticated: true,\n              username: data.username,\n              name: data.name,\n              picture: data.picture,\n            });\n            logger.log('Authenticated as', data.username);\n            return;\n          }\n        }\n\n        // Not authenticated — check if auth is enabled\n        const statusRes = await fetch('/auth/status', { credentials: 'include' });\n        const statusData = await statusRes.json();\n        if (!statusData.auth_enabled) {\n          // Dev mode — no OAuth configured\n          if (!cancelled) setUser({ authenticated: true, username: 'dev' });\n          return;\n        }\n\n        // Auth enabled but not logged in — welcome screen will handle it\n        if (!cancelled) setUser(null);\n      } catch {\n        // Backend unreachable — assume dev mode\n        if (!cancelled) setUser({ authenticated: true, username: 'dev' });\n      }\n    }\n\n    checkAuth();\n    return () => { cancelled = true; };\n  }, [setUser]);\n}\n"
  },
  {
    "path": "frontend/src/hooks/useOrgMembership.ts",
    "content": "/**\n * Polls backend for org membership status.\n * When membership is detected, updates the user in the agent store\n * and closes any org-join popup that was opened.\n */\nimport { useEffect, useRef } from 'react';\nimport { useAgentStore } from '@/store/agentStore';\n\nconst POLL_INTERVAL_MS = 3000;\n\n/**\n * @param enabled  Only poll when true (user is authenticated but not yet confirmed as org member)\n * @returns popupRef — assign `window.open()` result to `.current` so the hook can auto-close it\n */\nexport function useOrgMembership(enabled: boolean) {\n  const user = useAgentStore((s) => s.user);\n  const setUser = useAgentStore((s) => s.setUser);\n  const popupRef = useRef<Window | null>(null);\n\n  useEffect(() => {\n    if (!enabled || user?.orgMember) return;\n\n    let cancelled = false;\n\n    const check = async () => {\n      try {\n        const res = await fetch('/auth/org-membership', { credentials: 'include' });\n        if (!res.ok || cancelled) return;\n        const data = await res.json();\n        if (cancelled) return;\n        if (data.is_member && user) {\n          setUser({ ...user, orgMember: true });\n          try { popupRef.current?.close(); } catch { /* cross-origin or already closed */ }\n          popupRef.current = null;\n        }\n      } catch { /* backend unreachable — skip */ }\n    };\n\n    check();\n    const id = setInterval(check, POLL_INTERVAL_MS);\n    return () => { cancelled = true; clearInterval(id); };\n  }, [enabled, user?.orgMember, user, setUser]);\n\n  return popupRef;\n}\n"
  },
  {
    "path": "frontend/src/hooks/useUserQuota.ts",
    "content": "/**\n * Reads the current user's Claude daily quota + plan tier from the backend.\n *\n * Fetches once when the user becomes authenticated, and exposes a `refresh()`\n * that callers invoke after a successful session-create / model-switch so the\n * chip reflects the new count without a full page reload.\n */\nimport { useCallback, useEffect, useState } from 'react';\nimport { useAgentStore } from '@/store/agentStore';\nimport { apiFetch } from '@/utils/api';\n\nexport type PlanTier = 'free' | 'pro' | 'org';\n\nexport interface UserQuota {\n  plan: PlanTier;\n  claudeUsedToday: number;\n  claudeDailyCap: number;\n  claudeRemaining: number;\n}\n\nexport function useUserQuota() {\n  const user = useAgentStore((s) => s.user);\n  const [quota, setQuota] = useState<UserQuota | null>(null);\n  const [loading, setLoading] = useState(false);\n\n  const refresh = useCallback(async () => {\n    if (!user?.authenticated) return;\n    setLoading(true);\n    try {\n      const res = await apiFetch('/api/user/quota');\n      if (!res.ok) return;\n      const data = await res.json();\n      setQuota({\n        plan: (data.plan ?? 'free') as PlanTier,\n        claudeUsedToday: data.claude_used_today ?? 0,\n        claudeDailyCap: data.claude_daily_cap ?? 1,\n        claudeRemaining: data.claude_remaining ?? 0,\n      });\n    } catch {\n      /* backend unreachable — leave previous value */\n    } finally {\n      setLoading(false);\n    }\n  }, [user?.authenticated]);\n\n  useEffect(() => {\n    refresh();\n  }, [refresh]);\n\n  return { quota, loading, refresh };\n}\n"
  },
  {
    "path": "frontend/src/lib/backend-message-store.ts",
    "content": "/**\n * localStorage cache of raw backend (litellm Message) dicts keyed by\n * session ID. Used to restore a session into a fresh backend after the\n * Space restarts — the browser-side UIMessages are what the user sees,\n * but the LLM needs the backend format to continue the conversation.\n */\nimport { logger } from '@/utils/logger';\n\nconst STORAGE_KEY = 'hf-agent-backend-messages';\nconst MAX_SESSIONS = 50;\n\ntype MessagesMap = Record<string, unknown[]>;\n\nfunction readAll(): MessagesMap {\n  try {\n    const raw = localStorage.getItem(STORAGE_KEY);\n    if (!raw) return {};\n    const parsed = JSON.parse(raw);\n    if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {\n      return parsed as MessagesMap;\n    }\n    return {};\n  } catch {\n    return {};\n  }\n}\n\nfunction writeAll(map: MessagesMap): void {\n  try {\n    localStorage.setItem(STORAGE_KEY, JSON.stringify(map));\n  } catch (e) {\n    // Quota exceeded is the most common reason — the cache is best-effort.\n    logger.warn('Failed to persist backend messages:', e);\n  }\n}\n\nexport function loadBackendMessages(sessionId: string): unknown[] {\n  const map = readAll();\n  return map[sessionId] ?? [];\n}\n\nexport function saveBackendMessages(sessionId: string, messages: unknown[]): void {\n  const map = readAll();\n  map[sessionId] = messages;\n\n  const keys = Object.keys(map);\n  if (keys.length > MAX_SESSIONS) {\n    const toRemove = keys.slice(0, keys.length - MAX_SESSIONS);\n    for (const k of toRemove) delete map[k];\n  }\n\n  writeAll(map);\n}\n\nexport function moveBackendMessages(fromId: string, toId: string): void {\n  const map = readAll();\n  if (!map[fromId]) return;\n  map[toId] = map[fromId];\n  delete map[fromId];\n  writeAll(map);\n}\n\nexport function deleteBackendMessages(sessionId: string): void {\n  const map = readAll();\n  delete map[sessionId];\n  writeAll(map);\n}\n"
  },
  {
    "path": "frontend/src/lib/chat-message-store.ts",
    "content": "/**\n * Lightweight localStorage persistence for UIMessage arrays,\n * keyed by session ID.\n *\n * Uses the same storage namespace (`hf-agent-messages`) that the\n * old Zustand-based store used, so existing data is compatible.\n */\nimport type { UIMessage } from 'ai';\nimport { logger } from '@/utils/logger';\n\nconst STORAGE_KEY = 'hf-agent-messages';\nconst MAX_SESSIONS = 50;\n\ntype MessagesMap = Record<string, UIMessage[]>;\n\nfunction readAll(): MessagesMap {\n  try {\n    const raw = localStorage.getItem(STORAGE_KEY);\n    if (!raw) return {};\n    const parsed = JSON.parse(raw);\n    // Legacy format was { messagesBySession: {...} }\n    if (parsed.messagesBySession) return parsed.messagesBySession;\n    // New flat format\n    if (typeof parsed === 'object' && !Array.isArray(parsed)) return parsed;\n    return {};\n  } catch {\n    return {};\n  }\n}\n\nfunction writeAll(map: MessagesMap): void {\n  try {\n    localStorage.setItem(STORAGE_KEY, JSON.stringify(map));\n  } catch (e) {\n    logger.warn('Failed to persist messages:', e);\n  }\n}\n\nexport function loadMessages(sessionId: string): UIMessage[] {\n  const map = readAll();\n  const messages = map[sessionId] ?? [];\n  return messages;\n}\n\nexport function saveMessages(sessionId: string, messages: UIMessage[]): void {\n  const map = readAll();\n  map[sessionId] = messages;\n\n  // Evict oldest sessions if we exceed the cap\n  const keys = Object.keys(map);\n  if (keys.length > MAX_SESSIONS) {\n    const toRemove = keys.slice(0, keys.length - MAX_SESSIONS);\n    for (const k of toRemove) delete map[k];\n  }\n\n  writeAll(map);\n}\n\nexport function deleteMessages(sessionId: string): void {\n  const map = readAll();\n  delete map[sessionId];\n  writeAll(map);\n}\n\nexport function moveMessages(fromId: string, toId: string): void {\n  const map = readAll();\n  if (!map[fromId]) return;\n  map[toId] = map[fromId];\n  delete map[fromId];\n  writeAll(map);\n}\n"
  },
  {
    "path": "frontend/src/lib/convert-llm-messages.ts",
    "content": "/**\n * Convert backend LLM messages (litellm format) to Vercel AI SDK UIMessage format.\n */\nimport type { UIMessage } from 'ai';\n\ninterface LLMToolCall {\n  id: string;\n  function: { name: string; arguments: string };\n}\n\ninterface LLMMessage {\n  role: 'user' | 'assistant' | 'tool' | 'system';\n  content: string | null;\n  tool_calls?: LLMToolCall[] | null;\n  tool_call_id?: string | null;\n  name?: string | null;\n}\n\n// Generate stable IDs based on message position to prevent duplicate renders\n// when the same message is re-converted multiple times (e.g., during polling)\nlet uiMessageCounter = 0;\nfunction nextId(): string {\n  return `msg-${++uiMessageCounter}`;\n}\n\n/**\n * @param pendingApprovalIds - Set of tool_call_ids that are waiting for approval.\n *   When provided, matching tool calls without results will get state\n *   'approval-requested' instead of 'input-available'.\n * @param existingUIMessages - Current UI messages to preserve IDs when content matches.\n *   This prevents React from re-rendering messages with new IDs during polling.\n */\nexport function llmMessagesToUIMessages(\n  messages: LLMMessage[],\n  pendingApprovalIds?: Set<string>,\n  existingUIMessages?: UIMessage[],\n): UIMessage[] {\n  // Build a map of tool_call_id -> tool result for pairing\n  const toolResults = new Map<string, { output: string; isError: boolean }>();\n  for (const msg of messages) {\n    if (msg.role === 'tool' && msg.tool_call_id) {\n      toolResults.set(msg.tool_call_id, {\n        output: msg.content || '',\n        isError: false,\n      });\n    }\n  }\n\n  const uiMessages: UIMessage[] = [];\n\n  // Helper to get existing message ID at a given position if roles match\n  const getExistingId = (index: number, role: 'user' | 'assistant'): string | null => {\n    if (!existingUIMessages || index >= existingUIMessages.length) return null;\n    const existing = existingUIMessages[index];\n    return existing.role === role ? 
existing.id : null;\n  };\n\n  for (const msg of messages) {\n    if (msg.role === 'system') continue;\n    if (msg.role === 'tool') continue; // handled via tool_calls pairing\n\n    if (msg.role === 'user') {\n      // Skip internal system-style nudges (doom-loop correction, compact\n      // hints, restore notices, etc.) — they're meant for the LLM, not\n      // the user. They always start with \"[SYSTEM:\".\n      if (typeof msg.content === 'string' && msg.content.trimStart().startsWith('[SYSTEM:')) {\n        continue;\n      }\n      // Try to reuse existing ID if the message at this position matches\n      const existingId = getExistingId(uiMessages.length, 'user');\n      uiMessages.push({\n        id: existingId || nextId(),\n        role: 'user',\n        parts: [{ type: 'text', text: msg.content || '' }],\n      });\n      continue;\n    }\n\n    if (msg.role === 'assistant') {\n      const parts: UIMessage['parts'] = [];\n\n      if (msg.content) {\n        parts.push({ type: 'text', text: msg.content });\n      }\n\n      if (msg.tool_calls) {\n        for (const tc of msg.tool_calls) {\n          let input: Record<string, unknown> = {};\n          try {\n            input = JSON.parse(tc.function.arguments);\n          } catch { /* malformed */ }\n\n          const result = toolResults.get(tc.id);\n          if (result) {\n            parts.push({\n              type: 'dynamic-tool',\n              toolCallId: tc.id,\n              toolName: tc.function.name,\n              state: 'output-available',\n              input,\n              output: result.output,\n            });\n          } else if (pendingApprovalIds?.has(tc.id)) {\n            parts.push({\n              type: 'dynamic-tool',\n              toolCallId: tc.id,\n              toolName: tc.function.name,\n              state: 'approval-requested',\n              input,\n              approval: { id: `approval-${tc.id}` },\n            });\n          } else {\n            parts.push({\n  
            type: 'dynamic-tool',\n              toolCallId: tc.id,\n              toolName: tc.function.name,\n              state: 'input-available',\n              input,\n            });\n          }\n        }\n      }\n\n      // During live streaming the SDK groups all text + tool parts between\n      // user messages into one assistant UIMessage (one start/finish pair per\n      // turn).  The backend stores multiple assistant messages per turn (one\n      // per LLM API call), so merge consecutive assistant messages to match.\n      const prev = uiMessages[uiMessages.length - 1];\n      if (prev && prev.role === 'assistant') {\n        prev.parts.push(...parts);\n      } else {\n        // Try to reuse existing ID if the message at this position matches\n        const existingId = getExistingId(uiMessages.length, 'assistant');\n        const newId = existingId || nextId();\n        uiMessages.push({\n          id: newId,\n          role: 'assistant',\n          parts,\n        });\n      }\n    }\n  }\n\n  return uiMessages;\n}\n\n\ninterface ToolPart {\n  type: string;\n  toolCallId?: string;\n  toolName?: string;\n  state?: string;\n  input?: unknown;\n  output?: unknown;\n  errorText?: string;\n}\n\nfunction joinText(parts: UIMessage['parts']): string {\n  return parts\n    .filter((p): p is { type: 'text'; text: string } => p.type === 'text')\n    .map((p) => p.text)\n    .join('');\n}\n\nfunction stringifyOutput(output: unknown): string {\n  if (output == null) return '';\n  if (typeof output === 'string') return output;\n  try {\n    return JSON.stringify(output);\n  } catch {\n    return String(output);\n  }\n}\n\n/**\n * Reverse of llmMessagesToUIMessages — used as a fallback when we need to\n * restore a session but only have the UIMessage cache (e.g. the session\n * predates the backend-message cache feature).\n *\n * Includes every tool call the assistant made, regardless of the part's\n * stored state. 
If we have a captured output (or errorText), we emit a\n * paired role=tool result. If we don't, we leave the tool_call dangling —\n * the backend's ContextManager patches those via _patch_dangling_tool_calls.\n */\nexport function uiMessagesToLLMMessages(uiMessages: UIMessage[]): LLMMessage[] {\n  const out: LLMMessage[] = [];\n  for (const msg of uiMessages) {\n    if (msg.role === 'user') {\n      const text = joinText(msg.parts);\n      if (text) out.push({ role: 'user', content: text });\n      continue;\n    }\n    if (msg.role === 'assistant') {\n      const text = joinText(msg.parts);\n      const toolCalls: LLMToolCall[] = [];\n      const pairedResults: Array<{ id: string; content: string }> = [];\n      for (const raw of msg.parts as ToolPart[]) {\n        if (!raw.type) continue;\n        const isTool = raw.type === 'dynamic-tool' || raw.type.startsWith('tool-');\n        if (!isTool) continue;\n        const toolCallId = raw.toolCallId;\n        const toolName =\n          raw.toolName ?? (raw.type.startsWith('tool-') ? raw.type.slice(5) : undefined);\n        if (!toolCallId || !toolName) continue;\n\n        toolCalls.push({\n          id: toolCallId,\n          function: {\n            name: toolName,\n            arguments: JSON.stringify(raw.input ?? {}),\n          },\n        });\n\n        // Prefer output; fall back to errorText for output-error /\n        // output-denied. A missing result leaves the tool_call dangling —\n        // the backend will patch it with a synthesized stub.\n        const result =\n          raw.output != null\n            ? stringifyOutput(raw.output)\n            : typeof raw.errorText === 'string' && raw.errorText\n              ? 
raw.errorText\n              : null;\n        if (result != null) {\n          pairedResults.push({ id: toolCallId, content: result });\n        }\n      }\n      if (text || toolCalls.length) {\n        out.push({\n          role: 'assistant',\n          content: text || null,\n          tool_calls: toolCalls.length ? toolCalls : null,\n        });\n      }\n      for (const r of pairedResults) {\n        out.push({ role: 'tool', content: r.content, tool_call_id: r.id });\n      }\n    }\n  }\n  return out;\n}\n"
  },
  {
    "path": "frontend/src/lib/research-store.ts",
    "content": "/**\n * Persist research sub-agent state (steps + stats) per session.\n * Survives page refresh so the rolling display isn't lost mid-research.\n */\nimport type { PerSessionState } from '@/store/agentStore';\n\n/** Max steps to keep in storage and display. Single source of truth. */\nexport const RESEARCH_MAX_STEPS = 4;\n\nconst STORAGE_KEY = 'hf-agent-research';\n\ntype ResearchState = {\n  steps: string[];\n  stats: PerSessionState['researchStats'];\n};\n\ntype ResearchMap = Record<string, ResearchState>;\n\nfunction readAll(): ResearchMap {\n  try {\n    const raw = localStorage.getItem(STORAGE_KEY);\n    return raw ? JSON.parse(raw) : {};\n  } catch {\n    return {};\n  }\n}\n\nfunction writeAll(map: ResearchMap): void {\n  try {\n    localStorage.setItem(STORAGE_KEY, JSON.stringify(map));\n  } catch { /* quota exceeded — ignore */ }\n}\n\nexport function saveResearch(\n  sessionId: string,\n  steps: string[],\n  stats: PerSessionState['researchStats'],\n): void {\n  const map = readAll();\n  map[sessionId] = {\n    steps: steps.slice(-RESEARCH_MAX_STEPS),\n    stats,\n  };\n  writeAll(map);\n}\n\nexport function loadResearch(sessionId: string): ResearchState | null {\n  const map = readAll();\n  return map[sessionId] ?? null;\n}\n\nexport function clearResearch(sessionId: string): void {\n  const map = readAll();\n  delete map[sessionId];\n  writeAll(map);\n}\n"
  },
  {
    "path": "frontend/src/lib/sse-chat-transport.ts",
    "content": "/**\n * SSE-based ChatTransport that bridges our backend event protocol\n * to the Vercel AI SDK's UIMessageChunk streaming interface.\n *\n * Each sendMessages() call does a POST → SSE response.\n * One request per turn phase (initial message, or approval continuation).\n */\nimport type { ChatTransport, UIMessage, UIMessageChunk, ChatRequestOptions } from 'ai';\nimport { apiFetch } from '@/utils/api';\nimport { logger } from '@/utils/logger';\nimport type { AgentEvent } from '@/types/events';\nimport { useAgentStore } from '@/store/agentStore';\n\n// ---------------------------------------------------------------------------\n// Side-channel callback interface (non-chat events forwarded to the store)\n// ---------------------------------------------------------------------------\nexport interface SideChannelCallbacks {\n  onReady: () => void;\n  onShutdown: () => void;\n  onError: (error: string) => void;\n  onProcessing: () => void;\n  onProcessingDone: () => void;\n  onUndoComplete: () => void;\n  onCompacted: (oldTokens: number, newTokens: number) => void;\n  onPlanUpdate: (plan: Array<{ id: string; content: string; status: string }>) => void;\n  onToolLog: (tool: string, log: string, agentId?: string, label?: string) => void;\n  onConnectionChange: (connected: boolean) => void;\n  onSessionDead: (sessionId: string) => void;\n  onApprovalRequired: (tools: Array<{ tool: string; arguments: Record<string, unknown>; tool_call_id: string }>) => void;\n  onToolCallPanel: (tool: string, args: Record<string, unknown>) => void;\n  onToolOutputPanel: (tool: string, toolCallId: string, output: string, success: boolean) => void;\n  onStreaming: () => void;\n  onToolRunning: (toolName: string, description?: string) => void;\n  onInterrupted: () => void;\n}\n\n// ---------------------------------------------------------------------------\n// Helpers\n// ---------------------------------------------------------------------------\nlet partIdCounter = 
0;\nfunction nextPartId(prefix: string): string {\n  return `${prefix}-${Date.now()}-${++partIdCounter}`;\n}\n\n/** Parse an SSE text stream into AgentEvent objects. */\nfunction createSSEParserStream(): TransformStream<string, AgentEvent> {\n  let buffer = '';\n  return new TransformStream<string, AgentEvent>({\n    transform(chunk, controller) {\n      buffer += chunk;\n      const lines = buffer.split('\\n');\n      // Keep the last (possibly incomplete) line in the buffer\n      buffer = lines.pop() || '';\n      for (const line of lines) {\n        const trimmed = line.trim();\n        if (trimmed.startsWith('data: ')) {\n          try {\n            const json = JSON.parse(trimmed.slice(6));\n            controller.enqueue(json as AgentEvent);\n          } catch {\n            logger.warn('SSE parse error:', trimmed);\n          }\n        }\n      }\n    },\n    flush(controller) {\n      // Process any remaining data in buffer\n      if (buffer.trim().startsWith('data: ')) {\n        try {\n          const json = JSON.parse(buffer.trim().slice(6));\n          controller.enqueue(json as AgentEvent);\n        } catch { /* ignore incomplete */ }\n      }\n    },\n  });\n}\n\n/** Transform AgentEvent objects into UIMessageChunk objects for the Vercel AI SDK. 
*/\nfunction createEventToChunkStream(sideChannel: SideChannelCallbacks): TransformStream<AgentEvent, UIMessageChunk> {\n  let textPartId: string | null = null;\n\n  function endTextPart(controller: TransformStreamDefaultController<UIMessageChunk>) {\n    if (textPartId) {\n      controller.enqueue({ type: 'text-end', id: textPartId });\n      textPartId = null;\n    }\n  }\n\n  return new TransformStream<AgentEvent, UIMessageChunk>({\n    transform(event, controller) {\n      switch (event.event_type) {\n        // -- Side-channel only events ----------------------------------------\n        case 'ready':\n          sideChannel.onReady();\n          break;\n\n        case 'shutdown':\n          endTextPart(controller);\n          controller.enqueue({ type: 'finish-step' });\n          controller.enqueue({ type: 'finish', finishReason: 'stop' });\n          sideChannel.onShutdown();\n          break;\n\n        case 'interrupted':\n          endTextPart(controller);\n          controller.enqueue({ type: 'finish-step' });\n          controller.enqueue({ type: 'finish', finishReason: 'stop' });\n          sideChannel.onInterrupted();\n          sideChannel.onProcessingDone();\n          break;\n\n        case 'undo_complete':\n          endTextPart(controller);\n          sideChannel.onUndoComplete();\n          break;\n\n        case 'compacted':\n          sideChannel.onCompacted(\n            (event.data?.old_tokens as number) || 0,\n            (event.data?.new_tokens as number) || 0,\n          );\n          break;\n\n        case 'plan_update':\n          sideChannel.onPlanUpdate(\n            (event.data?.plan as Array<{ id: string; content: string; status: string }>) || [],\n          );\n          break;\n\n        case 'tool_log':\n          sideChannel.onToolLog(\n            (event.data?.tool as string) || '',\n            (event.data?.log as string) || '',\n            (event.data?.agent_id as string) || '',\n            (event.data?.label as string) || 
'',\n          );\n          break;\n\n        // -- Chat stream events ----------------------------------------------\n        case 'processing':\n          sideChannel.onProcessing();\n          controller.enqueue({ type: 'start', messageMetadata: { createdAt: new Date().toISOString() } });\n          controller.enqueue({ type: 'start-step' });\n          break;\n\n        case 'assistant_chunk': {\n          const delta = (event.data?.content as string) || '';\n          if (!delta) break;\n          if (!textPartId) {\n            textPartId = nextPartId('text');\n            controller.enqueue({ type: 'text-start', id: textPartId });\n            sideChannel.onStreaming();\n          }\n          controller.enqueue({ type: 'text-delta', id: textPartId, delta });\n          break;\n        }\n\n        case 'assistant_stream_end':\n          endTextPart(controller);\n          break;\n\n        case 'assistant_message': {\n          const content = (event.data?.content as string) || '';\n          if (!content) break;\n          const id = nextPartId('text');\n          controller.enqueue({ type: 'text-start', id });\n          controller.enqueue({ type: 'text-delta', id, delta: content });\n          controller.enqueue({ type: 'text-end', id });\n          break;\n        }\n\n        case 'tool_call': {\n          const toolName = (event.data?.tool as string) || 'unknown';\n          const toolCallId = (event.data?.tool_call_id as string) || '';\n          const args = (event.data?.arguments as Record<string, unknown>) || {};\n          if (toolName === 'plan_tool') break;\n\n          endTextPart(controller);\n          controller.enqueue({ type: 'tool-input-start', toolCallId, toolName, dynamic: true });\n          controller.enqueue({ type: 'tool-input-available', toolCallId, toolName, input: args, dynamic: true });\n\n          sideChannel.onToolRunning(toolName, (args as Record<string, unknown>)?.description as string | undefined);\n          
sideChannel.onToolCallPanel(toolName, args as Record<string, unknown>);\n          break;\n        }\n\n        case 'tool_output': {\n          const toolCallId = (event.data?.tool_call_id as string) || '';\n          const output = (event.data?.output as string) || '';\n          const success = event.data?.success as boolean;\n          const toolName = (event.data?.tool as string) || '';\n          if (toolName === 'plan_tool' || toolCallId.startsWith('plan_tool')) break;\n\n          if (success) {\n            controller.enqueue({ type: 'tool-output-available', toolCallId, output, dynamic: true });\n          } else {\n            controller.enqueue({ type: 'tool-output-error', toolCallId, errorText: output, dynamic: true });\n          }\n          sideChannel.onToolOutputPanel(toolName, toolCallId, output, success);\n          break;\n        }\n\n        case 'approval_required': {\n          const tools = event.data?.tools as Array<{\n            tool: string;\n            arguments: Record<string, unknown>;\n            tool_call_id: string;\n          }>;\n          if (!tools) break;\n\n          endTextPart(controller);\n          for (const t of tools) {\n            controller.enqueue({ type: 'tool-input-start', toolCallId: t.tool_call_id, toolName: t.tool, dynamic: true });\n            controller.enqueue({ type: 'tool-input-available', toolCallId: t.tool_call_id, toolName: t.tool, input: t.arguments, dynamic: true });\n            controller.enqueue({ type: 'tool-approval-request', approvalId: `approval-${t.tool_call_id}`, toolCallId: t.tool_call_id });\n          }\n          sideChannel.onApprovalRequired(tools);\n          // DON'T emit finish here — the stream will close naturally and the SDK\n          // will see there's a pending approval. 
The SDK calls sendMessages again\n          // after addToolApprovalResponse.\n          break;\n        }\n\n        case 'tool_state_change': {\n          const tcId = (event.data?.tool_call_id as string) || '';\n          const state = (event.data?.state as string) || '';\n          const toolName = (event.data?.tool as string) || '';\n          const jobUrl = (event.data?.jobUrl as string) || undefined;\n\n          if (tcId.startsWith('plan_tool')) break;\n\n          if (jobUrl && tcId) {\n            useAgentStore.getState().setJobUrl(tcId, jobUrl);\n          }\n          if (state === 'running' && toolName) {\n            sideChannel.onToolRunning(toolName);\n          }\n          if (state === 'rejected' || state === 'abandoned') {\n            controller.enqueue({ type: 'tool-output-denied', toolCallId: tcId });\n          }\n          if (state === 'cancelled') {\n            controller.enqueue({ type: 'tool-output-error', toolCallId: tcId, errorText: 'Cancelled by user', dynamic: true });\n          }\n          break;\n        }\n\n        case 'turn_complete':\n          endTextPart(controller);\n          controller.enqueue({ type: 'finish-step' });\n          controller.enqueue({ type: 'finish', finishReason: 'stop' });\n          sideChannel.onProcessingDone();\n          break;\n\n        case 'error': {\n          const errorMsg = (event.data?.error as string) || 'Unknown error';\n          endTextPart(controller);\n          controller.enqueue({ type: 'finish-step' });\n          controller.enqueue({ type: 'finish', finishReason: 'error' });\n          sideChannel.onError(errorMsg);\n          sideChannel.onProcessingDone();\n          break;\n        }\n\n        default:\n          logger.log('SSE transport: unknown event', event);\n      }\n    },\n  });\n}\n\n// ---------------------------------------------------------------------------\n// Transport implementation\n// 
---------------------------------------------------------------------------\nexport class SSEChatTransport implements ChatTransport<UIMessage> {\n  private sessionId: string;\n  private sideChannel: SideChannelCallbacks;\n\n  constructor(sessionId: string, sideChannel: SideChannelCallbacks) {\n    this.sessionId = sessionId;\n    this.sideChannel = sideChannel;\n    // Mark as connected immediately — no persistent connection to establish\n    // Defer to avoid setState during render\n    queueMicrotask(() => sideChannel.onConnectionChange(true));\n  }\n\n  updateSideChannel(sideChannel: SideChannelCallbacks): void {\n    this.sideChannel = sideChannel;\n  }\n\n  destroy(): void {\n    // Nothing to clean up — no persistent connections\n  }\n\n  // -- ChatTransport interface ---------------------------------------------\n\n  async sendMessages(\n    options: {\n      trigger: 'submit-message' | 'regenerate-message';\n      chatId: string;\n      messageId: string | undefined;\n      messages: UIMessage[];\n      abortSignal: AbortSignal | undefined;\n    } & ChatRequestOptions,\n  ): Promise<ReadableStream<UIMessageChunk>> {\n    const sessionId = this.sessionId;\n\n    // Detect: is this an approval continuation or a new user message?\n    // After addToolApprovalResponse, the SDK calls sendMessages again.\n    // The last assistant message will have tool parts in 'approval-responded' state.\n    const lastAssistant = [...options.messages].reverse().find(m => m.role === 'assistant');\n    const approvedParts = lastAssistant?.parts.filter(\n      (p) => p.type === 'dynamic-tool' && p.state === 'approval-responded'\n    ) || [];\n\n    let body: Record<string, unknown>;\n    if (approvedParts.length > 0) {\n      // Approval continuation — extract approval decisions\n      const approvals = approvedParts.map((p) => {\n        if (p.type !== 'dynamic-tool') return null;\n        const approved = p.approval?.approved ?? 
true;\n        // Get edited script from agentStore if available\n        const editedScript = useAgentStore.getState().getEditedScript(p.toolCallId);\n        return {\n          tool_call_id: p.toolCallId,\n          approved,\n          feedback: approved ? null : (p.approval?.reason || 'Rejected by user'),\n          edited_script: editedScript ?? null,\n        };\n      }).filter(Boolean);\n      body = { approvals };\n    } else {\n      // Normal user message\n      const lastUserMsg = [...options.messages].reverse().find(m => m.role === 'user');\n      const text = lastUserMsg\n        ? lastUserMsg.parts\n            .filter((p): p is Extract<typeof p, { type: 'text' }> => p.type === 'text')\n            .map(p => p.text)\n            .join('')\n        : '';\n      body = { text };\n    }\n\n    // POST to SSE endpoint\n    const response = await apiFetch(`/api/chat/${sessionId}`, {\n      method: 'POST',\n      body: JSON.stringify(body),\n      signal: options.abortSignal,\n      headers: {\n        'Content-Type': 'application/json',\n        'Accept': 'text/event-stream',\n      },\n    });\n\n    if (response.status === 404) {\n      // Backend lost this session (e.g. Space restart). Signal the UI so\n      // it can flag the session for the catch-up banner.\n      this.sideChannel.onSessionDead(sessionId);\n    }\n    if (response.status === 429) {\n      // Claude daily-quota gate tripped. 
The prefix is the detection marker\n      // for useAgentChat's onError handler, which surfaces the cap dialog\n      // instead of a generic error banner.\n      throw new Error('CLAUDE_QUOTA_EXHAUSTED');\n    }\n    if (!response.ok) {\n      const errorText = await response.text().catch(() => 'Request failed');\n      throw new Error(`Chat request failed: ${response.status} ${errorText}`);\n    }\n\n    if (!response.body) {\n      throw new Error('No response body');\n    }\n\n    // Pipe: response bytes → text → SSE events → UIMessageChunks\n    return response.body\n      .pipeThrough(new TextDecoderStream())\n      .pipeThrough(createSSEParserStream())\n      .pipeThrough(createEventToChunkStream(this.sideChannel));\n  }\n\n  async reconnectToStream(): Promise<ReadableStream<UIMessageChunk> | null> {\n    // Check if the backend session is still processing a turn.\n    // If so, subscribe to its event stream so we can resume live updates\n    // (e.g. after page refresh or wake-from-sleep reconnection).\n    try {\n      const infoRes = await apiFetch(`/api/session/${this.sessionId}`);\n      if (!infoRes.ok) return null;\n      const info = await infoRes.json();\n      if (!info.is_processing) return null;\n\n      // Session is mid-turn — subscribe to its event broadcast.\n      const response = await apiFetch(`/api/events/${this.sessionId}`, {\n        headers: { 'Accept': 'text/event-stream' },\n      });\n      if (!response.ok || !response.body) return null;\n\n      this.sideChannel.onProcessing();\n\n      return response.body\n        .pipeThrough(new TextDecoderStream())\n        .pipeThrough(createSSEParserStream())\n        .pipeThrough(createEventToChunkStream(this.sideChannel));\n    } catch {\n      return null;\n    }\n  }\n}\n"
  },
  {
    "path": "frontend/src/main.tsx",
    "content": "import { StrictMode } from 'react';\nimport { createRoot } from 'react-dom/client';\nimport { ThemeProvider } from '@mui/material/styles';\nimport CssBaseline from '@mui/material/CssBaseline';\nimport App from './App';\nimport { darkTheme, lightTheme } from './theme';\nimport { useLayoutStore } from './store/layoutStore';\n\nfunction Root() {\n  const themeMode = useLayoutStore((s) => s.themeMode);\n  const theme = themeMode === 'light' ? lightTheme : darkTheme;\n\n  return (\n    <ThemeProvider theme={theme}>\n      <CssBaseline />\n      <App />\n    </ThemeProvider>\n  );\n}\n\ncreateRoot(document.getElementById('root')!).render(\n  <StrictMode>\n    <Root />\n  </StrictMode>\n);\n"
  },
  {
    "path": "frontend/src/store/agentStore.ts",
    "content": "/**\n * Agent store — manages UI state that is NOT handled by the Vercel AI SDK.\n *\n * Message state (messages, streaming, tool calls) is now managed by useChat().\n * This store only handles:\n *  - Connection / processing flags\n *  - Panel state (right panel — single-artifact pattern)\n *  - Plan state\n *  - User info / error banners\n *  - Edited scripts (for hf_jobs code editing)\n *\n * Per-session state:\n *  Each session maintains its own snapshot of processing/activity/panel/plan\n *  state in `sessionStates`. Background sessions keep updating their own\n *  snapshot via `updateSession()`. The active session's snapshot is mirrored\n *  to the flat top-level fields so the UI reads from a single place.\n */\nimport { create } from 'zustand';\nimport type { User } from '@/types/agent';\n\nexport interface PlanItem {\n  id: string;\n  content: string;\n  status: 'pending' | 'in_progress' | 'completed';\n}\n\nexport interface PanelSection {\n  content: string;\n  language: string;\n}\n\nexport interface PanelData {\n  title: string;\n  script?: PanelSection;\n  output?: PanelSection;\n  input?: PanelSection;\n  parameters?: Record<string, unknown>;\n}\n\nexport type PanelView = 'script' | 'output';\n\nexport interface LLMHealthError {\n  error: string;\n  errorType: 'auth' | 'credits' | 'rate_limit' | 'network' | 'unknown';\n  model: string;\n}\n\nexport type ActivityStatus =\n  | { type: 'idle' }\n  | { type: 'thinking' }\n  | { type: 'tool'; toolName: string; description?: string }\n  | { type: 'waiting-approval' }\n  | { type: 'streaming' }\n  | { type: 'cancelled' };\n\nexport interface ResearchAgentStats {\n  toolCount: number;\n  tokenCount: number;\n  startedAt: number | null;\n  finalElapsed: number | null;\n}\n\nexport interface ResearchAgentState {\n  label: string;\n  steps: string[];\n  stats: ResearchAgentStats;\n}\n\n/** State that is tracked per-session (each session has its own copy). */\nexport interface PerSessionState {\n  isProcessing: boolean;\n  activityStatus: ActivityStatus;\n  panelData: PanelData | null;\n  panelView: PanelView;\n  panelEditable: boolean;\n  plan: PlanItem[];\n  /** Per-agent research state, keyed by agent_id. */\n  researchAgents: Record<string, ResearchAgentState>;\n  /** @deprecated kept for backward compat selectors — use researchAgents instead */\n  researchSteps: string[];\n  /** @deprecated kept for backward compat selectors — use researchAgents instead */\n  researchStats: ResearchAgentStats;\n}\n\nconst defaultResearchStats: ResearchAgentStats = { toolCount: 0, tokenCount: 0, startedAt: null, finalElapsed: null };\n\nconst defaultSessionState: PerSessionState = {\n  isProcessing: false,\n  activityStatus: { type: 'idle' },\n  panelData: null,\n  panelView: 'script',\n  panelEditable: false,\n  plan: [],\n  researchAgents: {},\n  researchSteps: [],\n  researchStats: { ...defaultResearchStats },\n};\n\ninterface AgentStore {\n  // ── Per-session state map ───────────────────────────────────────────\n  sessionStates: Record<string, PerSessionState>;\n  activeSessionId: string | null;\n\n  // ── Flat state (mirrors active session — UI reads from here) ────────\n  isProcessing: boolean;\n  isConnected: boolean;\n  activityStatus: ActivityStatus;\n  user: User | null;\n  error: string | null;\n  llmHealthError: LLMHealthError | null;\n  /** Set when a Claude-send hits the daily quota — ChatInput opens the cap dialog in response. */\n  claudeQuotaExhausted: boolean;\n\n  // Right panel (single-artifact pattern)\n  panelData: PanelData | null;\n  panelView: PanelView;\n  panelEditable: boolean;\n\n  // Plan\n  plan: PlanItem[];\n\n  // Edited scripts (tool_call_id -> edited content)\n  editedScripts: Record<string, string>;\n\n  // Job URLs (tool_call_id -> job URL) for HF jobs\n  jobUrls: Record<string, string>;\n\n  // Job statuses (tool_call_id -> job status) for HF jobs\n  jobStatuses: Record<string, string>;\n\n  // Tool error states (tool_call_id -> true if errored) - persisted across renders\n  toolErrors: Record<string, boolean>;\n\n  // Tool rejected states (tool_call_id -> true if rejected by user) - persisted across renders\n  rejectedTools: Record<string, boolean>;\n\n  // ── Per-session actions ─────────────────────────────────────────────\n\n  /** Update a session's state. If it's the active session, also update flat state. */\n  updateSession: (sessionId: string, updates: Partial<PerSessionState>) => void;\n\n  /** Get a session's current state (from map, not flat). */\n  getSessionState: (sessionId: string) => PerSessionState;\n\n  /** Switch the active session — restores its state to flat fields. */\n  switchActiveSession: (sessionId: string) => void;\n\n  /** Remove a session's state from the map. */\n  clearSessionState: (sessionId: string) => void;\n\n  // ── Global actions (not per-session) ────────────────────────────────\n  setProcessing: (isProcessing: boolean) => void;\n  setConnected: (isConnected: boolean) => void;\n  setActivityStatus: (status: ActivityStatus) => void;\n  setUser: (user: User | null) => void;\n  setError: (error: string | null) => void;\n  setLlmHealthError: (error: LLMHealthError | null) => void;\n  setClaudeQuotaExhausted: (exhausted: boolean) => void;\n\n  setPanel: (data: PanelData, view?: PanelView, editable?: boolean) => void;\n  setPanelView: (view: PanelView) => void;\n  setPanelOutput: (output: PanelSection) => void;\n  updatePanelScript: (content: string) => void;\n  lockPanel: () => void;\n  clearPanel: () => void;\n\n  setPlan: (plan: PlanItem[]) => void;\n\n  setEditedScript: (toolCallId: string, content: string) => void;\n  getEditedScript: (toolCallId: string) => string | undefined;\n  clearEditedScripts: () => void;\n\n  setJobUrl: (toolCallId: string, jobUrl: string) => void;\n  getJobUrl: (toolCallId: string) => string | undefined;\n\n  setJobStatus: (toolCallId: string, status: string) => void;\n  getJobStatus: (toolCallId: string) => string | undefined;\n\n  setToolError: (toolCallId: string, hasError: boolean) => void;\n  getToolError: (toolCallId: string) => boolean | undefined;\n\n  setToolRejected: (toolCallId: string, isRejected: boolean) => void;\n  getToolRejected: (toolCallId: string) => boolean | undefined;\n}\n\n/**\n * Helper: patch the active session's snapshot with partial per-session fields.\n * Returns the `sessionStates` slice to spread into a `set()` call, or `{}`\n * if there's no active session snapshot to update.\n */\nfunction syncSnapshot(\n  state: AgentStore,\n  patch: Partial<PerSessionState>,\n): { sessionStates: Record<string, PerSessionState> } | Record<string, never> {\n  const { activeSessionId, sessionStates } = state;\n  if (!activeSessionId || !sessionStates[activeSessionId]) return {};\n  return {\n    sessionStates: {\n      ...sessionStates,\n      [activeSessionId]: { ...sessionStates[activeSessionId], ...patch },\n    },\n  };\n}\n\n// Load persisted tool errors from localStorage\nfunction loadToolErrors(): Record<string, boolean> {\n  try {\n    const stored = localStorage.getItem('hf-agent-tool-errors');\n    return stored ? JSON.parse(stored) : {};\n  } catch {\n    return {};\n  }\n}\n\n// Save tool errors to localStorage\nfunction saveToolErrors(errors: Record<string, boolean>): void {\n  try {\n    localStorage.setItem('hf-agent-tool-errors', JSON.stringify(errors));\n  } catch (e) {\n    console.warn('Failed to persist tool errors:', e);\n  }\n}\n\n// Load persisted rejected tools from localStorage\nfunction loadRejectedTools(): Record<string, boolean> {\n  try {\n    const stored = localStorage.getItem('hf-agent-rejected-tools');\n    return stored ? JSON.parse(stored) : {};\n  } catch {\n    return {};\n  }\n}\n\n// Save rejected tools to localStorage\nfunction saveRejectedTools(rejected: Record<string, boolean>): void {\n  try {\n    localStorage.setItem('hf-agent-rejected-tools', JSON.stringify(rejected));\n  } catch (e) {\n    console.warn('Failed to persist rejected tools:', e);\n  }\n}\n\nexport const useAgentStore = create<AgentStore>()((set, get) => ({\n  sessionStates: {},\n  activeSessionId: null,\n\n  isProcessing: false,\n  isConnected: false,\n  activityStatus: { type: 'idle' },\n  user: null,\n  error: null,\n  llmHealthError: null,\n  claudeQuotaExhausted: false,\n\n  panelData: null,\n  panelView: 'script',\n  panelEditable: false,\n\n  plan: [],\n\n  editedScripts: {},\n  jobUrls: {},\n  jobStatuses: {},\n  toolErrors: loadToolErrors(),\n  rejectedTools: loadRejectedTools(),\n\n  // ── Per-session state management ──────────────────────────────────\n\n  updateSession: (sessionId, updates) => {\n    const state = get();\n    const current = state.sessionStates[sessionId] || { ...defaultSessionState };\n    const updated = { ...current, ...updates };\n\n    // Apply the processing→idle side effect\n    const processingCleared = 'isProcessing' in updates && !updates.isProcessing;\n    if (processingCleared) {\n      if (updated.activityStatus.type !== 'waiting-approval' && updated.activityStatus.type !== 'cancelled') {\n        updated.activityStatus = { type: 'idle' };\n      }\n    }\n\n    const isActive = state.activeSessionId === sessionId;\n\n    // Build flat-state mirror: only the fields explicitly in `updates`\n    // (plus activityStatus when the processing→idle side-effect fires).\n    // This prevents overwriting flat fields changed by global setters\n    // (e.g. setPanelView called from CodePanel) with stale snapshot values.\n    let flatMirror: Record<string, unknown> = {};\n    if (isActive) {\n      for (const key of Object.keys(updates)) {\n        flatMirror[key] = updated[key as keyof PerSessionState];\n      }\n      // Side-effect may have changed activityStatus even if it wasn't in updates\n      if (processingCleared) {\n        flatMirror.activityStatus = updated.activityStatus;\n      }\n    }\n\n    set({\n      sessionStates: { ...state.sessionStates, [sessionId]: updated },\n      ...flatMirror,\n    });\n  },\n\n  getSessionState: (sessionId) => {\n    return get().sessionStates[sessionId] || { ...defaultSessionState };\n  },\n\n  switchActiveSession: (sessionId) => {\n    const state = get();\n\n    // Build a new sessionStates map (never mutate the existing object)\n    const updatedStates = { ...state.sessionStates };\n\n    // Save current active session's flat state back to its snapshot\n    if (state.activeSessionId && state.activeSessionId !== sessionId) {\n      updatedStates[state.activeSessionId] = {\n        isProcessing: state.isProcessing,\n        activityStatus: state.activityStatus,\n        panelData: state.panelData,\n        panelView: state.panelView,\n        panelEditable: state.panelEditable,\n        plan: state.plan,\n        researchAgents: state.sessionStates[state.activeSessionId]?.researchAgents ?? {},\n        researchSteps: state.sessionStates[state.activeSessionId]?.researchSteps ?? [],\n        researchStats: state.sessionStates[state.activeSessionId]?.researchStats ?? { ...defaultResearchStats },\n      };\n    }\n\n    // Restore the new session's state\n    const incoming = updatedStates[sessionId] || { ...defaultSessionState };\n    set({\n      activeSessionId: sessionId,\n      sessionStates: updatedStates,\n      isProcessing: incoming.isProcessing,\n      activityStatus: incoming.activityStatus,\n      panelData: incoming.panelData,\n      panelView: incoming.panelView,\n      panelEditable: incoming.panelEditable,\n      plan: incoming.plan,\n      // Clear transient error on switch\n      error: null,\n    });\n  },\n\n  clearSessionState: (sessionId) => {\n    set((state) => {\n      const { [sessionId]: _, ...rest } = state.sessionStates;\n      return { sessionStates: rest };\n    });\n  },\n\n  // ── Global flags ──────────────────────────────────────────────────\n\n  setProcessing: (isProcessing) => {\n    const current = get().activityStatus;\n    const preserveStatus = current.type === 'waiting-approval' || current.type === 'cancelled';\n    set({ isProcessing, ...(!isProcessing && !preserveStatus ? { activityStatus: { type: 'idle' } } : {}) });\n  },\n  setConnected: (isConnected) => set({ isConnected }),\n  setActivityStatus: (status) => set({ activityStatus: status }),\n  setUser: (user) => set({ user }),\n  setError: (error) => set({ error }),\n  setLlmHealthError: (error) => set({ llmHealthError: error }),\n  setClaudeQuotaExhausted: (exhausted) => set({ claudeQuotaExhausted: exhausted }),\n\n  // ── Panel (single-artifact) ───────────────────────────────────────\n  // Each setter also patches the active session's snapshot so that\n  // getSessionState() stays consistent with flat state.\n\n  setPanel: (data, view, editable) => set((state) => {\n    const patch: Partial<PerSessionState> = {\n      panelData: data,\n      panelView: view ?? (data.script ? 'script' : 'output'),\n      panelEditable: editable ?? false,\n    };\n    return { ...patch, ...syncSnapshot(state, patch) };\n  }),\n\n  setPanelView: (view) => set((state) => {\n    const patch: Partial<PerSessionState> = { panelView: view };\n    return { ...patch, ...syncSnapshot(state, patch) };\n  }),\n\n  setPanelOutput: (output) => set((state) => {\n    const panelData = state.panelData\n      ? { ...state.panelData, output }\n      : { title: 'Output', output };\n    const patch: Partial<PerSessionState> = { panelData, panelView: 'output' };\n    return { ...patch, ...syncSnapshot(state, patch) };\n  }),\n\n  updatePanelScript: (content) => set((state) => {\n    const panelData = state.panelData?.script\n      ? { ...state.panelData, script: { ...state.panelData.script, content } }\n      : state.panelData;\n    if (!panelData) return {};\n    const patch: Partial<PerSessionState> = { panelData };\n    return { ...patch, ...syncSnapshot(state, patch) };\n  }),\n\n  lockPanel: () => set((state) => {\n    const patch: Partial<PerSessionState> = { panelEditable: false };\n    return { ...patch, ...syncSnapshot(state, patch) };\n  }),\n\n  clearPanel: () => set((state) => {\n    const patch: Partial<PerSessionState> = { panelData: null, panelView: 'script', panelEditable: false };\n    return { ...patch, ...syncSnapshot(state, patch) };\n  }),\n\n  // ── Plan ──────────────────────────────────────────────────────────\n\n  setPlan: (plan) => set((state) => {\n    const patch: Partial<PerSessionState> = { plan };\n    return { ...patch, ...syncSnapshot(state, patch) };\n  }),\n\n  // ── Edited scripts ────────────────────────────────────────────────\n\n  setEditedScript: (toolCallId, content) => {\n    set((state) => ({\n      editedScripts: { ...state.editedScripts, [toolCallId]: content },\n    }));\n  },\n\n  getEditedScript: (toolCallId) => get().editedScripts[toolCallId],\n\n  clearEditedScripts: () => set({ editedScripts: {} }),\n\n  // ── Job URLs ────────────────────────────────────────────────────────\n\n  setJobUrl: (toolCallId, jobUrl) => {\n    set((state) => ({\n      jobUrls: { ...state.jobUrls, [toolCallId]: jobUrl },\n    }));\n  },\n\n  getJobUrl: (toolCallId) => get().jobUrls[toolCallId],\n\n  // ── Job Statuses ────────────────────────────────────────────────────\n\n  setJobStatus: (toolCallId, status) => {\n    set((state) => ({\n      jobStatuses: { ...state.jobStatuses, [toolCallId]: status },\n    }));\n  },\n\n  getJobStatus: (toolCallId) => get().jobStatuses[toolCallId],\n\n  // ── Tool Errors ─────────────────────────────────────────────────────\n\n  setToolError: (toolCallId, hasError) => {\n    set((state) => {\n      const updated = { ...state.toolErrors, [toolCallId]: hasError };\n      saveToolErrors(updated);\n      return { toolErrors: updated };\n    });\n  },\n\n  getToolError: (toolCallId) => get().toolErrors[toolCallId],\n\n  // ── Tool Rejections ──────────────────────────────────────────────────\n\n  setToolRejected: (toolCallId, isRejected) => {\n    set((state) => {\n      const updated = { ...state.rejectedTools, [toolCallId]: isRejected };\n      saveRejectedTools(updated);\n      return { rejectedTools: updated };\n    });\n  },\n\n  getToolRejected: (toolCallId) => get().rejectedTools[toolCallId],\n}));\n"
  },
  {
    "path": "frontend/src/store/layoutStore.ts",
    "content": "import { create } from 'zustand';\nimport { persist } from 'zustand/middleware';\n\nexport type ThemeMode = 'dark' | 'light';\n\ninterface LayoutStore {\n  isLeftSidebarOpen: boolean;\n  isRightPanelOpen: boolean;\n  rightPanelWidth: number;\n  themeMode: ThemeMode;\n  setLeftSidebarOpen: (open: boolean) => void;\n  setRightPanelOpen: (open: boolean) => void;\n  setRightPanelWidth: (width: number) => void;\n  toggleLeftSidebar: () => void;\n  toggleRightPanel: () => void;\n  toggleTheme: () => void;\n}\n\nexport const useLayoutStore = create<LayoutStore>()(\n  persist(\n    (set) => ({\n      isLeftSidebarOpen: true,\n      isRightPanelOpen: false,\n      rightPanelWidth: 450,\n      themeMode: 'dark' as ThemeMode,\n      setLeftSidebarOpen: (open) => set({ isLeftSidebarOpen: open }),\n      setRightPanelOpen: (open) => set({ isRightPanelOpen: open }),\n      setRightPanelWidth: (width) => set({ rightPanelWidth: width }),\n      toggleLeftSidebar: () => set((state) => ({ isLeftSidebarOpen: !state.isLeftSidebarOpen })),\n      toggleRightPanel: () => set((state) => ({ isRightPanelOpen: !state.isRightPanelOpen })),\n      toggleTheme: () =>\n        set((state) => ({\n          themeMode: state.themeMode === 'dark' ? 'light' : 'dark',\n        })),\n    }),\n    {\n      name: 'hf-agent-layout',\n      partialize: (state) => ({ themeMode: state.themeMode }),\n    }\n  )\n);\n"
  },
  {
    "path": "frontend/src/store/sessionStore.ts",
    "content": "import { create } from 'zustand';\nimport { persist } from 'zustand/middleware';\nimport type { SessionMeta } from '@/types/agent';\nimport { deleteMessages, moveMessages } from '@/lib/chat-message-store';\nimport { moveBackendMessages, deleteBackendMessages } from '@/lib/backend-message-store';\n\ninterface SessionStore {\n  sessions: SessionMeta[];\n  activeSessionId: string | null;\n\n  // Actions\n  createSession: (id: string) => void;\n  deleteSession: (id: string) => void;\n  switchSession: (id: string) => void;\n  setSessionActive: (id: string, isActive: boolean) => void;\n  updateSessionTitle: (id: string, title: string) => void;\n  setNeedsAttention: (id: string, needs: boolean) => void;\n  /** Mark a session as expired (backend no longer has it). The UI shows a\n   *  recovery banner and disables input. */\n  markExpired: (id: string) => void;\n  /** Clear the expired flag (used after restore-with-summary succeeds). */\n  clearExpired: (id: string) => void;\n  /** Atomically swap a session's id in the list + both localStorage caches.\n   *  Used when we rehydrate an expired session into a freshly-created backend\n   *  session — preserves title, timestamps, and messages. */\n  renameSession: (oldId: string, newId: string) => void;\n}\n\nexport const useSessionStore = create<SessionStore>()(\n  persist(\n    (set, get) => ({\n      sessions: [],\n      activeSessionId: null,\n\n      createSession: (id: string) => {\n        const newSession: SessionMeta = {\n          id,\n          title: `Chat ${get().sessions.length + 1}`,\n          createdAt: new Date().toISOString(),\n          isActive: true,\n          needsAttention: false,\n        };\n        set((state) => ({\n          sessions: [...state.sessions, newSession],\n          activeSessionId: id,\n        }));\n      },\n\n      deleteSession: (id: string) => {\n        deleteMessages(id);\n        deleteBackendMessages(id);\n        set((state) => {\n          const newSessions = state.sessions.filter((s) => s.id !== id);\n          const newActiveId =\n            state.activeSessionId === id\n              ? newSessions[newSessions.length - 1]?.id || null\n              : state.activeSessionId;\n          return {\n            sessions: newSessions,\n            activeSessionId: newActiveId,\n          };\n        });\n      },\n\n      markExpired: (id: string) => {\n        set((state) => ({\n          sessions: state.sessions.map((s) => (s.id === id ? { ...s, expired: true } : s)),\n        }));\n      },\n\n      clearExpired: (id: string) => {\n        set((state) => ({\n          sessions: state.sessions.map((s) =>\n            s.id === id ? { ...s, expired: false } : s,\n          ),\n        }));\n      },\n\n      renameSession: (oldId: string, newId: string) => {\n        if (oldId === newId) return;\n        moveMessages(oldId, newId);\n        moveBackendMessages(oldId, newId);\n        set((state) => ({\n          sessions: state.sessions.map((s) =>\n            s.id === oldId ? { ...s, id: newId, expired: false } : s,\n          ),\n          activeSessionId: state.activeSessionId === oldId ? newId : state.activeSessionId,\n        }));\n      },\n\n      switchSession: (id: string) => {\n        set((state) => ({\n          activeSessionId: id,\n          sessions: state.sessions.map((s) =>\n            s.id === id ? { ...s, needsAttention: false } : s\n          ),\n        }));\n      },\n\n      setSessionActive: (id: string, isActive: boolean) => {\n        set((state) => ({\n          sessions: state.sessions.map((s) =>\n            s.id === id ? { ...s, isActive } : s\n          ),\n        }));\n      },\n\n      updateSessionTitle: (id: string, title: string) => {\n        set((state) => ({\n          sessions: state.sessions.map((s) =>\n            s.id === id ? { ...s, title } : s\n          ),\n        }));\n      },\n\n      setNeedsAttention: (id: string, needs: boolean) => {\n        set((state) => ({\n          sessions: state.sessions.map((s) =>\n            s.id === id ? { ...s, needsAttention: needs } : s\n          ),\n        }));\n      },\n    }),\n    {\n      name: 'hf-agent-sessions',\n      partialize: (state) => ({\n        sessions: state.sessions,\n        activeSessionId: state.activeSessionId,\n      }),\n    }\n  )\n);\n"
  },
  {
    "path": "frontend/src/theme.ts",
    "content": "import { createTheme, type ThemeOptions } from '@mui/material/styles';\n\n// ── Shared tokens ────────────────────────────────────────────────\nconst sharedTypography: ThemeOptions['typography'] = {\n  fontFamily: 'Inter, system-ui, -apple-system, \"Segoe UI\", Roboto, Arial, sans-serif',\n  fontSize: 15,\n  button: {\n    fontFamily: 'Inter, system-ui, -apple-system, \"Segoe UI\", Roboto, Arial, sans-serif',\n    textTransform: 'none' as const,\n    fontWeight: 600,\n  },\n};\n\nconst sharedComponents: ThemeOptions['components'] = {\n  MuiButton: {\n    styleOverrides: {\n      root: {\n        borderRadius: '10px',\n        fontWeight: 600,\n        transition: 'transform 0.06s ease, background 0.12s ease, box-shadow 0.12s ease',\n        '&:hover': { transform: 'translateY(-1px)' },\n      },\n    },\n  },\n  MuiPaper: {\n    styleOverrides: {\n      root: { backgroundImage: 'none' },\n    },\n  },\n};\n\nconst sharedShape: ThemeOptions['shape'] = { borderRadius: 12 };\n\n// ── Dark palette ─────────────────────────────────────────────────\nconst darkVars = {\n  '--bg': '#0B0D10',\n  '--panel': '#0F1316',\n  '--surface': '#121416',\n  '--text': '#E6EEF8',\n  '--muted-text': '#98A0AA',\n  '--accent-yellow': '#FF9D00',\n  '--accent-yellow-weak': 'rgba(255,157,0,0.08)',\n  '--accent-green': '#2FCC71',\n  '--accent-red': '#E05A4F',\n  '--shadow-1': '0 6px 18px rgba(2,6,12,0.55)',\n  '--radius-lg': '20px',\n  '--radius-md': '12px',\n  '--focus': '0 0 0 3px rgba(255,157,0,0.12)',\n  '--border': 'rgba(255,255,255,0.03)',\n  '--border-hover': 'rgba(255,255,255,0.1)',\n  '--code-bg': 'rgba(0,0,0,0.5)',\n  '--tool-bg': 'rgba(0,0,0,0.3)',\n  '--tool-border': 'rgba(255,255,255,0.05)',\n  '--hover-bg': 'rgba(255,255,255,0.05)',\n  '--composer-bg': 'rgba(255,255,255,0.01)',\n  '--msg-gradient': 'linear-gradient(180deg, rgba(255,255,255,0.015), transparent)',\n  '--body-gradient': 'linear-gradient(180deg, #0B0D10, #090B0D)',\n  '--scrollbar-thumb': '#30363D',\n  '--success-icon': '#FDB022',\n  '--error-icon': '#F87171',\n  '--clickable-text': 'rgba(255, 255, 255, 0.9)',\n  '--clickable-underline': 'rgba(255,255,255,0.3)',\n  '--code-panel-bg': '#0A0B0C',\n  '--tab-active-bg': 'rgba(255,255,255,0.08)',\n  '--tab-active-border': 'rgba(255,255,255,0.1)',\n  '--tab-hover-bg': 'rgba(255,255,255,0.05)',\n  '--tab-close-hover': 'rgba(255,255,255,0.1)',\n  '--plan-bg': 'rgba(0,0,0,0.2)',\n} as const;\n\n// ── Light palette ────────────────────────────────────────────────\nconst lightVars = {\n  '--bg': '#FFFFFF',\n  '--panel': '#F7F8FA',\n  '--surface': '#F0F1F3',\n  '--text': '#1A1A2E',\n  '--muted-text': '#6B7280',\n  '--accent-yellow': '#FF9D00',\n  '--accent-yellow-weak': 'rgba(255,157,0,0.08)',\n  '--accent-green': '#16A34A',\n  '--accent-red': '#DC2626',\n  '--shadow-1': '0 4px 12px rgba(0,0,0,0.08)',\n  '--radius-lg': '20px',\n  '--radius-md': '12px',\n  '--focus': '0 0 0 3px rgba(255,157,0,0.15)',\n  '--border': 'rgba(0,0,0,0.08)',\n  '--border-hover': 'rgba(0,0,0,0.15)',\n  '--code-bg': 'rgba(0,0,0,0.04)',\n  '--tool-bg': 'rgba(0,0,0,0.03)',\n  '--tool-border': 'rgba(0,0,0,0.08)',\n  '--hover-bg': 'rgba(0,0,0,0.04)',\n  '--composer-bg': 'rgba(0,0,0,0.02)',\n  '--msg-gradient': 'linear-gradient(180deg, rgba(0,0,0,0.01), transparent)',\n  '--body-gradient': 'linear-gradient(180deg, #FFFFFF, #F7F8FA)',\n  '--scrollbar-thumb': '#C4C8CC',\n  '--success-icon': '#FF9D00',\n  '--error-icon': '#DC2626',\n  '--clickable-text': 'rgba(0, 0, 0, 0.85)',\n  '--clickable-underline': 'rgba(0,0,0,0.25)',\n  '--code-panel-bg': '#F5F6F8',\n  '--tab-active-bg': 'rgba(0,0,0,0.06)',\n  '--tab-active-border': 'rgba(0,0,0,0.1)',\n  '--tab-hover-bg': 'rgba(0,0,0,0.04)',\n  '--tab-close-hover': 'rgba(0,0,0,0.08)',\n  '--plan-bg': 'rgba(0,0,0,0.03)',\n} as const;\n\n// ── Shared CSS baseline (scrollbar, code, brand-logo) ────────────\nfunction makeCssBaseline(vars: Record<string, string>) {\n  return {\n    styleOverrides: {\n      ':root': vars,\n      body: {\n        background: 'var(--body-gradient)',\n        color: 'var(--text)',\n        scrollbarWidth: 'thin' as const,\n        '&::-webkit-scrollbar': { width: '8px', height: '8px' },\n        '&::-webkit-scrollbar-thumb': {\n          backgroundColor: 'var(--scrollbar-thumb)',\n          borderRadius: '2px',\n        },\n        '&::-webkit-scrollbar-track': { backgroundColor: 'transparent' },\n      },\n      'code, pre': {\n        fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, \"Roboto Mono\", monospace',\n      },\n      '.brand-logo': {\n        position: 'relative' as const,\n        padding: '6px',\n        borderRadius: '8px',\n        '&::after': {\n          content: '\"\"',\n          position: 'absolute' as const,\n          inset: '-6px',\n          borderRadius: '10px',\n          background: 'var(--accent-yellow-weak)',\n          zIndex: -1,\n          pointerEvents: 'none' as const,\n        },\n      },\n    },\n  };\n}\n\nfunction makeDrawer() {\n  return {\n    styleOverrides: {\n      paper: {\n        backgroundColor: 'var(--panel)',\n        borderRight: '1px solid var(--border)',\n      },\n    },\n  };\n}\n\nfunction makeTextField() {\n  return {\n    styleOverrides: {\n      root: {\n        '& .MuiOutlinedInput-root': {\n          borderRadius: 'var(--radius-md)',\n          '& fieldset': { borderColor: 'var(--border)' },\n          '&:hover fieldset': { borderColor: 'var(--border-hover)' },\n          '&.Mui-focused fieldset': {\n            borderColor: 'var(--accent-yellow)',\n            borderWidth: '1px',\n            boxShadow: 'var(--focus)',\n          },\n        },\n      },\n    },\n  };\n}\n\n// ── Theme builders ───────────────────────────────────────────────\nexport const darkTheme = createTheme({\n  palette: {\n    mode: 'dark',\n    primary: { main: '#FF9D00', light: '#FFB740', dark: '#E08C00', contrastText: '#fff' },\n    secondary: { main: '#FF9D00' },\n    background: { default: '#0B0D10', paper: '#0F1316' },\n    text: { primary: '#E6EEF8', secondary: '#98A0AA' },\n    divider: 'rgba(255,255,255,0.03)',\n    success: { main: '#2FCC71' },\n    error: { main: '#E05A4F' },\n    warning: { main: '#FF9D00' },\n    info: { main: '#58A6FF' },\n  },\n  typography: sharedTypography,\n  components: {\n    ...sharedComponents,\n    MuiCssBaseline: makeCssBaseline(darkVars),\n    MuiDrawer: makeDrawer(),\n    MuiTextField: makeTextField(),\n  },\n  shape: sharedShape,\n});\n\nexport const lightTheme = createTheme({\n  palette: {\n    mode: 'light',\n    primary: { main: '#FF9D00', light: '#FFB740', dark: '#E08C00', contrastText: '#fff' },\n    secondary: { main: '#E08C00' },\n    background: { default: '#FFFFFF', paper: '#F7F8FA' },\n    text: { primary: '#1A1A2E', secondary: '#6B7280' },\n    divider: 'rgba(0,0,0,0.08)',\n    success: { main: '#16A34A' },\n    error: { main: '#DC2626' },\n    warning: { main: '#FF9D00' },\n    info: { main: '#2563EB' },\n  },\n  typography: sharedTypography,\n  components: {\n    ...sharedComponents,\n    MuiCssBaseline: makeCssBaseline(lightVars),\n    MuiDrawer: makeDrawer(),\n    MuiTextField: makeTextField(),\n  },\n  shape: sharedShape,\n});\n\n// Keep default export for backwards compat\nexport default darkTheme;\n"
  },
  {
    "path": "frontend/src/types/agent.ts",
    "content": "/**\n * Agent-related types.\n *\n * Message and tool-call types are now provided by the Vercel AI SDK\n * (UIMessage, UIMessagePart, etc.). Only non-SDK types remain here.\n */\n\n/** Custom metadata attached to every UIMessage via the `metadata` field. */\nexport interface MessageMeta {\n  createdAt?: string;\n}\n\nexport interface SessionMeta {\n  id: string;\n  title: string;\n  createdAt: string;\n  isActive: boolean;\n  needsAttention: boolean;\n  /** True when the backend no longer recognizes this session id (e.g.\n   *  after a backend restart). The UI shows a recovery banner and\n   *  disables input until the user chooses to restore-with-summary or\n   *  start fresh. */\n  expired?: boolean;\n}\n\nexport interface ToolApproval {\n  tool_call_id: string;\n  approved: boolean;\n  feedback?: string | null;\n}\n\nexport interface User {\n  authenticated: boolean;\n  username?: string;\n  name?: string;\n  picture?: string;\n  orgMember?: boolean;\n}\n"
  },
  {
    "path": "frontend/src/types/events.ts",
    "content": "/**\n * Event types from the agent backend\n */\n\nexport type EventType =\n  | 'ready'\n  | 'processing'\n  | 'assistant_message'\n  | 'assistant_chunk'\n  | 'assistant_stream_end'\n  | 'tool_call'\n  | 'tool_output'\n  | 'tool_log'\n  | 'approval_required'\n  | 'tool_state_change'\n  | 'turn_complete'\n  | 'compacted'\n  | 'error'\n  | 'shutdown'\n  | 'interrupted'\n  | 'undo_complete'\n  | 'plan_update';\n\nexport interface AgentEvent {\n  event_type: EventType;\n  data?: Record<string, unknown>;\n}\n\nexport interface ReadyEventData {\n  message: string;\n}\n\nexport interface ProcessingEventData {\n  message: string;\n}\n\nexport interface AssistantMessageEventData {\n  content: string;\n}\n\nexport interface ToolCallEventData {\n  tool: string;\n  arguments: Record<string, unknown>;\n}\n\nexport interface ToolOutputEventData {\n  tool: string;\n  output: string;\n  success: boolean;\n}\n\nexport interface ToolLogEventData {\n  tool: string;\n  log: string;\n}\n\nexport interface PlanUpdateEventData {\n  plan: Array<{ id: string; content: string; status: 'pending' | 'in_progress' | 'completed' }>;\n}\n\nexport interface ApprovalRequiredEventData {\n  tools: ApprovalToolItem[];\n  count: number;\n}\n\nexport interface ApprovalToolItem {\n  tool: string;\n  arguments: Record<string, unknown>;\n  tool_call_id: string;\n}\n\nexport interface TurnCompleteEventData {\n  history_size: number;\n}\n\nexport interface CompactedEventData {\n  old_tokens: number;\n  new_tokens: number;\n}\n\nexport interface ErrorEventData {\n  error: string;\n}\n"
  },
  {
    "path": "frontend/src/utils/api.ts",
    "content": "/**\n * Centralized API utilities.\n *\n * In production: HttpOnly cookie (hf_access_token) is sent automatically.\n * In development: auth is bypassed on the backend.\n */\n\nimport { triggerLogin } from '@/hooks/useAuth';\n\n/** Wrapper around fetch with credentials and common headers. */\nexport async function apiFetch(\n  path: string,\n  options: RequestInit = {}\n): Promise<Response> {\n  const headers: Record<string, string> = {\n    'Content-Type': 'application/json',\n    ...(options.headers as Record<string, string>),\n  };\n\n  const response = await fetch(path, {\n    ...options,\n    headers,\n    credentials: 'include', // Send cookies with every request\n  });\n\n  // Handle 401 — redirect to login\n  if (response.status === 401) {\n    try {\n      const authStatus = await fetch('/auth/status', { credentials: 'include' });\n      const data = await authStatus.json();\n      if (data.auth_enabled) {\n        triggerLogin();\n        throw new Error('Authentication required — redirecting to login.');\n      }\n    } catch (e) {\n      if (e instanceof Error && e.message.includes('redirecting')) throw e;\n    }\n  }\n\n  return response;\n}"
  },
  {
    "path": "frontend/src/utils/logProcessor.ts",
    "content": "export function processLogs(logs: string): string {\n  if (!logs) return '';\n\n  // 1. Handle \\r (Carriage Return) for progress bars\n  const rawLines = logs.split('\\n');\n  const processedLines: string[] = [];\n  \n  for (const rawLine of rawLines) {\n    // Remove potential trailing \\r from \\r\\n split\n    let line = rawLine;\n    if (line.endsWith('\\r')) {\n        line = line.slice(0, -1);\n    }\n\n    if (line.includes('\\r')) {\n      const segments = line.split('\\r');\n      // Find the last non-empty segment\n      // Iterate backwards\n      let found = false;\n      for (let i = segments.length - 1; i >= 0; i--) {\n        if (segments[i].length > 0) {\n            processedLines.push(segments[i]);\n            found = true;\n            break;\n        }\n      }\n      if (!found) {\n        // If all segments were empty, push empty string (or skip?)\n        processedLines.push(\"\");\n      }\n    } else {\n      processedLines.push(line);\n    }\n  }\n\n  // 2. Compaction (Downloading & TQDM)\n  const finalLines: string[] = [];\n  \n  // Regex for \"Downloading <package>\" or \"Downloaded <package>\"\n  const downloadPattern = /^(Downloading|Downloaded)\\s+/;\n  \n  // Regex for TQDM-like progress bars\n  // Examples:\n  // \"100%|██████████| 10/10 [00:01<00:00,  8.00it/s]\"\n  // \" 20%|##        | ...\"\n  // \"Downloading:  10%\"\n  const tqdmPattern = /^\\s*\\d+%\\|.*\\||^\\s*\\d+%\\s+/;\n\n  for (let i = 0; i < processedLines.length; i++) {\n    const line = processedLines[i];\n    \n    // Check for Download pattern\n    if (downloadPattern.test(line)) {\n      // Look ahead for consecutive download lines\n      let nextIsDownload = false;\n      if (i + 1 < processedLines.length) {\n        nextIsDownload = downloadPattern.test(processedLines[i + 1]);\n      }\n      \n      if (nextIsDownload) {\n        continue; // Skip this line\n      }\n    } \n    // Check for TQDM pattern\n    else if (tqdmPattern.test(line)) {\n        // Look ahead for consecutive TQDM lines\n        let nextIsTqdm = false;\n        if (i + 1 < processedLines.length) {\n            nextIsTqdm = tqdmPattern.test(processedLines[i + 1]);\n        }\n        \n        if (nextIsTqdm) {\n            continue; // Skip this line\n        }\n    }\n    \n    finalLines.push(line);\n  }\n\n  return finalLines.join('\\n');\n}"
  },
  {
    "path": "frontend/src/utils/logger.ts",
    "content": "/**\n * Lightweight logger that silences verbose output in production.\n *\n * - `log` / `debug` are only emitted when `import.meta.env.DEV` is true.\n * - `warn` and `error` always go through so real issues surface in prod.\n */\n\nconst isDev = import.meta.env.DEV;\n\n/* eslint-disable no-console */\nexport const logger = {\n  /** Debug-level log — DEV only. */\n  log: (...args: unknown[]) => {\n    if (isDev) console.log(...args);\n  },\n  /** Debug-level log — DEV only. */\n  debug: (...args: unknown[]) => {\n    if (isDev) console.debug(...args);\n  },\n  /** Warning — always emitted. */\n  warn: console.warn.bind(console),\n  /** Error — always emitted. */\n  error: console.error.bind(console),\n};\n"
  },
  {
    "path": "frontend/src/utils/model.ts",
    "content": "/**\n * Shared model-id constants used by session-create call sites and the\n * ClaudeCapDialog \"Use a free model\" escape hatch.\n *\n * Keep in sync with MODEL_OPTIONS in components/Chat/ChatInput.tsx and\n * AVAILABLE_MODELS in backend/routes/agent.py. Bare HF ids (no\n * `huggingface/` prefix) — matches upstream's auto-router.\n */\n\nexport const CLAUDE_MODEL_PATH = 'anthropic/claude-opus-4-6';\nexport const FIRST_FREE_MODEL_PATH = 'moonshotai/Kimi-K2.6';\n\nexport function isClaudePath(modelPath: string | undefined): boolean {\n  return !!modelPath && modelPath.startsWith('anthropic/');\n}\n"
  },
  {
    "path": "frontend/src/vite-env.d.ts",
    "content": "/// <reference types=\"vite/client\" />\n"
  },
  {
    "path": "frontend/tsconfig.json",
    "content": "{\n  \"compilerOptions\": {\n    \"target\": \"ES2020\",\n    \"useDefineForClassFields\": true,\n    \"lib\": [\"ES2020\", \"DOM\", \"DOM.Iterable\"],\n    \"module\": \"ESNext\",\n    \"skipLibCheck\": true,\n    \"moduleResolution\": \"bundler\",\n    \"allowImportingTsExtensions\": true,\n    \"isolatedModules\": true,\n    \"moduleDetection\": \"force\",\n    \"noEmit\": true,\n    \"jsx\": \"react-jsx\",\n    \"strict\": true,\n    \"noUnusedLocals\": true,\n    \"noUnusedParameters\": true,\n    \"noFallthroughCasesInSwitch\": true,\n    \"noUncheckedSideEffectImports\": true,\n    \"baseUrl\": \".\",\n    \"paths\": {\n      \"@/*\": [\"src/*\"]\n    }\n  },\n  \"include\": [\"src\"]\n}\n"
  },
  {
    "path": "frontend/vite.config.ts",
    "content": "import { defineConfig } from 'vite'\nimport react from '@vitejs/plugin-react'\nimport path from 'path'\n\nexport default defineConfig({\n  plugins: [react()],\n  resolve: {\n    alias: {\n      '@': path.resolve(__dirname, './src'),\n    },\n  },\n  server: {\n    port: 5173,\n    proxy: {\n      '/api': {\n        target: 'http://localhost:7860',\n        changeOrigin: true,\n        ws: true, // Proxy WebSocket connections (/api/ws/...)\n      },\n      '/auth': {\n        target: 'http://localhost:7860',\n        changeOrigin: true,\n      },\n    },\n  },\n  build: {\n    outDir: 'dist',\n    sourcemap: false,\n  },\n})\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[project]\nname = \"hf-agent\"\nversion = \"0.1.0\"\ndescription = \"Add your description here\"\nreadme = \"README.md\"\nrequires-python = \">=3.11\"\ndependencies = [\n    # Core dependencies\n    \"datasets>=4.4.1\",\n    \"pydantic>=2.12.3\",\n    \"python-dotenv>=1.2.1\",\n    # Agent runtime dependencies\n    \"requests>=2.33.0\",\n    \"litellm>=1.83.0\",\n    \"boto3>=1.35.0\",\n    \"huggingface-hub>=1.0.1\",\n    \"fastmcp>=3.2.0\",\n    \"prompt-toolkit>=3.0.0\",\n    \"thefuzz>=0.22.1\",\n    \"rich>=13.0.0\",\n    \"nbconvert>=7.16.6\",\n    \"nbformat>=5.10.4\",\n    \"whoosh>=2.7.4\",\n    # Web backend dependencies\n    \"fastapi>=0.115.0\",\n    \"uvicorn[standard]>=0.32.0\",\n    \"httpx>=0.27.0\",\n    \"websockets>=13.0\",\n]\n\n[project.optional-dependencies]\n\n# Evaluation/benchmarking dependencies\neval = [\n    \"inspect-ai>=0.3.149\",\n    \"pandas>=2.3.3\",\n    \"datasets>=4.3.0\",\n    \"tenacity>=8.0.0\",\n]\n\n# Development and testing dependencies\ndev = [\n    \"pytest>=9.0.2\",\n]\n\n# All dependencies (eval + dev)\nall = [\n    \"hf-agent[eval,dev]\",\n]\n\n[project.scripts]\nml-intern = \"agent.main:cli\"\n\n[build-system]\nrequires = [\"setuptools>=64\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[tool.setuptools.packages.find]\ninclude = [\"agent*\"]\n\n[tool.uv]\npackage = true\n"
  },
  {
    "path": "tests/unit/test_user_quotas.py",
    "content": "\"\"\"Tests for backend/user_quotas.py — the in-memory Claude daily-quota store.\"\"\"\n\nimport asyncio\nimport os\nimport sys\nfrom pathlib import Path\nfrom unittest.mock import patch\n\nimport pytest\n\n# The backend package isn't on sys.path by default; add it so we can import\n# the module under test without pulling in the whole FastAPI app.\n_BACKEND_DIR = Path(__file__).resolve().parent.parent.parent / \"backend\"\nif str(_BACKEND_DIR) not in sys.path:\n    sys.path.insert(0, str(_BACKEND_DIR))\n\nimport user_quotas  # noqa: E402\n\n\n@pytest.fixture(autouse=True)\ndef _reset_store():\n    \"\"\"Fresh in-memory store per test.\"\"\"\n    user_quotas._reset_for_tests()\n    yield\n    user_quotas._reset_for_tests()\n\n\ndef test_daily_cap_for_known_plans():\n    assert user_quotas.daily_cap_for(\"free\") == user_quotas.CLAUDE_FREE_DAILY\n    assert user_quotas.daily_cap_for(\"pro\") == user_quotas.CLAUDE_PRO_DAILY\n    assert user_quotas.daily_cap_for(\"org\") == user_quotas.CLAUDE_PRO_DAILY\n\n\ndef test_daily_cap_for_unknown_or_missing_defaults_to_free():\n    assert user_quotas.daily_cap_for(None) == user_quotas.CLAUDE_FREE_DAILY\n    assert user_quotas.daily_cap_for(\"\") == user_quotas.CLAUDE_FREE_DAILY\n    # Anything we don't recognize as the Pro/Org tier gets the Pro cap because\n    # the function's contract is \"free\" is the only downgraded tier. 
If that\n    # ever flips, this test will flip too — adjust consciously.\n    assert user_quotas.daily_cap_for(\"mystery\") == user_quotas.CLAUDE_PRO_DAILY\n\n\n@pytest.mark.asyncio\nasync def test_increment_and_read_back_same_day():\n    assert await user_quotas.get_claude_used_today(\"u1\") == 0\n    assert await user_quotas.increment_claude(\"u1\") == 1\n    assert await user_quotas.increment_claude(\"u1\") == 2\n    assert await user_quotas.get_claude_used_today(\"u1\") == 2\n\n\n@pytest.mark.asyncio\nasync def test_independent_users_do_not_share_counts():\n    await user_quotas.increment_claude(\"alice\")\n    await user_quotas.increment_claude(\"alice\")\n    await user_quotas.increment_claude(\"bob\")\n    assert await user_quotas.get_claude_used_today(\"alice\") == 2\n    assert await user_quotas.get_claude_used_today(\"bob\") == 1\n\n\n@pytest.mark.asyncio\nasync def test_stale_day_resets_before_next_read():\n    await user_quotas.increment_claude(\"u1\")\n    # Simulate yesterday's entry still in the store.\n    user_quotas._claude_counts[\"u1\"] = (\"2000-01-01\", 99)\n    assert await user_quotas.get_claude_used_today(\"u1\") == 0\n    # And a fresh increment starts from 0.\n    assert await user_quotas.increment_claude(\"u1\") == 1\n\n\n@pytest.mark.asyncio\nasync def test_concurrent_increments_under_lock_do_not_lose_writes():\n    \"\"\"50 coroutines bumping the same user must land at exactly 50.\"\"\"\n    await asyncio.gather(*[user_quotas.increment_claude(\"race\") for _ in range(50)])\n    assert await user_quotas.get_claude_used_today(\"race\") == 50\n\n\n@pytest.mark.asyncio\nasync def test_refund_decrements_and_drops_entry_at_zero():\n    await user_quotas.increment_claude(\"u1\")\n    assert await user_quotas.get_claude_used_today(\"u1\") == 1\n    await user_quotas.refund_claude(\"u1\")\n    assert await user_quotas.get_claude_used_today(\"u1\") == 0\n    assert \"u1\" not in user_quotas._claude_counts\n\n\n@pytest.mark.asyncio\nasync def 
test_refund_on_nonexistent_user_is_noop():\n    await user_quotas.refund_claude(\"ghost\")  # should not raise\n    assert await user_quotas.get_claude_used_today(\"ghost\") == 0\n\n\n@pytest.mark.asyncio\nasync def test_refund_on_stale_day_resets_rather_than_underflow():\n    user_quotas._claude_counts[\"u1\"] = (\"2000-01-01\", 5)\n    await user_quotas.refund_claude(\"u1\")\n    # Stale entry dropped; today's count stays 0.\n    assert await user_quotas.get_claude_used_today(\"u1\") == 0\n\n\n@pytest.mark.asyncio\nasync def test_free_user_cap_reached_at_one():\n    cap = user_quotas.daily_cap_for(\"free\")\n    used = await user_quotas.increment_claude(\"freebie\")\n    assert used == 1\n    assert used >= cap  # first bump exhausts the free tier (cap=1)\n\n\n@pytest.mark.asyncio\nasync def test_pro_user_cap_reached_at_twenty():\n    cap = user_quotas.daily_cap_for(\"pro\")\n    assert cap == 20\n    for i in range(1, 21):\n        assert await user_quotas.increment_claude(\"pro_user\") == i\n    # 21st would exceed — the gate in routes/agent.py enforces this; here\n    # we just confirm the counter tracks past the cap so that check works.\n    assert await user_quotas.increment_claude(\"pro_user\") == 21\n"
  }
]